Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.18.x] CFE-3982: Random process killing #5368

Merged
merged 5 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions cf-check/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,16 +277,16 @@ static void print_struct_or_string(
{
if (structs)
{
if (StringEndsWith(file, "cf_lastseen.lmdb")
if (StringContains(file, "cf_lastseen.lmdb")
&& StringStartsWith(key.mv_data, "q"))
{
print_struct_lastseen_quality(value, strip_strings);
}
else if (StringEndsWith(file, "cf_lock.lmdb"))
else if (StringContains(file, "cf_lock.lmdb"))
{
print_struct_lock_data(value, strip_strings);
}
else if (StringEndsWith(file, "cf_observations.lmdb"))
else if (StringContains(file, "cf_observations.lmdb"))
{
if (StringEqual(key.mv_data, "DATABASE_AGE"))
{
Expand All @@ -299,15 +299,18 @@ static void print_struct_or_string(
print_struct_averages(value, strip_strings, tskey_filename);
}
}
else if (StringEndsWith(file, "history.lmdb"))
else if (StringEqual(file, "history.lmdb") ||
StringEndsWith(file, FILE_SEPARATOR_STR"history.lmdb") ||
StringEqual(file, "history.lmdb.backup") ||
StringEndsWith(file, FILE_SEPARATOR_STR"history.lmdb.backup"))
{
print_struct_averages(value, strip_strings, tskey_filename);
}
else if (StringEndsWith(file, "cf_state.lmdb"))
else if (StringContains(file, "cf_state.lmdb"))
{
print_struct_persistent_class(value, strip_strings);
}
else if (StringEndsWith(file, "nova_agent_execution.lmdb"))
else if (StringContains(file, "nova_agent_execution.lmdb"))
{
if (StringEqual(key.mv_data, "delta_gavr"))
{
Expand Down
105 changes: 66 additions & 39 deletions libpromises/locks.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,46 +95,10 @@ static void CopyLockDatabaseAtomically(const char *from, const char *to,
const char *from_pretty_name,
const char *to_pretty_name);

static void RestoreLockDatabase(void);

static void VerifyThatDatabaseIsNotCorrupt_once(void)
{
int uptime = GetUptimeSeconds(time(NULL));
if (uptime <= 0)
{
Log(LOG_LEVEL_VERBOSE,
"Not able to determine uptime when verifying lock database. "
"Will assume the database is in order.");
return;
}

char *db_path = DBIdToPath(dbid_locks);
struct stat statbuf;
if (stat(db_path, &statbuf) == 0)
{
if (statbuf.st_mtime < time(NULL) - uptime)
{
// We have rebooted since the database was last updated.
// Restore it from our backup.
RestoreLockDatabase();
}
}

free(db_path);
}

static void VerifyThatDatabaseIsNotCorrupt(void)
{
static pthread_once_t uptime_verified = PTHREAD_ONCE_INIT;
pthread_once(&uptime_verified, &VerifyThatDatabaseIsNotCorrupt_once);
}

CF_DB *OpenLock()
{
CF_DB *dbp;

VerifyThatDatabaseIsNotCorrupt();

if (!OpenDB(&dbp, dbid_locks))
{
return NULL;
Expand Down Expand Up @@ -554,6 +518,63 @@ static void PromiseTypeString(char *dst, size_t dst_size, const Promise *pp)
}
}

/**
* A helper best-effort function to prevent us from killing non CFEngine
* processes with matching PID-start_time combinations **when/where it's easy to
* check**.
*/
static bool IsCfengineLockHolder(pid_t pid)
{
char procfile[PATH_MAX];
snprintf(procfile, PATH_MAX, "/proc/%ju/comm", (uintmax_t) pid);
int f = open(procfile, O_RDONLY);
/* On platforms where /proc doesn't exist, we would have to do a more
complicated check probably not worth it in this helper best-effort
function. */
if (f == -1)
{
/* assume true where we cannot check */
return true;
}

/* more than any possible CFEngine lock holder's name */
char command[32] = {0};
ssize_t n_read = FullRead(f, command, sizeof(command));
close(f);
if ((n_read <= 1) || (n_read == sizeof(command)))
{
Log(LOG_LEVEL_VERBOSE, "Failed to get command for process %ju", (uintmax_t) pid);
/* assume true where we cannot check */
return true;
}
if (command[n_read - 1] == '\n')
{
command[n_read - 1] = '\0';
}

/* potential CFEngine lock holders (others like cf-net, cf-key,... are not
* supposed/expected to be lock holders) */
const char *const cfengine_procs[] = {
"cf-promises",
"lt-cf-agent", /* when running from a build with 'libtool --mode=execute' */
"cf-agent",
"cf-execd",
"cf-serverd",
"cf-monitord",
"cf-hub",
NULL
};
for (size_t i = 0; cfengine_procs[i] != NULL; i++)
{
if (StringEqual(cfengine_procs[i], command))
{
return true;
}
}
Log(LOG_LEVEL_DEBUG, "'%s' not considered a CFEngine process", command);
return false;
}

static bool KillLockHolder(const char *lock)
{
bool ret;
Expand Down Expand Up @@ -587,6 +608,13 @@ static bool KillLockHolder(const char *lock)

CloseLock(dbp);

if (!IsCfengineLockHolder(lock_data.pid)) {
Log(LOG_LEVEL_VERBOSE,
"Lock holder with PID %ju was replaced by a non CFEngine process, ignoring request to kill it",
(uintmax_t) lock_data.pid);
return true;
}

if (GracefulTerminate(lock_data.pid, lock_data.process_start_time))
{
Log(LOG_LEVEL_INFO,
Expand Down Expand Up @@ -1074,11 +1102,10 @@ void GetLockName(char *lockname, const char *locktype,
}
}

static void RestoreLockDatabase(void)
void RestoreLockDatabase(void)
{
// We don't do any locking here (since we can't trust the database), but
// this should be right after bootup, so we should be the only one.
// Worst case someone else will just copy the same file to the same
// worst case someone else will just copy the same file to the same
// location.
char *db_path = DBIdToPath(dbid_locks);
char *db_path_backup;
Expand Down
1 change: 1 addition & 0 deletions libpromises/locks.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ void GetLockName(char *lockname, const char *locktype,
const char *base, const Rlist *params);
void PurgeLocks(void);
void BackupLockDatabase(void);
void RestoreLockDatabase(void);

// Used in enterprise/nova code:
CF_DB *OpenLock();
Expand Down
1 change: 0 additions & 1 deletion misc/systemd/cf-apache.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@ UMask=0177

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-execd.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,3 @@ KillMode=process

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-hub.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@ RestartSec=10

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-monitord.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,3 @@ RestartSec=10

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-postgres.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ ExecReload=@bindir@/pg_ctl -w -D ${PGDATA} -l /var/log/postgresql.log reload -m

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-reactor.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@ RestartSec=10

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
1 change: 0 additions & 1 deletion misc/systemd/cf-runalerts.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,4 @@ RestartSec=10

[Install]
WantedBy=multi-user.target
WantedBy=cfengine3.service
WantedBy=cf-postgres.service
1 change: 0 additions & 1 deletion misc/systemd/cf-serverd.service.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,3 @@ RestartSec=10

[Install]
WantedBy=network-online.target
WantedBy=cfengine3.service