Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.18.x] A few LMDB related fixes and improvements #5380

Merged
merged 9 commits into from
Nov 20, 2023
40 changes: 36 additions & 4 deletions cf-check/diagnose.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ size_t diagnose_files(
#include <validate.h>
#include <openssl/rand.h>

/* NOTE: Must be in sync with LMDB_MAXSIZE in libpromises/dbm_lmdb.c. */
#ifndef LMDB_MAXSIZE
#define LMDB_MAXSIZE 104857600
#endif

#define CF_CHECK_CREATE_STRING(name) \
#name,

Expand Down Expand Up @@ -524,6 +529,25 @@ static char *follow_symlink(const char *path)
return xstrdup(target_buf);
}

bool lmdb_file_needs_rotation(const char *file, int *usage)
{
struct stat sb;
if (stat(file, &sb) == 0)
{
int usage_pct = (((float) sb.st_size) / LMDB_MAXSIZE) * 100;
if (usage != NULL)
{
*usage = usage_pct;
}
return (usage_pct >= 95);
}
else
{
Log(LOG_LEVEL_ERR, "Failed to stat() '%s' when checking usage: %s", file, GetErrorStr());
return false;
}
}

/**
* @param[in] filenames DB files to diagnose/check
* @param[out] corrupt place to store the resulting sequence of corrupted
Expand Down Expand Up @@ -590,18 +614,26 @@ size_t diagnose_files(

if (symlink_target != NULL)
{
int usage;
bool needs_rotation = lmdb_file_needs_rotation(symlink_target, &usage);
Log(LOG_LEVEL_INFO,
"Status of '%s' -> '%s': %s\n",
"Status of '%s' -> '%s': %s [%d%% usage%s]\n",
symlink,
symlink_target,
CF_CHECK_STRING(r));
CF_CHECK_STRING(r),
usage,
needs_rotation ? ", needs rotation" : "");
}
else
{
int usage;
bool needs_rotation = lmdb_file_needs_rotation(filename, &usage);
Log(LOG_LEVEL_INFO,
"Status of '%s': %s\n",
"Status of '%s': %s [%d%% usage%s]\n",
filename,
CF_CHECK_STRING(r));
CF_CHECK_STRING(r),
usage,
needs_rotation ? ", needs rotation" : "");
}


Expand Down
2 changes: 2 additions & 0 deletions cf-check/diagnose.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ int lmdb_errno_to_cf_check_code(int r);
int signal_to_cf_check_code(int sig);
void report_mdb_error(const char *db_file, const char *op, int rc);

bool lmdb_file_needs_rotation(const char *file, int *usage);

size_t diagnose_files(
const Seq *filenames, Seq **corrupt, bool foreground, bool validate, bool test_write);
int diagnose_main(int argc, const char *const *argv);
Expand Down
46 changes: 29 additions & 17 deletions cf-check/dump.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,10 @@ static void print_struct_lastseen_quality(
else
{
// TODO: improve names of struct members in QPoint and KeyHostSeen
const KeyHostSeen *const quality = value.mv_data;
const time_t lastseen = quality->lastseen;
const QPoint Q = quality->Q;
KeyHostSeen quality;
memcpy(&quality, value.mv_data, sizeof(quality));
const time_t lastseen = quality.lastseen;
const QPoint Q = quality.Q;

JsonElement *q_json = JsonObjectCreate(4);
JsonObjectAppendReal(q_json, "q", Q.q);
Expand Down Expand Up @@ -136,10 +137,11 @@ static void print_struct_lock_data(
else
{
// TODO: improve names of struct members in LockData
const LockData *const lock = value.mv_data;
const pid_t pid = lock->pid;
const time_t time = lock->time;
const time_t process_start_time = lock->process_start_time;
LockData lock;
memcpy(&lock, value.mv_data, sizeof(lock));
const pid_t pid = lock.pid;
const time_t time = lock.time;
const time_t process_start_time = lock.process_start_time;

JsonElement *json = JsonObjectCreate(3);
JsonObjectAppendInteger(json, "pid", pid);
Expand Down Expand Up @@ -168,8 +170,9 @@ static void print_struct_averages(
{
// TODO: clean up Averages
char **obnames = NULL;
const Averages *const averages = value.mv_data;
const time_t last_seen = averages->last_seen;
Averages averages;
memcpy(&averages, value.mv_data, sizeof(averages));
const time_t last_seen = averages.last_seen;

obnames = GetObservableNames(tskey_filename);
JsonElement *all_observables = JsonObjectCreate(CF_OBSERVABLES);
Expand All @@ -178,7 +181,7 @@ static void print_struct_averages(
{
char *name = obnames[i];
JsonElement *observable = JsonObjectCreate(4);
QPoint Q = averages->Q[i];
QPoint Q = averages.Q[i];

JsonObjectAppendReal(observable, "q", Q.q);
JsonObjectAppendReal(observable, "expect", Q.expect);
Expand Down Expand Up @@ -216,7 +219,11 @@ static void print_struct_persistent_class(
}
else
{
const PersistentClassInfo *const class_info = value.mv_data;
/* Make a copy to ensure proper alignment. We cannot just copy data to a
* local PersistentClassInfo variable because it contains a
* variable-length string at the end (see the struct definition). */
PersistentClassInfo *class_info = malloc(value.mv_size);
memcpy(class_info, value.mv_data, value.mv_size);
const unsigned int expires = class_info->expires;
const PersistentClassPolicy policy = class_info->policy;
const char *policy_str;
Expand Down Expand Up @@ -250,6 +257,7 @@ static void print_struct_persistent_class(
// String is not terminated, abort or fall back to default:
debug_abort_if_reached();
print_json_string(value.mv_data, value.mv_size, strip_strings);
free(class_info);
return;
}

Expand All @@ -264,6 +272,7 @@ static void print_struct_persistent_class(
JsonWriteCompact(w, top_json);
FileWriterDetach(w);
JsonDestroy(top_json);
free(class_info);
}
}

Expand Down Expand Up @@ -291,8 +300,9 @@ static void print_struct_or_string(
if (StringEqual(key.mv_data, "DATABASE_AGE"))
{
assert(sizeof(double) == value.mv_size);
const double *const age = value.mv_data;
printf("%f", *age);
double age;
memcpy(&age, value.mv_data, sizeof(age));
printf("%f", age);
}
else
{
Expand All @@ -315,14 +325,16 @@ static void print_struct_or_string(
if (StringEqual(key.mv_data, "delta_gavr"))
{
assert(sizeof(double) == value.mv_size);
const double *const average = value.mv_data;
printf("%f", *average);
double average;
memcpy(&average, value.mv_data, sizeof(average));
printf("%f", average);
}
else if (StringEqual(key.mv_data, "last_exec"))
{
assert(sizeof(time_t) == value.mv_size);
const time_t *const last_exec = value.mv_data;
printf("%ju", (uintmax_t) (*last_exec));
time_t last_exec;
memcpy(&last_exec, value.mv_data, sizeof(last_exec));
printf("%ju", (uintmax_t) (last_exec));
}
else
{
Expand Down
109 changes: 96 additions & 13 deletions cf-check/repair.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,9 @@ static void print_usage(void)
{
printf("Usage: cf-check repair [-f] [FILE ...]\n");
printf("Example: cf-check repair /var/cfengine/state/cf_lastseen.lmdb\n");
printf("Options: -f|--force repair LMDB files that look OK ");
printf("Options:\n"
"-f|--force repair LMDB files that look OK\n"
"-w|--test-write test writing when checking files\n");
}

int remove_files(Seq *files)
Expand Down Expand Up @@ -80,7 +82,7 @@ int remove_files(Seq *files)
return failures;
}

static bool record_repair_timestamp(int fd_tstamp)
static bool record_timestamp(int fd_tstamp)
{
time_t this_timestamp = time(NULL);
lseek(fd_tstamp, 0, SEEK_SET);
Expand Down Expand Up @@ -158,7 +160,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
}
else
{
if (!record_repair_timestamp(fd_tstamp))
if (!record_timestamp(fd_tstamp))
{
Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
file);
Expand All @@ -178,7 +180,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
}
else
{
if (!record_repair_timestamp(fd_tstamp))
if (!record_timestamp(fd_tstamp))
{
Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
file);
Expand All @@ -200,7 +202,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
ret = -1;
goto cleanup;
}
if (!record_repair_timestamp(fd_tstamp))
if (!record_timestamp(fd_tstamp))
{
Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
file);
Expand All @@ -217,7 +219,71 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
return ret;
}

int repair_lmdb_files(Seq *files, bool force)
/**
* @param file LMDB file to rotate
* @param fd_tstamp An open FD to the repair timestamp file or -1
*
* @note If #fd_tstamp != -1 then it is expected to be open and with file locks
* taken care of. If #fd_tstamp == -1, this function opens the rotation
* timestamp file on its own and takes care of the file locks.
*/
int rotate_lmdb_file(const char *file, int fd_tstamp)
{
int ret;
FileLock lock = EMPTY_FILE_LOCK;
if (fd_tstamp == -1)
{
char *tstamp_file = StringFormat("%s.rotated", file);
int lock_ret = ExclusiveFileLockPath(&lock, tstamp_file, true); /* wait=true */
free(tstamp_file);
if (lock_ret < 0)
{
/* Should never happen because we tried to wait for the lock. */
Log(LOG_LEVEL_ERR,
"Failed to acquire lock for the '%s' DB repair timestamp file",
file);
ret = -1;
goto cleanup;
}
fd_tstamp = lock.fd;
}

time_t now = time(NULL);
{
char *rotated_file = StringFormat("%s.rotated_%jd", file, (intmax_t) now);
ret = rename(file, rotated_file);
free(rotated_file);
}
if (ret != 0)
{
Log(LOG_LEVEL_ERR,
"Failed to rotate the '%s' DB file (%s), will be removed instead",
file, GetErrorStr());
ret = unlink(file);
if (ret != 0)
{
Log(LOG_LEVEL_ERR, "Failed to remove the '%s' DB file: %s",
file, GetErrorStr());
}
}
if (ret == 0)
{
if (!record_timestamp(fd_tstamp))
{
Log(LOG_LEVEL_ERR, "Failed to write the timestamp of rotation of the '%s' DB file",
file);
}
}

cleanup:
if (lock.fd != -1)
{
ExclusiveFileUnlock(&lock, true); /* close=true */
}
return ret;
}

static int repair_lmdb_files(Seq *files, bool force, bool test_write)
{
assert(files != NULL);
assert(SeqLength(files) > 0);
Expand All @@ -229,7 +295,7 @@ int repair_lmdb_files(Seq *files, bool force)
}
else
{
const int corruptions = diagnose_files(files, &corrupt, false, false, false);
const int corruptions = diagnose_files(files, &corrupt, false, false, test_write);
if (corruptions != 0)
{
assert(corrupt != NULL);
Expand All @@ -256,6 +322,14 @@ int repair_lmdb_files(Seq *files, bool force)
{
ret++;
}
int usage;
if (lmdb_file_needs_rotation(file, &usage))
{
if (rotate_lmdb_file(file, -1) != -1)
{
Log(LOG_LEVEL_INFO, "Rotated '%s' DB with %d%% usage", file, usage);
}
}
}

if (!force)
Expand All @@ -278,29 +352,38 @@ int repair_lmdb_files(Seq *files, bool force)

int repair_main(int argc, const char *const *const argv)
{
size_t offset = 1;
bool force = false;
if (argc > 1 && argv[1] != NULL && argv[1][0] == '-')
bool test_write = false;
int i = 1;
for (; (i < argc) && (argv[i] != NULL) && (argv[i][0] == '-'); i++)
{
if (StringMatchesOption(argv[1], "--force", "-f"))
if (StringMatchesOption(argv[i], "--force", "-f"))
{
offset++;
force = true;
}
else if (StringMatchesOption(argv[i], "--test-write", "-w"))
{
test_write = true;
}
else
{
print_usage();
printf("Unrecognized option: '%s'\n", argv[1]);
return 1;
}
}
if (force && test_write)
{
Log(LOG_LEVEL_WARNING, "Ignoring --test-write due to --force skipping DB checks");
}
size_t offset = i;
Seq *files = argv_to_lmdb_files(argc, argv, offset);
if (files == NULL || SeqLength(files) == 0)
{
Log(LOG_LEVEL_ERR, "No database files to repair");
return 1;
}
const int ret = repair_lmdb_files(files, force);
const int ret = repair_lmdb_files(files, force, test_write);
SeqDestroy(files);
return ret;
}
Expand All @@ -325,7 +408,7 @@ int repair_lmdb_default(bool force)
Log(LOG_LEVEL_INFO, "Skipping local database repair, no lmdb files");
return 0;
}
const int ret = repair_lmdb_files(files, force);
const int ret = repair_lmdb_files(files, force, false);
SeqDestroy(files);

if (ret != 0)
Expand Down
1 change: 1 addition & 0 deletions cf-check/repair.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,6 @@
int repair_main(int argc, const char *const *argv);
int repair_lmdb_default(bool force);
int repair_lmdb_file(const char *file, int fd_tstamp);
int rotate_lmdb_file(const char *file, int fd_tstamp);

#endif
Loading