diff --git a/cf-check/diagnose.c b/cf-check/diagnose.c index 7689bca611..c04444adb2 100644 --- a/cf-check/diagnose.c +++ b/cf-check/diagnose.c @@ -39,6 +39,11 @@ size_t diagnose_files( #include #include +/* NOTE: Must be in sync with LMDB_MAXSIZE in libpromises/dbm_lmdb.c. */ +#ifndef LMDB_MAXSIZE +#define LMDB_MAXSIZE 104857600 +#endif + #define CF_CHECK_CREATE_STRING(name) \ #name, @@ -524,6 +529,25 @@ static char *follow_symlink(const char *path) return xstrdup(target_buf); } +bool lmdb_file_needs_rotation(const char *file, int *usage) +{ + struct stat sb; + if (stat(file, &sb) == 0) + { + int usage_pct = (((float) sb.st_size) / LMDB_MAXSIZE) * 100; + if (usage != NULL) + { + *usage = usage_pct; + } + return (usage_pct >= 95); + } + else + { + Log(LOG_LEVEL_ERR, "Failed to stat() '%s' when checking usage: %s", file, GetErrorStr()); + return false; + } +} + /** * @param[in] filenames DB files to diagnose/check * @param[out] corrupt place to store the resulting sequence of corrupted @@ -590,18 +614,26 @@ size_t diagnose_files( if (symlink_target != NULL) { + int usage; + bool needs_rotation = lmdb_file_needs_rotation(symlink_target, &usage); Log(LOG_LEVEL_INFO, - "Status of '%s' -> '%s': %s\n", + "Status of '%s' -> '%s': %s [%d%% usage%s]\n", symlink, symlink_target, - CF_CHECK_STRING(r)); + CF_CHECK_STRING(r), + usage, + needs_rotation ? ", needs rotation" : ""); } else { + int usage; + bool needs_rotation = lmdb_file_needs_rotation(filename, &usage); Log(LOG_LEVEL_INFO, - "Status of '%s': %s\n", + "Status of '%s': %s [%d%% usage%s]\n", filename, - CF_CHECK_STRING(r)); + CF_CHECK_STRING(r), + usage, + needs_rotation ? ", needs rotation" : ""); } diff --git a/cf-check/diagnose.h b/cf-check/diagnose.h index 4bb01c7dde..5f6204b251 100644 --- a/cf-check/diagnose.h +++ b/cf-check/diagnose.h @@ -85,6 +85,8 @@ int lmdb_errno_to_cf_check_code(int r); int signal_to_cf_check_code(int sig); void report_mdb_error(const char *db_file, const char *op, int rc); +bool lmdb_file_needs_rotation(const char *file, int *usage); + size_t diagnose_files( const Seq *filenames, Seq **corrupt, bool foreground, bool validate, bool test_write); int diagnose_main(int argc, const char *const *argv); diff --git a/cf-check/dump.c b/cf-check/dump.c index 2a87c57f3b..6ddc89cad6 100644 --- a/cf-check/dump.c +++ b/cf-check/dump.c @@ -103,9 +103,10 @@ static void print_struct_lastseen_quality( else { // TODO: improve names of struct members in QPoint and KeyHostSeen - const KeyHostSeen *const quality = value.mv_data; - const time_t lastseen = quality->lastseen; - const QPoint Q = quality->Q; + KeyHostSeen quality; + memcpy(&quality, value.mv_data, sizeof(quality)); + const time_t lastseen = quality.lastseen; + const QPoint Q = quality.Q; JsonElement *q_json = JsonObjectCreate(4); JsonObjectAppendReal(q_json, "q", Q.q); @@ -136,10 +137,11 @@ static void print_struct_lock_data( else { // TODO: improve names of struct members in LockData - const LockData *const lock = value.mv_data; - const pid_t pid = lock->pid; - const time_t time = lock->time; - const time_t process_start_time = lock->process_start_time; + LockData lock; + memcpy(&lock, value.mv_data, sizeof(lock)); + const pid_t pid = lock.pid; + const time_t time = lock.time; + const time_t process_start_time = lock.process_start_time; JsonElement *json = JsonObjectCreate(3); JsonObjectAppendInteger(json, "pid", pid); @@ -168,8 +170,9 @@ static void print_struct_averages( { // TODO: clean up Averages char **obnames = NULL; - const Averages *const averages = value.mv_data; - const time_t last_seen = averages->last_seen; + Averages averages; + memcpy(&averages, value.mv_data, sizeof(averages)); + const time_t last_seen = averages.last_seen; obnames = GetObservableNames(tskey_filename); JsonElement *all_observables = JsonObjectCreate(CF_OBSERVABLES); @@ -178,7 +181,7 @@ static void print_struct_averages( { char *name = obnames[i]; JsonElement *observable = JsonObjectCreate(4); - QPoint Q = averages->Q[i]; + QPoint Q = averages.Q[i]; JsonObjectAppendReal(observable, "q", Q.q); JsonObjectAppendReal(observable, "expect", Q.expect); @@ -216,7 +219,11 @@ static void print_struct_persistent_class( } else { - const PersistentClassInfo *const class_info = value.mv_data; + /* Make a copy to ensure proper alignment. We cannot just copy data to a + * local PersistentClassInfo variable because it contains a + * variable-length string at the end (see the struct definition). */ + PersistentClassInfo *class_info = malloc(value.mv_size); + memcpy(class_info, value.mv_data, value.mv_size); const unsigned int expires = class_info->expires; const PersistentClassPolicy policy = class_info->policy; const char *policy_str; @@ -250,6 +257,7 @@ static void print_struct_persistent_class( // String is not terminated, abort or fall back to default: debug_abort_if_reached(); print_json_string(value.mv_data, value.mv_size, strip_strings); + free(class_info); return; } @@ -264,6 +272,7 @@ static void print_struct_persistent_class( JsonWriteCompact(w, top_json); FileWriterDetach(w); JsonDestroy(top_json); + free(class_info); } } @@ -291,8 +300,9 @@ static void print_struct_or_string( if (StringEqual(key.mv_data, "DATABASE_AGE")) { assert(sizeof(double) == value.mv_size); - const double *const age = value.mv_data; - printf("%f", *age); + double age; + memcpy(&age, value.mv_data, sizeof(age)); + printf("%f", age); } else { @@ -315,14 +325,16 @@ static void print_struct_or_string( if (StringEqual(key.mv_data, "delta_gavr")) { assert(sizeof(double) == value.mv_size); - const double *const average = value.mv_data; - printf("%f", *average); + double average; + memcpy(&average, value.mv_data, sizeof(average)); + printf("%f", average); } else if (StringEqual(key.mv_data, "last_exec")) { assert(sizeof(time_t) == value.mv_size); - const time_t *const last_exec = value.mv_data; - printf("%ju", (uintmax_t) (*last_exec)); + time_t last_exec; + memcpy(&last_exec, value.mv_data, sizeof(last_exec)); + printf("%ju", (uintmax_t) (last_exec)); } else { diff --git a/cf-check/repair.c b/cf-check/repair.c index d2ad3409d6..bf2c784c16 100644 --- a/cf-check/repair.c +++ b/cf-check/repair.c @@ -38,7 +38,9 @@ static void print_usage(void) { printf("Usage: cf-check repair [-f] [FILE ...]\n"); printf("Example: cf-check repair /var/cfengine/state/cf_lastseen.lmdb\n"); - printf("Options: -f|--force repair LMDB files that look OK "); + printf("Options:\n" + "-f|--force repair LMDB files that look OK\n" + "-w|--test-write test writing when checking files\n"); } int remove_files(Seq *files) @@ -80,7 +82,7 @@ int remove_files(Seq *files) return failures; } -static bool record_repair_timestamp(int fd_tstamp) +static bool record_timestamp(int fd_tstamp) { time_t this_timestamp = time(NULL); lseek(fd_tstamp, 0, SEEK_SET); @@ -158,7 +160,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp) } else { - if (!record_repair_timestamp(fd_tstamp)) + if (!record_timestamp(fd_tstamp)) { Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file", file); @@ -178,7 +180,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp) } else { - if (!record_repair_timestamp(fd_tstamp)) + if (!record_timestamp(fd_tstamp)) { Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file", file); @@ -200,7 +202,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp) ret = -1; goto cleanup; } - if (!record_repair_timestamp(fd_tstamp)) + if (!record_timestamp(fd_tstamp)) { Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file", file); @@ -217,7 +219,71 @@ int repair_lmdb_file(const char *file, int fd_tstamp) return ret; } -int repair_lmdb_files(Seq *files, bool force) +/** + * @param file LMDB file to rotate + * @param fd_tstamp An open FD to the repair timestamp file or -1 + * + * @note If #fd_tstamp != -1 then it is expected to be open and with file locks + * taken care of. If #fd_tstamp == -1, this function opens the rotation + * timestamp file on its own and takes care of the file locks. + */ +int rotate_lmdb_file(const char *file, int fd_tstamp) +{ + int ret; + FileLock lock = EMPTY_FILE_LOCK; + if (fd_tstamp == -1) + { + char *tstamp_file = StringFormat("%s.rotated", file); + int lock_ret = ExclusiveFileLockPath(&lock, tstamp_file, true); /* wait=true */ + free(tstamp_file); + if (lock_ret < 0) + { + /* Should never happen because we tried to wait for the lock. */ + Log(LOG_LEVEL_ERR, + "Failed to acquire lock for the '%s' DB repair timestamp file", + file); + ret = -1; + goto cleanup; + } + fd_tstamp = lock.fd; + } + + time_t now = time(NULL); + { + char *rotated_file = StringFormat("%s.rotated_%jd", file, (intmax_t) now); + ret = rename(file, rotated_file); + free(rotated_file); + } + if (ret != 0) + { + Log(LOG_LEVEL_ERR, + "Failed to rotate the '%s' DB file (%s), will be removed instead", + file, GetErrorStr()); + ret = unlink(file); + if (ret != 0) + { + Log(LOG_LEVEL_ERR, "Failed to remove the '%s' DB file: %s", + file, GetErrorStr()); + } + } + if (ret == 0) + { + if (!record_timestamp(fd_tstamp)) + { + Log(LOG_LEVEL_ERR, "Failed to write the timestamp of rotation of the '%s' DB file", + file); + } + } + + cleanup: + if (lock.fd != -1) + { + ExclusiveFileUnlock(&lock, true); /* close=true */ + } + return ret; +} + +static int repair_lmdb_files(Seq *files, bool force, bool test_write) { assert(files != NULL); assert(SeqLength(files) > 0); @@ -229,7 +295,7 @@ int repair_lmdb_files(Seq *files, bool force) } else { - const int corruptions = diagnose_files(files, &corrupt, false, false, false); + const int corruptions = diagnose_files(files, &corrupt, false, false, test_write); if (corruptions != 0) { assert(corrupt != NULL); @@ -256,6 +322,14 @@ int repair_lmdb_files(Seq *files, bool force) { ret++; } + int usage; + if (lmdb_file_needs_rotation(file, &usage)) + { + if (rotate_lmdb_file(file, -1) != -1) + { + Log(LOG_LEVEL_INFO, "Rotated '%s' DB with %d%% usage", file, usage); + } + } } if (!force) @@ -278,15 +352,19 @@ int repair_lmdb_files(Seq *files, bool force) int repair_main(int argc, const char *const *const argv) { - size_t offset = 1; bool force = false; - if (argc > 1 && argv[1] != NULL && argv[1][0] == '-') + bool test_write = false; + int i = 1; + for (; (i < argc) && (argv[i] != NULL) && (argv[i][0] == '-'); i++) { - if (StringMatchesOption(argv[1], "--force", "-f")) + if (StringMatchesOption(argv[i], "--force", "-f")) { - offset++; force = true; } + else if (StringMatchesOption(argv[i], "--test-write", "-w")) + { + test_write = true; + } else { print_usage(); @@ -294,13 +372,18 @@ int repair_main(int argc, const char *const *const argv) return 1; } } + if (force && test_write) + { + Log(LOG_LEVEL_WARNING, "Ignoring --test-write due to --force skipping DB checks"); + } + size_t offset = i; Seq *files = argv_to_lmdb_files(argc, argv, offset); if (files == NULL || SeqLength(files) == 0) { Log(LOG_LEVEL_ERR, "No database files to repair"); return 1; } - const int ret = repair_lmdb_files(files, force); + const int ret = repair_lmdb_files(files, force, test_write); SeqDestroy(files); return ret; } @@ -325,7 +408,7 @@ int repair_lmdb_default(bool force) Log(LOG_LEVEL_INFO, "Skipping local database repair, no lmdb files"); return 0; } - const int ret = repair_lmdb_files(files, force); + const int ret = repair_lmdb_files(files, force, false); SeqDestroy(files); if (ret != 0) diff --git a/cf-check/repair.h b/cf-check/repair.h index f911af5d5f..9a1e1423bb 100644 --- a/cf-check/repair.h +++ b/cf-check/repair.h @@ -6,5 +6,6 @@ int repair_main(int argc, const char *const *argv); int repair_lmdb_default(bool force); int repair_lmdb_file(const char *file, int fd_tstamp); +int rotate_lmdb_file(const char *file, int fd_tstamp); #endif diff --git a/cf-check/validate.c b/cf-check/validate.c index 0b1b70309d..185444f1fe 100644 --- a/cf-check/validate.c +++ b/cf-check/validate.c @@ -301,9 +301,10 @@ static void UpdateValidatorLastseen( const char direction = key_string[1]; if (direction == 'i' || direction == 'o') { - const KeyHostSeen *const data = value.mv_data; + KeyHostSeen data; + memcpy(&data, value.mv_data, sizeof(data)); - const time_t lastseen = data->lastseen; + const time_t lastseen = data.lastseen; const time_t current = time(NULL); Log(LOG_LEVEL_DEBUG, @@ -353,8 +354,9 @@ static void UpdateValidatorLock( const char *key_string = key.mv_data; - const LockData *const lock = value.mv_data; - const time_t lock_time = lock->time; + LockData lock; + memcpy(&lock, value.mv_data, sizeof(lock)); + const time_t lock_time = lock.time; const time_t current = time(NULL); Log(LOG_LEVEL_DEBUG, diff --git a/libpromises/dbm_api.c b/libpromises/dbm_api.c index 097a934030..b07a53f5f7 100644 --- a/libpromises/dbm_api.c +++ b/libpromises/dbm_api.c @@ -548,6 +548,12 @@ bool CleanDB(DBHandle *handle) return ret; } +int GetDBUsagePercentage(const DBHandle *handle) +{ + assert(handle != NULL); + return DBPrivGetDBUsagePercentage(handle->filename); +} + /** * Freezes the DB so that it is never touched by this process again. In * particular, new OpenDB() calls are ignored and CloseAllDBExit() also ignores diff --git a/libpromises/dbm_api.h b/libpromises/dbm_api.h index bf730d2d79..da6f4bad1a 100644 --- a/libpromises/dbm_api.h +++ b/libpromises/dbm_api.h @@ -78,6 +78,7 @@ void CloseDB(CF_DB *dbp); DBHandle *GetDBHandleFromFilename(const char *db_file_name); time_t GetDBOpenTimestamp(const DBHandle *handle); +int GetDBUsagePercentage(const DBHandle *handle); bool HasKeyDB(CF_DB *dbp, const char *key, int key_size); int ValueSizeDB(CF_DB *dbp, const char *key, int key_size); diff --git a/libpromises/dbm_lmdb.c b/libpromises/dbm_lmdb.c index f16c6e985f..5095dfafd9 100644 --- a/libpromises/dbm_lmdb.c +++ b/libpromises/dbm_lmdb.c @@ -79,13 +79,18 @@ static int DB_MAX_READERS = -1; /******************************************************************************/ static void HandleLMDBCorruption(MDB_env *env, const char *msg); +static void HandleFullLMDB(MDB_env *env); -static inline void CheckLMDBCorrupted(int rc, MDB_env *env) +static inline void CheckLMDBUsable(int rc, MDB_env *env) { if (rc == MDB_CORRUPTED) { HandleLMDBCorruption(env, ""); } + else if (rc == MDB_MAP_FULL) + { + HandleFullLMDB(env); + } } static int GetReadTransaction(DBPriv *const db, DBTxn **const txn) @@ -134,7 +139,7 @@ static int GetWriteTransaction(DBPriv *const db, DBTxn **const txn) if (db_txn->txn != NULL && !db_txn->rw_txn) { rc = mdb_txn_commit(db_txn->txn); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != MDB_SUCCESS) { Log(LOG_LEVEL_ERR, "Unable to close read-only transaction in '%s': %s", @@ -146,7 +151,7 @@ static int GetWriteTransaction(DBPriv *const db, DBTxn **const txn) if (db_txn->txn == NULL) { rc = mdb_txn_begin(db->env, NULL, 0, &db_txn->txn); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc == MDB_SUCCESS) { db_txn->rw_txn = true; @@ -197,6 +202,7 @@ const char *DBPrivGetFileExtension(void) return "lmdb"; } +/* NOTE: Must be in sync with LMDB_MAXSIZE in cf-check/diagnose.c. */ #ifndef LMDB_MAXSIZE #define LMDB_MAXSIZE 104857600 #endif @@ -280,6 +286,43 @@ static bool RepairedAfterOpen(const char *lmdb_file, int fd_tstamp) return false; } +/** + * @warning Expects @fd_stamp to be locked. + */ +static bool RotatedAfterOpen(const char *lmdb_file, int fd_tstamp) +{ + time_t rotated_tstamp = -1; + ssize_t n_read = read(fd_tstamp, &rotated_tstamp, sizeof(time_t)); + lseek(fd_tstamp, 0, SEEK_SET); + + if (n_read < 0) + { + Log(LOG_LEVEL_ERR, "Failed to read %s: %s", lmdb_file, GetErrorStr()); + } + else if (n_read == 0) + { + /* EOF (empty file) => never rotated */ + Log(LOG_LEVEL_VERBOSE, "DB '%s' never rotated before", lmdb_file); + } + else if ((size_t) n_read < sizeof(time_t)) + { + /* error */ + Log(LOG_LEVEL_ERR, "Failed to read the timestamp of rotation of the '%s' DB", + lmdb_file); + } + else + { + /* read the timestamp => Check if the LMDB file was rotated after + * we opened it last time. */ + DBHandle *handle = GetDBHandleFromFilename(lmdb_file); + if (rotated_tstamp > GetDBOpenTimestamp(handle)) + { + return true; + } + } + return false; +} + static void HandleLMDBCorruption(MDB_env *env, const char *msg) { const char *lmdb_file = mdb_env_get_userctx(env); @@ -490,6 +533,220 @@ static void HandleLMDBCorruption(MDB_env *env, const char *msg) #endif /* __MINGW32__ */ } +/** + * A modified clone of HandleLMDBCorruption() for handling full LMDBs. It's not + * easy and nice to share much code between the two functions, unfortunately. + */ +static void HandleFullLMDB(MDB_env *env) +{ + const char *lmdb_file = mdb_env_get_userctx(env); + Log(LOG_LEVEL_CRIT, "'%s' DB full!", lmdb_file); + + /* Freeze the DB ASAP. This also makes the call to exit() safe regarding + * this particular DB because exit handlers will ignore it. */ + DBHandle *handle = GetDBHandleFromFilename(lmdb_file); + FreezeDB(handle); + +#ifdef _WIN32 + /* Not much we can do on Windows because there is no fork() and file locking + * is also not so nice. */ + Log(LOG_LEVEL_WARNING, "Moving the full DB file '%s' aside", + lmdb_file); + time_t now = time(NULL); + char *rotated_file = StringFormat("%s.rotated.%jd", lmdb_file, (intmax_t) now); + if (rename(lmdb_file, rotated_file) != 0) + { + free(rotated_file); + Log(LOG_LEVEL_CRIT, + "Failed to move the full DB file '%s' aside (%s), will be removed instead", + lmdb_file, GetErrorStr()); + if (unlink(lmdb_file) != 0) + { + Log(LOG_LEVEL_CRIT, "Failed to remove the full DB file '%s': %s", + lmdb_file, GetErrorStr()); + } + exit(EC_CORRUPTION_REPAIR_FAILED); + } + free(rotated_file); + exit(EC_CORRUPTION_REPAIRED); +#else + /* To avoid two processes acting on the same corrupted file at once, file + * locks are involved. Looking at OpenDBInstance() and DBPathLock() + * in libpromises/db_api.c might also be useful.*/ + + /* Only allow one thread at a time to handle a full or corrupted DB. File + * locks are *process* specific so threads could step on each others + * toes. */ + ThreadLock(cft_db_corruption_lock); + + char *tstamp_file = StringFormat("%s.rotated", lmdb_file); + char *db_lock_file = StringFormat("%s.lock", lmdb_file); + + int fd_tstamp = safe_open(tstamp_file, O_CREAT|O_RDWR); + if (fd_tstamp == -1) + { + Log(LOG_LEVEL_CRIT, "Failed to open the '%s' DB rotation timestamp file", + lmdb_file); + ThreadUnlock(cft_db_corruption_lock); + free(db_lock_file); + free(tstamp_file); + + exit(EC_CORRUPTION_REPAIR_FAILED); + } + FileLock tstamp_lock = { .fd = fd_tstamp }; + + int fd_db_lock = safe_open(db_lock_file, O_CREAT|O_RDWR); + if (fd_db_lock == -1) + { + Log(LOG_LEVEL_CRIT, "Failed to open the '%s' DB lock file", + lmdb_file); + ThreadUnlock(cft_db_corruption_lock); + close(fd_tstamp); + free(db_lock_file); + free(tstamp_file); + + exit(EC_CORRUPTION_REPAIR_FAILED); + } + FileLock db_lock = { .fd = fd_db_lock }; + + int ret; + bool handle_rotation = true; + + /* Make sure we are not holding the DB's lock (potentially needed by some + * other process for the repair or rotation) to avoid deadlocks. */ + Log(LOG_LEVEL_DEBUG, "Releasing lock on the '%s' DB", lmdb_file); + ExclusiveFileUnlock(&db_lock, false); /* close=false */ + + ret = SharedFileLock(&tstamp_lock, true); + if (ret == 0) + { + if (RotatedAfterOpen(lmdb_file, fd_tstamp)) + { + /* The corruption has already been handled. This process should just + * die because we have no way to return to the point where it would + * just open the new (repaired or rotated) LMDB file. */ + handle_rotation = false; + } + SharedFileUnlock(&tstamp_lock, false); + } + else + { + /* should never happen (we tried to wait), but if it does, just log an + * error and keep going */ + Log(LOG_LEVEL_ERR, + "Failed to get shared lock for the rotation timestamp of the '%s' DB", + lmdb_file); + } + + if (!handle_rotation) + { + /* Just clean after ourselves and terminate the process. */ + ThreadUnlock(cft_db_corruption_lock); + close(fd_db_lock); + close(fd_tstamp); + free(db_lock_file); + free(tstamp_file); + + exit(EC_CORRUPTION_REPAIRED); + } + + /* HERE is a window for some other process to do the rotation between when we + * checked the timestamp using the shared lock above and the attempt to get + * the exclusive lock right below. However, this is detected by checking the + * contents of the timestamp file again below, while holding the EXCLUSIVE + * lock. */ + + ret = ExclusiveFileLock(&tstamp_lock, true); + if (ret != 0) + { + /* should never happen (we tried to wait), but if it does, just + * terminate because doing the rotation without the lock could be + * disasterous */ + Log(LOG_LEVEL_ERR, + "Failed to get shared lock for the rotation timestamp of the '%s' DB", + lmdb_file); + + ThreadUnlock(cft_db_corruption_lock); + close(fd_db_lock); + close(fd_tstamp); + free(db_lock_file); + free(tstamp_file); + + exit(EC_CORRUPTION_REPAIR_FAILED); + } + + /* Cleared to resolve the corruption. */ + + /* 1. Acquire the lock for the DB to prevent more processes trying to use + * it while it is corrupted (wait till the lock is available). */ + while (ExclusiveFileLock(&db_lock, false) == -1) + { + /* busy wait to do the logging */ + Log(LOG_LEVEL_INFO, "Waiting for the lock on the '%s' DB", + lmdb_file); + sleep(1); + } + + /* 2. Check the last rotation timestamp again (see the big "HERE..." comment + * above) */ + if (RotatedAfterOpen(lmdb_file, fd_tstamp)) + { + /* Some other process rotated the DB since we checked last time, + * nothing more to do here. */ + ThreadUnlock(cft_db_corruption_lock); + close(fd_db_lock); /* releases locks */ + close(fd_tstamp); /* releases locks */ + free(db_lock_file); + free(tstamp_file); + + exit(EC_CORRUPTION_REPAIRED); + } + + /* 3. Rotate the DB or at least move it out of the way. */ + ret = rotate_lmdb_file(lmdb_file, fd_tstamp); + bool rotation_successful = (ret == 0); + if (rotation_successful) + { + Log(LOG_LEVEL_NOTICE, "DB '%s' successfully rotated", lmdb_file); + } + else + { + Log(LOG_LEVEL_CRIT, "Failed to rotate '%s' DB", lmdb_file); + } + + /* 4. Make the rotated DB available for others. Also release the locks + * in the opposite order in which they were acquired to avoid + * deadlocks. */ + if (ExclusiveFileUnlock(&db_lock, true) != 0) + { + Log(LOG_LEVEL_ERR, "Failed to release the acquired lock for '%s'", + db_lock_file); + } + + /* 5. Signal that the rotation is done (also closes fd_tstamp). */ + if (ExclusiveFileUnlock(&tstamp_lock, true) != 0) + { + Log(LOG_LEVEL_ERR, "Failed to release the acquired lock for '%s'", + tstamp_file); + } + + ThreadUnlock(cft_db_corruption_lock); + free(db_lock_file); + free(tstamp_file); + /* fd_db_lock and fd_tstamp are already closed by the calls to + * ExclusiveFileUnlock above. */ + + if (rotation_successful) + { + exit(EC_CORRUPTION_REPAIRED); + } + else + { + exit(EC_CORRUPTION_REPAIR_FAILED); + } +#endif /* _WIN32 */ +} + DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id) { DBPriv *const db = xcalloc(1, sizeof(DBPriv)); @@ -603,7 +860,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id) int attempts = N_LMDB_EINVAL_RETRIES; while ((rc != 0) && (attempts-- > 0)) { - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != EINVAL) { Log(LOG_LEVEL_ERR, "Could not open database txn %s: %s", @@ -624,7 +881,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id) goto err; } rc = mdb_open(txn, NULL, 0, &db->dbi); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc) { Log(LOG_LEVEL_ERR, "Could not open database dbi %s: %s", @@ -633,7 +890,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id) goto err; } rc = mdb_txn_commit(txn); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc) { Log(LOG_LEVEL_ERR, "Could not commit database dbi %s: %s", @@ -700,6 +957,18 @@ bool DBPrivClean(DBPriv *db) return (mdb_drop(txn->txn, db->dbi, EMPTY_DB) != 0); } +int DBPrivGetDBUsagePercentage(const char *db_path) +{ + struct stat sb; + int ret = stat(db_path, &sb); + if (ret == -1) + { + Log(LOG_LEVEL_ERR, "Failed to get size of '%s': %s", db_path, GetErrorStr()); + return -1; + } + return (int) ((((float) sb.st_size) / LMDB_MAXSIZE) * 100); +} + void DBPrivCommit(DBPriv *db) { assert(db != NULL); @@ -709,7 +978,7 @@ void DBPrivCommit(DBPriv *db) { assert(!db_txn->cursor_open); const int rc = mdb_txn_commit(db_txn->txn); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != MDB_SUCCESS) { Log(LOG_LEVEL_ERR, "Could not commit database transaction to '%s': %s", @@ -735,7 +1004,7 @@ bool DBPrivHasKey(DBPriv *db, const void *key, int key_size) mkey.mv_data = (void *) key; mkey.mv_size = key_size; rc = mdb_get(txn->txn, db->dbi, &mkey, &data); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != 0 && rc != MDB_NOTFOUND) { Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s", @@ -764,7 +1033,7 @@ int DBPrivGetValueSize(DBPriv *const db, const void *const key, const int key_si mkey.mv_data = (void *) key; mkey.mv_size = key_size; rc = mdb_get(txn->txn, db->dbi, &mkey, &data); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc && rc != MDB_NOTFOUND) { Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s", @@ -800,7 +1069,7 @@ bool DBPrivRead( mkey.mv_data = (void *) key; mkey.mv_size = key_size; rc = mdb_get(txn->txn, db->dbi, &mkey, &data); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc == MDB_SUCCESS) { if (dest_size > data.mv_size) @@ -842,7 +1111,7 @@ bool DBPrivWrite( data.mv_data = (void *)value; data.mv_size = value_size; rc = mdb_put(txn->txn, db->dbi, &mkey, &data, 0); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != MDB_SUCCESS) { Log(LOG_LEVEL_ERR, "Could not write database entry to '%s': %s", @@ -873,7 +1142,7 @@ bool DBPrivOverwrite(DBPriv *db, const char *key, int key_size, const void *valu mkey.mv_data = (void *) key; mkey.mv_size = key_size; rc = mdb_get(txn->txn, db->dbi, &mkey, &orig_data); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if ((rc != MDB_SUCCESS) && (rc != MDB_NOTFOUND)) { Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s", @@ -914,7 +1183,7 @@ bool DBPrivOverwrite(DBPriv *db, const char *key, int key_size, const void *valu new_data.mv_data = (void *)value; new_data.mv_size = value_size; rc = mdb_put(txn->txn, db->dbi, &mkey, &new_data, 0); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc != MDB_SUCCESS) { Log(LOG_LEVEL_ERR, "Could not write database entry to '%s': %s", @@ -940,7 +1209,7 @@ bool DBPrivDelete(DBPriv *const db, const void *const key, const int key_size) mkey.mv_data = (void *) key; mkey.mv_size = key_size; rc = mdb_del(txn->txn, db->dbi, &mkey, NULL); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc == MDB_NOTFOUND) { Log(LOG_LEVEL_DEBUG, "Entry not found in '%s': %s", @@ -969,7 +1238,7 @@ DBCursorPriv *DBPrivOpenCursor(DBPriv *const db) { assert(!txn->cursor_open); rc = mdb_cursor_open(txn->txn, db->dbi, &mc); - CheckLMDBCorrupted(rc, db->env); + CheckLMDBUsable(rc, db->env); if (rc == MDB_SUCCESS) { cursor = xcalloc(1, sizeof(DBCursorPriv)); @@ -1009,7 +1278,7 @@ bool DBPrivAdvanceCursor( } int rc = mdb_cursor_get(cursor->mc, &mkey, &data, MDB_NEXT); - CheckLMDBCorrupted(rc, cursor->db->env); + CheckLMDBUsable(rc, cursor->db->env); if (rc == MDB_SUCCESS) { // Align second buffer to 64-bit boundary, to avoid alignment errors on @@ -1048,7 +1317,7 @@ bool DBPrivAdvanceCursor( { mkey.mv_data = *key; rc = mdb_cursor_get(cursor->mc, &mkey, NULL, MDB_SET); - CheckLMDBCorrupted(rc, cursor->db->env); + CheckLMDBUsable(rc, cursor->db->env); // TODO: Should the return value be checked? } cursor->pending_delete = false; @@ -1061,7 +1330,7 @@ bool DBPrivDeleteCursorEntry(DBCursorPriv *const cursor) assert(cursor != NULL); int rc = mdb_cursor_get(cursor->mc, &cursor->delkey, NULL, MDB_GET_CURRENT); - CheckLMDBCorrupted(rc, cursor->db->env); + CheckLMDBUsable(rc, cursor->db->env); if (rc == MDB_SUCCESS) { cursor->pending_delete = true; @@ -1089,7 +1358,7 @@ bool DBPrivWriteCursorEntry( curkey.mv_size = sizeof(cursor->curkv); rc = mdb_cursor_put(cursor->mc, &curkey, &data, MDB_CURRENT); - CheckLMDBCorrupted(rc, cursor->db->env); + CheckLMDBUsable(rc, cursor->db->env); if (rc != MDB_SUCCESS) { Log(LOG_LEVEL_ERR, "Could not write cursor entry to '%s': %s", diff --git a/libpromises/dbm_priv.h b/libpromises/dbm_priv.h index d24d726c45..7f0d688bb3 100644 --- a/libpromises/dbm_priv.h +++ b/libpromises/dbm_priv.h @@ -54,6 +54,8 @@ void DBPrivCloseDB(DBPriv *hdbp); void DBPrivCommit(DBPriv *hdbp); bool DBPrivClean(DBPriv *hdbp); +int DBPrivGetDBUsagePercentage(const char *db_path); + bool DBPrivHasKey(DBPriv *db, const void *key, int key_size); int DBPrivGetValueSize(DBPriv *db, const void *key, int key_size); diff --git a/libpromises/dbm_quick.c b/libpromises/dbm_quick.c index ee9b5db054..baa9a223f8 100644 --- a/libpromises/dbm_quick.c +++ b/libpromises/dbm_quick.c @@ -206,6 +206,12 @@ bool DBPrivClean(DBPriv *db) return true; } +int DBPrivGetDBUsagePercentage(ARG_UNUSED const char *db_path) +{ + Log(LOG_LEVEL_WARNING, "Cannot determine usage of a QuickDB database"); + return -1; +} + bool DBPrivRead(DBPriv *db, const void *key, int key_size, void *dest, size_t dest_size) { if (!Lock(db)) diff --git a/libpromises/dbm_tokyocab.c b/libpromises/dbm_tokyocab.c index ffbd04d64d..0b5b6c156d 100644 --- a/libpromises/dbm_tokyocab.c +++ b/libpromises/dbm_tokyocab.c @@ -245,6 +245,12 @@ bool DBPrivClean(DBPriv *db) return true; } +int DBPrivGetDBUsagePercentage(ARG_UNUSED const char *db_path) +{ + Log(LOG_LEVEL_WARNING, "Cannot determine usage of a TokyoCabinet database"); + return -1; +} + bool DBPrivHasKey(DBPriv *db, const void *key, int key_size) { // FIXME: distinguish between "entry not found" and "error occurred" diff --git a/libpromises/locks.c b/libpromises/locks.c index e295445816..b1727d387d 100644 --- a/libpromises/locks.c +++ b/libpromises/locks.c @@ -50,7 +50,6 @@ #endif #define CFLOGSIZE 1048576 /* Size of lock-log before rotation */ -#define CF_LOCKHORIZON ((time_t)(SECONDS_PER_WEEK * 4)) #define CF_MAXLOCKNUM 8192 #define CF_CRITIAL_SECTION "CF_CRITICAL_SECTION" @@ -62,6 +61,20 @@ #define LOG_LOCK_OP(__lock, __lock_sum, __lock_data) \ log_lock("Performing", __FUNCTION__, __lock, __lock_sum, __lock_data) +/** + * Map the locks DB usage percentage to the lock horizon interval (how old locks + * we want to keep). + */ +#define N_LOCK_HORIZON_USAGE_INTERVALS 4 /* 0-25, 26-50,... */ +static const time_t LOCK_HORIZON_USAGE_INTERVALS[N_LOCK_HORIZON_USAGE_INTERVALS] = { + 0, /* plenty of space, no cleanup needed (0 is a special + * value) */ + 4 * SECONDS_PER_WEEK, /* used to be the fixed value */ + 2 * SECONDS_PER_WEEK, /* a bit more aggressive, but still reasonable */ + SECONDS_PER_WEEK, /* as far as we want to go to avoid making long locks + * unreliable and practically non-functional */ +}; + typedef struct CfLockStack_ { char lock[CF_BUFSIZE]; char last[CF_BUFSIZE]; @@ -1197,66 +1210,85 @@ void BackupLockDatabase(void) void PurgeLocks(void) { - CF_DBC *dbcp; - char *key; - int ksize, vsize; - LockData lock_horizon; - LockData *entry = NULL; + DBHandle *db = OpenLock(); + if (db == NULL) + { + return; + } + time_t now = time(NULL); - CF_DB *dbp = OpenLock(); - if (dbp == NULL) + int usage_pct = GetDBUsagePercentage(db); + if (usage_pct == -1) + { + /* error already logged */ + /* no usage info, assume 50% */ + usage_pct = 50; + } + + unsigned short interval_idx = MIN(usage_pct / (100 / N_LOCK_HORIZON_USAGE_INTERVALS), + N_LOCK_HORIZON_USAGE_INTERVALS - 1); + const time_t lock_horizon_interval = LOCK_HORIZON_USAGE_INTERVALS[interval_idx]; + if (lock_horizon_interval == 0) { + Log(LOG_LEVEL_VERBOSE, "No lock purging needed (lock DB usage: %d %%)", usage_pct); + CloseLock(db); return; } + const time_t purge_horizon = now - lock_horizon_interval; + LockData lock_horizon; memset(&lock_horizon, 0, sizeof(lock_horizon)); - - if (ReadDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon))) + if (ReadDB(db, "lock_horizon", &lock_horizon, sizeof(lock_horizon))) { - if (now - lock_horizon.time < SECONDS_PER_WEEK * 4) + if (lock_horizon.time > purge_horizon) { Log(LOG_LEVEL_VERBOSE, "No lock purging scheduled"); - CloseLock(dbp); + CloseLock(db); return; } } - Log(LOG_LEVEL_VERBOSE, "Looking for stale locks to purge"); + Log(LOG_LEVEL_VERBOSE, + "Looking for stale locks (older than %jd seconds) to purge", + (intmax_t) lock_horizon_interval); - if (!NewDBCursor(dbp, &dbcp)) + DBCursor *cursor; + if (!NewDBCursor(db, &cursor)) { char *db_path = DBIdToPath(dbid_locks); Log(LOG_LEVEL_ERR, "Unable to get cursor for locks database '%s'", db_path); free(db_path); - CloseLock(dbp); + CloseLock(db); return; } - while (NextDB(dbcp, &key, &ksize, (void **)&entry, &vsize)) + char *key; + int ksize, vsize; + LockData *entry = NULL; + while (NextDB(cursor, &key, &ksize, (void **)&entry, &vsize)) { #ifdef LMDB LOG_LOCK_OP("", key, entry); #endif - if (STARTSWITH(key, "last.internal_bundle.track_license.handle")) + if (StringStartsWith(key, "last.internal_bundle.track_license.handle")) { continue; } - if (now - entry->time > (time_t) CF_LOCKHORIZON) + if (entry->time < purge_horizon) { Log(LOG_LEVEL_VERBOSE, "Purging lock (%jd s elapsed): %s", (intmax_t) (now - entry->time), key); - DBCursorDeleteEntry(dbcp); + DBCursorDeleteEntry(cursor); } } + DeleteDBCursor(cursor); Log(LOG_LEVEL_DEBUG, "Finished purging locks"); lock_horizon.time = now; - DeleteDBCursor(dbcp); - - WriteDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon)); - CloseLock(dbp); + WriteDB(db, "lock_horizon", &lock_horizon, sizeof(lock_horizon)); + CloseLock(db); }