From 64f83ef9f5ec4714652068b211cab998d104b156 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Wed, 10 Jun 2020 17:46:48 +0200
Subject: [PATCH 1/9] Do aligned access to LMDB data in cf-check

Data stored in LMDB and accessed with the 'MDB_val.data' field is
a pointer to a map of the LMDB file (set up with mmap()). This
doesn't ensure proper alignment and so in order to avoid
potential SIGBUS due to bad alignment (on architectures sensitive
to these issues), we need to create local copies of the data
before trying to access them in any way than just one byte at a
time.

Ticket: CFE-3373
Changelog: None
(cherry picked from commit a7fbac9b2bd04092b8b7e415636026fa0d7b41b4)
---
 cf-check/dump.c     | 46 ++++++++++++++++++++++++++++-----------------
 cf-check/validate.c | 10 ++++++----
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/cf-check/dump.c b/cf-check/dump.c
index 2a87c57f3b..6ddc89cad6 100644
--- a/cf-check/dump.c
+++ b/cf-check/dump.c
@@ -103,9 +103,10 @@ static void print_struct_lastseen_quality(
     else
     {
         // TODO: improve names of struct members in QPoint and KeyHostSeen
-        const KeyHostSeen *const quality = value.mv_data;
-        const time_t lastseen = quality->lastseen;
-        const QPoint Q = quality->Q;
+        KeyHostSeen quality;
+        memcpy(&quality, value.mv_data, sizeof(quality));
+        const time_t lastseen = quality.lastseen;
+        const QPoint Q = quality.Q;
 
         JsonElement *q_json = JsonObjectCreate(4);
         JsonObjectAppendReal(q_json, "q", Q.q);
@@ -136,10 +137,11 @@ static void print_struct_lock_data(
     else
     {
         // TODO: improve names of struct members in LockData
-        const LockData *const lock = value.mv_data;
-        const pid_t pid = lock->pid;
-        const time_t time = lock->time;
-        const time_t process_start_time = lock->process_start_time;
+        LockData lock;
+        memcpy(&lock, value.mv_data, sizeof(lock));
+        const pid_t pid = lock.pid;
+        const time_t time = lock.time;
+        const time_t process_start_time = lock.process_start_time;
 
         JsonElement *json = JsonObjectCreate(3);
         JsonObjectAppendInteger(json, "pid", pid);
@@ -168,8 +170,9 @@ static void print_struct_averages(
     {
         // TODO: clean up Averages
         char **obnames = NULL;
-        const Averages *const averages = value.mv_data;
-        const time_t last_seen = averages->last_seen;
+        Averages averages;
+        memcpy(&averages, value.mv_data, sizeof(averages));
+        const time_t last_seen = averages.last_seen;
 
         obnames = GetObservableNames(tskey_filename);
         JsonElement *all_observables = JsonObjectCreate(CF_OBSERVABLES);
@@ -178,7 +181,7 @@ static void print_struct_averages(
         {
             char *name = obnames[i];
             JsonElement *observable = JsonObjectCreate(4);
-            QPoint Q = averages->Q[i];
+            QPoint Q = averages.Q[i];
 
             JsonObjectAppendReal(observable, "q", Q.q);
             JsonObjectAppendReal(observable, "expect", Q.expect);
@@ -216,7 +219,11 @@ static void print_struct_persistent_class(
     }
     else
     {
-        const PersistentClassInfo *const class_info = value.mv_data;
+        /* Make a copy to ensure proper alignment. We cannot just copy data to a
+         * local PersistentClassInfo variable because it contains a
+         * variable-length string at the end (see the struct definition). */
+        PersistentClassInfo *class_info = malloc(value.mv_size);
+        memcpy(class_info, value.mv_data, value.mv_size);
         const unsigned int expires = class_info->expires;
         const PersistentClassPolicy policy = class_info->policy;
         const char *policy_str;
@@ -250,6 +257,7 @@ static void print_struct_persistent_class(
             // String is not terminated, abort or fall back to default:
             debug_abort_if_reached();
             print_json_string(value.mv_data, value.mv_size, strip_strings);
+            free(class_info);
             return;
         }
 
@@ -264,6 +272,7 @@ static void print_struct_persistent_class(
         JsonWriteCompact(w, top_json);
         FileWriterDetach(w);
         JsonDestroy(top_json);
+        free(class_info);
     }
 }
 
@@ -291,8 +300,9 @@ static void print_struct_or_string(
             if (StringEqual(key.mv_data, "DATABASE_AGE"))
             {
                 assert(sizeof(double) == value.mv_size);
-                const double *const age = value.mv_data;
-                printf("%f", *age);
+                double age;
+                memcpy(&age, value.mv_data, sizeof(age));
+                printf("%f", age);
             }
             else
             {
@@ -315,14 +325,16 @@ static void print_struct_or_string(
             if (StringEqual(key.mv_data, "delta_gavr"))
             {
                 assert(sizeof(double) == value.mv_size);
-                const double *const average = value.mv_data;
-                printf("%f", *average);
+                double average;
+                memcpy(&average, value.mv_data, sizeof(average));
+                printf("%f", average);
             }
             else if (StringEqual(key.mv_data, "last_exec"))
             {
                 assert(sizeof(time_t) == value.mv_size);
-                const time_t *const last_exec = value.mv_data;
-                printf("%ju", (uintmax_t) (*last_exec));
+                time_t last_exec;
+                memcpy(&last_exec, value.mv_data, sizeof(last_exec));
+                printf("%ju", (uintmax_t) (last_exec));
             }
             else
             {
diff --git a/cf-check/validate.c b/cf-check/validate.c
index 0b1b70309d..185444f1fe 100644
--- a/cf-check/validate.c
+++ b/cf-check/validate.c
@@ -301,9 +301,10 @@ static void UpdateValidatorLastseen(
         const char direction = key_string[1];
         if (direction == 'i' || direction == 'o')
         {
-            const KeyHostSeen *const data = value.mv_data;
+            KeyHostSeen data;
+            memcpy(&data, value.mv_data, sizeof(data));
 
-            const time_t lastseen = data->lastseen;
+            const time_t lastseen = data.lastseen;
             const time_t current = time(NULL);
 
             Log(LOG_LEVEL_DEBUG,
@@ -353,8 +354,9 @@ static void UpdateValidatorLock(
 
     const char *key_string = key.mv_data;
 
-    const LockData *const lock = value.mv_data;
-    const time_t lock_time = lock->time;
+    LockData lock;
+    memcpy(&lock, value.mv_data, sizeof(lock));
+    const time_t lock_time = lock.time;
     const time_t current = time(NULL);
 
     Log(LOG_LEVEL_DEBUG,

From 3f5f282778801f7c238995f2d1ce59f3de911898 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Mon, 13 Nov 2023 16:43:14 +0100
Subject: [PATCH 2/9] Use `CF_LOCKHORIZON` to check whether to purge locks

We have that constant so we should use it.

Ticket: ENT-8201
Changelog: None
(cherry picked from commit e6bd200f42b8883dc1a571a4026e522883a76d65)
---
 libpromises/locks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libpromises/locks.c b/libpromises/locks.c
index e295445816..d1bd5e8d51 100644
--- a/libpromises/locks.c
+++ b/libpromises/locks.c
@@ -1214,7 +1214,7 @@ void PurgeLocks(void)
 
     if (ReadDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon)))
     {
-        if (now - lock_horizon.time < SECONDS_PER_WEEK * 4)
+        if ((now - lock_horizon.time) < CF_LOCKHORIZON)
         {
             Log(LOG_LEVEL_VERBOSE, "No lock purging scheduled");
             CloseLock(dbp);

From 4c75f98de5ce5dfe3709bf1d07d7fc171e52b1b7 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Mon, 13 Nov 2023 17:38:43 +0100
Subject: [PATCH 3/9] Add GetDBUsagePercentage() function

So that we can check how full DBs are.

For now only implemented for LMDB where we set/know the maximum
size.

Ticket: ENT-8201
Ticket: CFE-2136
Changelog: None
(cherry picked from commit b775f2eed3d9b3dbc222d1cc513fb4a7f03c7ea3)
---
 libpromises/dbm_api.c      |  6 ++++++
 libpromises/dbm_api.h      |  1 +
 libpromises/dbm_lmdb.c     | 12 ++++++++++++
 libpromises/dbm_priv.h     |  2 ++
 libpromises/dbm_quick.c    |  6 ++++++
 libpromises/dbm_tokyocab.c |  6 ++++++
 6 files changed, 33 insertions(+)

diff --git a/libpromises/dbm_api.c b/libpromises/dbm_api.c
index 097a934030..b07a53f5f7 100644
--- a/libpromises/dbm_api.c
+++ b/libpromises/dbm_api.c
@@ -548,6 +548,12 @@ bool CleanDB(DBHandle *handle)
     return ret;
 }
 
+int GetDBUsagePercentage(const DBHandle *handle)
+{
+    assert(handle != NULL);
+    return DBPrivGetDBUsagePercentage(handle->filename);
+}
+
 /**
  * Freezes the DB so that it is never touched by this process again. In
  * particular, new OpenDB() calls are ignored and CloseAllDBExit() also ignores
diff --git a/libpromises/dbm_api.h b/libpromises/dbm_api.h
index bf730d2d79..da6f4bad1a 100644
--- a/libpromises/dbm_api.h
+++ b/libpromises/dbm_api.h
@@ -78,6 +78,7 @@ void CloseDB(CF_DB *dbp);
 
 DBHandle *GetDBHandleFromFilename(const char *db_file_name);
 time_t GetDBOpenTimestamp(const DBHandle *handle);
+int GetDBUsagePercentage(const DBHandle *handle);
 
 bool HasKeyDB(CF_DB *dbp, const char *key, int key_size);
 int ValueSizeDB(CF_DB *dbp, const char *key, int key_size);
diff --git a/libpromises/dbm_lmdb.c b/libpromises/dbm_lmdb.c
index f16c6e985f..bde8e45f7f 100644
--- a/libpromises/dbm_lmdb.c
+++ b/libpromises/dbm_lmdb.c
@@ -700,6 +700,18 @@ bool DBPrivClean(DBPriv *db)
     return (mdb_drop(txn->txn, db->dbi, EMPTY_DB) != 0);
 }
 
+int DBPrivGetDBUsagePercentage(const char *db_path)
+{
+    struct stat sb;
+    int ret = stat(db_path, &sb);
+    if (ret == -1)
+    {
+        Log(LOG_LEVEL_ERR, "Failed to get size of '%s': %s", db_path, GetErrorStr());
+        return -1;
+    }
+    return (int) ((((float) sb.st_size) / LMDB_MAXSIZE) * 100);
+}
+
 void DBPrivCommit(DBPriv *db)
 {
     assert(db != NULL);
diff --git a/libpromises/dbm_priv.h b/libpromises/dbm_priv.h
index d24d726c45..7f0d688bb3 100644
--- a/libpromises/dbm_priv.h
+++ b/libpromises/dbm_priv.h
@@ -54,6 +54,8 @@ void DBPrivCloseDB(DBPriv *hdbp);
 void DBPrivCommit(DBPriv *hdbp);
 bool DBPrivClean(DBPriv *hdbp);
 
+int DBPrivGetDBUsagePercentage(const char *db_path);
+
 bool DBPrivHasKey(DBPriv *db, const void *key, int key_size);
 int DBPrivGetValueSize(DBPriv *db, const void *key, int key_size);
 
diff --git a/libpromises/dbm_quick.c b/libpromises/dbm_quick.c
index ee9b5db054..baa9a223f8 100644
--- a/libpromises/dbm_quick.c
+++ b/libpromises/dbm_quick.c
@@ -206,6 +206,12 @@ bool DBPrivClean(DBPriv *db)
     return true;
 }
 
+int DBPrivGetDBUsagePercentage(ARG_UNUSED const char *db_path)
+{
+    Log(LOG_LEVEL_WARNING, "Cannot determine usage of a QuickDB database");
+    return -1;
+}
+
 bool DBPrivRead(DBPriv *db, const void *key, int key_size, void *dest, size_t dest_size)
 {
     if (!Lock(db))
diff --git a/libpromises/dbm_tokyocab.c b/libpromises/dbm_tokyocab.c
index ffbd04d64d..0b5b6c156d 100644
--- a/libpromises/dbm_tokyocab.c
+++ b/libpromises/dbm_tokyocab.c
@@ -245,6 +245,12 @@ bool DBPrivClean(DBPriv *db)
     return true;
 }
 
+int DBPrivGetDBUsagePercentage(ARG_UNUSED const char *db_path)
+{
+    Log(LOG_LEVEL_WARNING, "Cannot determine usage of a TokyoCabinet database");
+    return -1;
+}
+
 bool DBPrivHasKey(DBPriv *db, const void *key, int key_size)
 {
     // FIXME: distinguish between "entry not found" and "error occurred"

From 9f9f14a6d55a5c1131c73220e8a4ff031fa0224e Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Tue, 14 Nov 2023 10:53:04 +0100
Subject: [PATCH 4/9] Make lock purging react to locks DB usage

Ideally, locks should never need to be cleaned because the ones
used by the current policy set should be periodically updated and
the old ones that no longer match anything in policy should be
very few in numbers and thus taking very little space. And we
want to retain locks for more than 4 weeks whenever possible
because for example locks used by our daemons to prevent multiple
instances from running should stay intact while our daemons run
which should ideally be for the whole boot time of a machine.

Also, our documentation doesn't specify any hard expire interval
for locks (any upper limit) so if possible, we should retain and
respect even very old locks.

If the locks DB grows too big, however, we need to do purging,
potentially even more aggressively than just with the 4 week
horizon if needed.

Ticket: ENT-8201
Ticket: CFE-2136
Ticket: ENT-5898
Changelog: CFEngine locks are now purged dynamically based on the
           local locks DB usage/size ranging from no purging (<=25%
           usage) to purging of locks older than 1 week (>75% usage)
(cherry picked from commit 0a4550807fa652c5038d971b766e779301f962f8)
---
 libpromises/locks.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/libpromises/locks.c b/libpromises/locks.c
index d1bd5e8d51..1eca411a73 100644
--- a/libpromises/locks.c
+++ b/libpromises/locks.c
@@ -50,7 +50,6 @@
 #endif
 
 #define CFLOGSIZE 1048576       /* Size of lock-log before rotation */
-#define CF_LOCKHORIZON ((time_t)(SECONDS_PER_WEEK * 4))
 #define CF_MAXLOCKNUM 8192
 
 #define CF_CRITIAL_SECTION "CF_CRITICAL_SECTION"
@@ -62,6 +61,20 @@
 #define LOG_LOCK_OP(__lock, __lock_sum, __lock_data)            \
     log_lock("Performing", __FUNCTION__, __lock, __lock_sum, __lock_data)
 
+/**
+ * Map the locks DB usage percentage to the lock horizon interval (how old locks
+ * we want to keep).
+ */
+#define N_LOCK_HORIZON_USAGE_INTERVALS 4 /* 0-25, 26-50,... */
+static const time_t LOCK_HORIZON_USAGE_INTERVALS[N_LOCK_HORIZON_USAGE_INTERVALS] = {
+    0,                    /* plenty of space, no cleanup needed (0 is a special
+                           * value) */
+    4 * SECONDS_PER_WEEK, /* used to be the fixed value */
+    2 * SECONDS_PER_WEEK, /* a bit more aggressive, but still reasonable  */
+    SECONDS_PER_WEEK,     /* as far as we want to go to avoid making long locks
+                           * unreliable and practically non-functional */
+};
+
 typedef struct CfLockStack_ {
     char lock[CF_BUFSIZE];
     char last[CF_BUFSIZE];
@@ -1210,11 +1223,30 @@ void PurgeLocks(void)
         return;
     }
 
+    int usage_pct = GetDBUsagePercentage(dbp);
+    if (usage_pct == -1)
+    {
+        /* error already logged */
+        /* no usage info, assume 50% */
+        usage_pct = 50;
+    }
+
+    unsigned short interval_idx = MIN(usage_pct / (100 / N_LOCK_HORIZON_USAGE_INTERVALS),
+                                      N_LOCK_HORIZON_USAGE_INTERVALS - 1);
+    const time_t lock_horizon_interval = LOCK_HORIZON_USAGE_INTERVALS[interval_idx];
+    if (lock_horizon_interval == 0)
+    {
+        Log(LOG_LEVEL_VERBOSE, "No lock purging needed (lock DB usage: %d %%)", usage_pct);
+        CloseLock(dbp);
+        return;
+    }
+    const time_t purge_horizon = now - lock_horizon_interval;
+
     memset(&lock_horizon, 0, sizeof(lock_horizon));
 
     if (ReadDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon)))
     {
-        if ((now - lock_horizon.time) < CF_LOCKHORIZON)
+        if (lock_horizon.time > purge_horizon)
         {
             Log(LOG_LEVEL_VERBOSE, "No lock purging scheduled");
             CloseLock(dbp);
@@ -1222,7 +1254,9 @@ void PurgeLocks(void)
         }
     }
 
-    Log(LOG_LEVEL_VERBOSE, "Looking for stale locks to purge");
+    Log(LOG_LEVEL_VERBOSE,
+        "Looking for stale locks (older than %jd seconds) to purge",
+        (intmax_t) lock_horizon_interval);
 
     if (!NewDBCursor(dbp, &dbcp))
     {
@@ -1244,7 +1278,7 @@ void PurgeLocks(void)
             continue;
         }
 
-        if (now - entry->time > (time_t) CF_LOCKHORIZON)
+        if (entry->time < purge_horizon)
         {
             Log(LOG_LEVEL_VERBOSE, "Purging lock (%jd s elapsed): %s",
                 (intmax_t) (now - entry->time), key);

From db93827e42aba3656a661e2d2dd5b8e59f55b389 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Tue, 14 Nov 2023 11:10:57 +0100
Subject: [PATCH 5/9] Improve code quality of PurgeLocks()

Declare variables when they are used and give them better
names. Also use the nicer names for DB types.

Ticket: ENT-8201
Changelog: None
(cherry picked from commit a2159f9454a99c5f44763d23eb0161cb5573cee6)
---
 libpromises/locks.c | 44 +++++++++++++++++++++-----------------------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/libpromises/locks.c b/libpromises/locks.c
index 1eca411a73..b1727d387d 100644
--- a/libpromises/locks.c
+++ b/libpromises/locks.c
@@ -1210,20 +1210,15 @@ void BackupLockDatabase(void)
 
 void PurgeLocks(void)
 {
-    CF_DBC *dbcp;
-    char *key;
-    int ksize, vsize;
-    LockData lock_horizon;
-    LockData *entry = NULL;
-    time_t now = time(NULL);
-
-    CF_DB *dbp = OpenLock();
-    if (dbp == NULL)
+    DBHandle *db = OpenLock();
+    if (db == NULL)
     {
         return;
     }
 
-    int usage_pct = GetDBUsagePercentage(dbp);
+    time_t now = time(NULL);
+
+    int usage_pct = GetDBUsagePercentage(db);
     if (usage_pct == -1)
     {
         /* error already logged */
@@ -1237,19 +1232,19 @@ void PurgeLocks(void)
     if (lock_horizon_interval == 0)
     {
         Log(LOG_LEVEL_VERBOSE, "No lock purging needed (lock DB usage: %d %%)", usage_pct);
-        CloseLock(dbp);
+        CloseLock(db);
         return;
     }
     const time_t purge_horizon = now - lock_horizon_interval;
 
+    LockData lock_horizon;
     memset(&lock_horizon, 0, sizeof(lock_horizon));
-
-    if (ReadDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon)))
+    if (ReadDB(db, "lock_horizon", &lock_horizon, sizeof(lock_horizon)))
     {
         if (lock_horizon.time > purge_horizon)
         {
             Log(LOG_LEVEL_VERBOSE, "No lock purging scheduled");
-            CloseLock(dbp);
+            CloseLock(db);
             return;
         }
     }
@@ -1258,22 +1253,26 @@ void PurgeLocks(void)
         "Looking for stale locks (older than %jd seconds) to purge",
         (intmax_t) lock_horizon_interval);
 
-    if (!NewDBCursor(dbp, &dbcp))
+    DBCursor *cursor;
+    if (!NewDBCursor(db, &cursor))
     {
         char *db_path = DBIdToPath(dbid_locks);
         Log(LOG_LEVEL_ERR, "Unable to get cursor for locks database '%s'",
             db_path);
         free(db_path);
-        CloseLock(dbp);
+        CloseLock(db);
         return;
     }
 
-    while (NextDB(dbcp, &key, &ksize, (void **)&entry, &vsize))
+    char *key;
+    int ksize, vsize;
+    LockData *entry = NULL;
+    while (NextDB(cursor, &key, &ksize, (void **)&entry, &vsize))
     {
 #ifdef LMDB
         LOG_LOCK_OP("<unknown>", key, entry);
 #endif
-        if (STARTSWITH(key, "last.internal_bundle.track_license.handle"))
+        if (StringStartsWith(key, "last.internal_bundle.track_license.handle"))
         {
             continue;
         }
@@ -1282,15 +1281,14 @@ void PurgeLocks(void)
         {
             Log(LOG_LEVEL_VERBOSE, "Purging lock (%jd s elapsed): %s",
                 (intmax_t) (now - entry->time), key);
-            DBCursorDeleteEntry(dbcp);
+            DBCursorDeleteEntry(cursor);
         }
     }
+    DeleteDBCursor(cursor);
 
     Log(LOG_LEVEL_DEBUG, "Finished purging locks");
 
     lock_horizon.time = now;
-    DeleteDBCursor(dbcp);
-
-    WriteDB(dbp, "lock_horizon", &lock_horizon, sizeof(lock_horizon));
-    CloseLock(dbp);
+    WriteDB(db, "lock_horizon", &lock_horizon, sizeof(lock_horizon));
+    CloseLock(db);
 }

From 6cef698327a12f27575f80438a8915e3ee7c613b Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Wed, 15 Nov 2023 14:03:22 +0100
Subject: [PATCH 6/9] Rotate almost full LMDB files in cf-check repair

Almost full LMDBs are dangerous, so they should be rotated as
part of `cf-check repair`.

Ticket: CFE-3374
Changelog: `cf-check repair` now rotates DB files with high usage (>95%)
(cherry picked from commit 11ed41efe12b48a11a69939f99d548cc2969de48)
---
 cf-check/diagnose.c    | 24 +++++++++++++
 cf-check/diagnose.h    |  2 ++
 cf-check/repair.c      | 80 +++++++++++++++++++++++++++++++++++++++---
 cf-check/repair.h      |  1 +
 libpromises/dbm_lmdb.c |  1 +
 5 files changed, 104 insertions(+), 4 deletions(-)

diff --git a/cf-check/diagnose.c b/cf-check/diagnose.c
index 7689bca611..a7c621b746 100644
--- a/cf-check/diagnose.c
+++ b/cf-check/diagnose.c
@@ -39,6 +39,11 @@ size_t diagnose_files(
 #include <validate.h>
 #include <openssl/rand.h>
 
+/* NOTE: Must be in sync with LMDB_MAXSIZE in libpromises/dbm_lmdb.c. */
+#ifndef LMDB_MAXSIZE
+#define LMDB_MAXSIZE    104857600
+#endif
+
 #define CF_CHECK_CREATE_STRING(name) \
   #name,
 
@@ -524,6 +529,25 @@ static char *follow_symlink(const char *path)
     return xstrdup(target_buf);
 }
 
+bool lmdb_file_needs_rotation(const char *file, int *usage)
+{
+    struct stat sb;
+    if (stat(file, &sb) == 0)
+    {
+        int usage_pct = (((float) sb.st_size) / LMDB_MAXSIZE) * 100;
+        if (usage != NULL)
+        {
+            *usage = usage_pct;
+        }
+        return (usage_pct >= 95);
+    }
+    else
+    {
+        Log(LOG_LEVEL_ERR, "Failed to stat() '%s' when checking usage: %s", file, GetErrorStr());
+        return false;
+    }
+}
+
 /**
  * @param[in]  filenames  DB files to diagnose/check
  * @param[out] corrupt    place to store the resulting sequence of corrupted
diff --git a/cf-check/diagnose.h b/cf-check/diagnose.h
index 4bb01c7dde..5f6204b251 100644
--- a/cf-check/diagnose.h
+++ b/cf-check/diagnose.h
@@ -85,6 +85,8 @@ int lmdb_errno_to_cf_check_code(int r);
 int signal_to_cf_check_code(int sig);
 void report_mdb_error(const char *db_file, const char *op, int rc);
 
+bool lmdb_file_needs_rotation(const char *file, int *usage);
+
 size_t diagnose_files(
     const Seq *filenames, Seq **corrupt, bool foreground, bool validate, bool test_write);
 int diagnose_main(int argc, const char *const *argv);
diff --git a/cf-check/repair.c b/cf-check/repair.c
index d2ad3409d6..77e3f95aeb 100644
--- a/cf-check/repair.c
+++ b/cf-check/repair.c
@@ -80,7 +80,7 @@ int remove_files(Seq *files)
     return failures;
 }
 
-static bool record_repair_timestamp(int fd_tstamp)
+static bool record_timestamp(int fd_tstamp)
 {
     time_t this_timestamp = time(NULL);
     lseek(fd_tstamp, 0, SEEK_SET);
@@ -158,7 +158,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
             }
             else
             {
-                if (!record_repair_timestamp(fd_tstamp))
+                if (!record_timestamp(fd_tstamp))
                 {
                     Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
                         file);
@@ -178,7 +178,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
             }
             else
             {
-                if (!record_repair_timestamp(fd_tstamp))
+                if (!record_timestamp(fd_tstamp))
                 {
                     Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
                         file);
@@ -200,7 +200,7 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
                 ret = -1;
                 goto cleanup;
             }
-            if (!record_repair_timestamp(fd_tstamp))
+            if (!record_timestamp(fd_tstamp))
             {
                 Log(LOG_LEVEL_ERR, "Failed to write the timestamp of repair of the '%s' file",
                     file);
@@ -217,6 +217,70 @@ int repair_lmdb_file(const char *file, int fd_tstamp)
     return ret;
 }
 
+/**
+ * @param file      LMDB file to rotate
+ * @param fd_tstamp An open FD to the repair timestamp file or -1
+ *
+ * @note If #fd_tstamp != -1 then it is expected to be open and with file locks
+ *       taken care of. If #fd_tstamp == -1, this function opens the rotation
+ *       timestamp file on its own and takes care of the file locks.
+ */
+int rotate_lmdb_file(const char *file, int fd_tstamp)
+{
+    int ret;
+    FileLock lock = EMPTY_FILE_LOCK;
+    if (fd_tstamp == -1)
+    {
+        char *tstamp_file = StringFormat("%s.rotated", file);
+        int lock_ret = ExclusiveFileLockPath(&lock, tstamp_file, true); /* wait=true */
+        free(tstamp_file);
+        if (lock_ret < 0)
+        {
+            /* Should never happen because we tried to wait for the lock. */
+            Log(LOG_LEVEL_ERR,
+                "Failed to acquire lock for the '%s' DB repair timestamp file",
+                file);
+            ret = -1;
+            goto cleanup;
+        }
+        fd_tstamp = lock.fd;
+    }
+
+    time_t now = time(NULL);
+    {
+        char *rotated_file = StringFormat("%s.rotated_%jd", file, (intmax_t) now);
+        ret = rename(file, rotated_file);
+        free(rotated_file);
+    }
+    if (ret != 0)
+    {
+        Log(LOG_LEVEL_ERR,
+            "Failed to rotate the '%s' DB file (%s), will be removed instead",
+            file, GetErrorStr());
+        ret = unlink(file);
+        if (ret != 0)
+        {
+            Log(LOG_LEVEL_ERR, "Failed to remove the '%s' DB file: %s",
+                file, GetErrorStr());
+        }
+    }
+    if (ret == 0)
+    {
+        if (!record_timestamp(fd_tstamp))
+        {
+            Log(LOG_LEVEL_ERR, "Failed to write the timestamp of rotation of the '%s' DB file",
+                file);
+        }
+    }
+
+  cleanup:
+    if (lock.fd != -1)
+    {
+        ExclusiveFileUnlock(&lock, true); /* close=true */
+    }
+    return ret;
+}
+
 int repair_lmdb_files(Seq *files, bool force)
 {
     assert(files != NULL);
@@ -256,6 +320,14 @@ int repair_lmdb_files(Seq *files, bool force)
         {
             ret++;
         }
+        int usage;
+        if (lmdb_file_needs_rotation(file, &usage))
+        {
+            if (rotate_lmdb_file(file, -1) != -1)
+            {
+                Log(LOG_LEVEL_INFO, "Rotated '%s' DB with %d%% usage", file, usage);
+            }
+        }
     }
 
     if (!force)
diff --git a/cf-check/repair.h b/cf-check/repair.h
index f911af5d5f..9a1e1423bb 100644
--- a/cf-check/repair.h
+++ b/cf-check/repair.h
@@ -6,5 +6,6 @@
 int repair_main(int argc, const char *const *argv);
 int repair_lmdb_default(bool force);
 int repair_lmdb_file(const char *file, int fd_tstamp);
+int rotate_lmdb_file(const char *file, int fd_tstamp);
 
 #endif
diff --git a/libpromises/dbm_lmdb.c b/libpromises/dbm_lmdb.c
index bde8e45f7f..92941eb048 100644
--- a/libpromises/dbm_lmdb.c
+++ b/libpromises/dbm_lmdb.c
@@ -197,6 +197,7 @@ const char *DBPrivGetFileExtension(void)
     return "lmdb";
 }
 
+/* NOTE: Must be in sync with LMDB_MAXSIZE in cf-check/diagnose.c. */
 #ifndef LMDB_MAXSIZE
 #define LMDB_MAXSIZE    104857600
 #endif

From 149b8224daa0b390f855952f39cc9f7da1c99467 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Tue, 14 Nov 2023 15:30:00 +0100
Subject: [PATCH 7/9] Handle MDB_MAP_FULL

If an LMDB database is full, we get a special error from LMDB
when trying to use the DB giving us an option to handle the
situation.

We could try to prune/purge the DB, but there's no general rule
for which data to preserve so we just move the file aside with a
`.rotated.$timestamp` extension.

Continuing (restarting, really) with a new empty LMDB file is
better than getting stuck. So it's a last-resort solution, but
our LMDB files should never get full.

Ticket: ENT-8201
Changelog: Full LMDB files are now handled gracefully by moving
           them aside and using new empty LMDB files
(cherry picked from commit bddc4e009639cc9762997879fc578afa3b87195c)
---
 libpromises/dbm_lmdb.c | 294 ++++++++++++++++++++++++++++++++++++++---
 1 file changed, 275 insertions(+), 19 deletions(-)

diff --git a/libpromises/dbm_lmdb.c b/libpromises/dbm_lmdb.c
index 92941eb048..5095dfafd9 100644
--- a/libpromises/dbm_lmdb.c
+++ b/libpromises/dbm_lmdb.c
@@ -79,13 +79,18 @@ static int DB_MAX_READERS = -1;
 /******************************************************************************/
 
 static void HandleLMDBCorruption(MDB_env *env, const char *msg);
+static void HandleFullLMDB(MDB_env *env);
 
-static inline void CheckLMDBCorrupted(int rc, MDB_env *env)
+static inline void CheckLMDBUsable(int rc, MDB_env *env)
 {
     if (rc == MDB_CORRUPTED)
     {
         HandleLMDBCorruption(env, "");
     }
+    else if (rc == MDB_MAP_FULL)
+    {
+        HandleFullLMDB(env);
+    }
 }
 
 static int GetReadTransaction(DBPriv *const db, DBTxn **const txn)
@@ -134,7 +139,7 @@ static int GetWriteTransaction(DBPriv *const db, DBTxn **const txn)
     if (db_txn->txn != NULL && !db_txn->rw_txn)
     {
         rc = mdb_txn_commit(db_txn->txn);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc != MDB_SUCCESS)
         {
             Log(LOG_LEVEL_ERR, "Unable to close read-only transaction in '%s': %s",
@@ -146,7 +151,7 @@ static int GetWriteTransaction(DBPriv *const db, DBTxn **const txn)
     if (db_txn->txn == NULL)
     {
         rc = mdb_txn_begin(db->env, NULL, 0, &db_txn->txn);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc == MDB_SUCCESS)
         {
             db_txn->rw_txn = true;
@@ -281,6 +286,43 @@ static bool RepairedAfterOpen(const char *lmdb_file, int fd_tstamp)
     return false;
 }
 
+/**
+ * @warning Expects @fd_stamp to be locked.
+ */
+static bool RotatedAfterOpen(const char *lmdb_file, int fd_tstamp)
+{
+    time_t rotated_tstamp = -1;
+    ssize_t n_read = read(fd_tstamp, &rotated_tstamp, sizeof(time_t));
+    lseek(fd_tstamp, 0, SEEK_SET);
+
+    if (n_read < 0)
+    {
+        Log(LOG_LEVEL_ERR, "Failed to read %s: %s", lmdb_file, GetErrorStr());
+    }
+    else if (n_read == 0)
+    {
+        /* EOF (empty file) => never rotated */
+        Log(LOG_LEVEL_VERBOSE, "DB '%s' never rotated before", lmdb_file);
+    }
+    else if ((size_t) n_read < sizeof(time_t))
+    {
+        /* error */
+        Log(LOG_LEVEL_ERR, "Failed to read the timestamp of rotation of the '%s' DB",
+            lmdb_file);
+    }
+    else
+    {
+        /* read the timestamp => Check if the LMDB file was rotated after
+         * we opened it last time. */
+        DBHandle *handle = GetDBHandleFromFilename(lmdb_file);
+        if (rotated_tstamp > GetDBOpenTimestamp(handle))
+        {
+            return true;
+        }
+    }
+    return false;
+}
+
 static void HandleLMDBCorruption(MDB_env *env, const char *msg)
 {
     const char *lmdb_file = mdb_env_get_userctx(env);
@@ -491,6 +533,220 @@ static void HandleLMDBCorruption(MDB_env *env, const char *msg)
 #endif  /* __MINGW32__ */
 }
 
+/**
+ * A modified clone of HandleLMDBCorruption() for handling full LMDBs. It's not
+ * easy and nice to share much code between the two functions, unfortunately.
+ */
+static void HandleFullLMDB(MDB_env *env)
+{
+    const char *lmdb_file = mdb_env_get_userctx(env);
+    Log(LOG_LEVEL_CRIT, "'%s' DB full!", lmdb_file);
+
+    /* Freeze the DB ASAP. This also makes the call to exit() safe regarding
+     * this particular DB because exit handlers will ignore it. */
+    DBHandle *handle = GetDBHandleFromFilename(lmdb_file);
+    FreezeDB(handle);
+
+#ifdef _WIN32
+    /* Not much we can do on Windows because there is no fork() and file locking
+     * is also not so nice. */
+    Log(LOG_LEVEL_WARNING, "Moving the full DB file '%s' aside",
+        lmdb_file);
+    time_t now = time(NULL);
+    char *rotated_file = StringFormat("%s.rotated.%jd", lmdb_file, (intmax_t) now);
+    if (rename(lmdb_file, rotated_file) != 0)
+    {
+        free(rotated_file);
+        Log(LOG_LEVEL_CRIT,
+            "Failed to move the full DB file '%s' aside (%s), will be removed instead",
+            lmdb_file, GetErrorStr());
+        if (unlink(lmdb_file) != 0)
+        {
+            Log(LOG_LEVEL_CRIT, "Failed to remove the full DB file '%s': %s",
+                lmdb_file, GetErrorStr());
+        }
+        exit(EC_CORRUPTION_REPAIR_FAILED);
+    }
+    free(rotated_file);
+    exit(EC_CORRUPTION_REPAIRED);
+#else
+    /* To avoid two processes acting on the same corrupted file at once, file
+     * locks are involved. Looking at OpenDBInstance() and DBPathLock()
+     * in libpromises/db_api.c might also be useful.*/
+
+    /* Only allow one thread at a time to handle a full or corrupted DB. File
+     * locks are *process* specific so threads could step on each others
+     * toes. */
+    ThreadLock(cft_db_corruption_lock);
+
+    char *tstamp_file = StringFormat("%s.rotated", lmdb_file);
+    char *db_lock_file = StringFormat("%s.lock", lmdb_file);
+
+    int fd_tstamp = safe_open(tstamp_file, O_CREAT|O_RDWR);
+    if (fd_tstamp == -1)
+    {
+        Log(LOG_LEVEL_CRIT, "Failed to open the '%s' DB rotation timestamp file",
+            lmdb_file);
+        ThreadUnlock(cft_db_corruption_lock);
+        free(db_lock_file);
+        free(tstamp_file);
+
+        exit(EC_CORRUPTION_REPAIR_FAILED);
+    }
+    FileLock tstamp_lock = { .fd = fd_tstamp };
+
+    int fd_db_lock = safe_open(db_lock_file, O_CREAT|O_RDWR);
+    if (fd_db_lock == -1)
+    {
+        Log(LOG_LEVEL_CRIT, "Failed to open the '%s' DB lock file",
+            lmdb_file);
+        ThreadUnlock(cft_db_corruption_lock);
+        close(fd_tstamp);
+        free(db_lock_file);
+        free(tstamp_file);
+
+        exit(EC_CORRUPTION_REPAIR_FAILED);
+    }
+    FileLock db_lock = { .fd = fd_db_lock };
+
+    int ret;
+    bool handle_rotation = true;
+
+    /* Make sure we are not holding the DB's lock (potentially needed by some
+     * other process for the repair or rotation) to avoid deadlocks. */
+    Log(LOG_LEVEL_DEBUG, "Releasing lock on the '%s' DB", lmdb_file);
+    ExclusiveFileUnlock(&db_lock, false); /* close=false */
+
+    ret = SharedFileLock(&tstamp_lock, true);
+    if (ret == 0)
+    {
+        if (RotatedAfterOpen(lmdb_file, fd_tstamp))
+        {
+            /* The corruption has already been handled. This process should just
+             * die because we have no way to return to the point where it would
+             * just open the new (repaired or rotated) LMDB file. */
+            handle_rotation = false;
+        }
+        SharedFileUnlock(&tstamp_lock, false);
+    }
+    else
+    {
+        /* should never happen (we tried to wait), but if it does, just log an
+         * error and keep going */
+        Log(LOG_LEVEL_ERR,
+            "Failed to get shared lock for the rotation timestamp of the '%s' DB",
+            lmdb_file);
+    }
+
+    if (!handle_rotation)
+    {
+        /* Just clean after ourselves and terminate the process. */
+        ThreadUnlock(cft_db_corruption_lock);
+        close(fd_db_lock);
+        close(fd_tstamp);
+        free(db_lock_file);
+        free(tstamp_file);
+
+        exit(EC_CORRUPTION_REPAIRED);
+    }
+
+    /* HERE is a window for some other process to do the rotation between when we
+     * checked the timestamp using the shared lock above and the attempt to get
+     * the exclusive lock right below. However, this is detected by checking the
+     * contents of the timestamp file again below, while holding the EXCLUSIVE
+     * lock. */
+
+    ret = ExclusiveFileLock(&tstamp_lock, true);
+    if (ret != 0)
+    {
+        /* should never happen (we tried to wait), but if it does, just
+         * terminate because doing the rotation without the lock could be
+         * disasterous */
+        Log(LOG_LEVEL_ERR,
+            "Failed to get shared lock for the rotation timestamp of the '%s' DB",
+            lmdb_file);
+
+        ThreadUnlock(cft_db_corruption_lock);
+        close(fd_db_lock);
+        close(fd_tstamp);
+        free(db_lock_file);
+        free(tstamp_file);
+
+        exit(EC_CORRUPTION_REPAIR_FAILED);
+    }
+
+    /* Cleared to resolve the corruption. */
+
+    /* 1. Acquire the lock for the DB to prevent more processes trying to use
+     *    it while it is corrupted (wait till the lock is available). */
+    while (ExclusiveFileLock(&db_lock, false) == -1)
+    {
+        /* busy wait to do the logging */
+        Log(LOG_LEVEL_INFO, "Waiting for the lock on the '%s' DB",
+            lmdb_file);
+        sleep(1);
+    }
+
+    /* 2. Check the last rotation timestamp again (see the big "HERE..." comment
+     *    above) */
+    if (RotatedAfterOpen(lmdb_file, fd_tstamp))
+    {
+        /* Some other process rotated the DB since we checked last time,
+         * nothing more to do here. */
+        ThreadUnlock(cft_db_corruption_lock);
+        close(fd_db_lock);      /* releases locks */
+        close(fd_tstamp);       /* releases locks */
+        free(db_lock_file);
+        free(tstamp_file);
+
+        exit(EC_CORRUPTION_REPAIRED);
+    }
+
+    /* 3. Rotate the DB or at least move it out of the way. */
+    ret = rotate_lmdb_file(lmdb_file, fd_tstamp);
+    bool rotation_successful = (ret == 0);
+    if (rotation_successful)
+    {
+        Log(LOG_LEVEL_NOTICE, "DB '%s' successfully rotated", lmdb_file);
+    }
+    else
+    {
+        Log(LOG_LEVEL_CRIT, "Failed to rotate '%s' DB", lmdb_file);
+    }
+
+    /* 4. Make the rotated DB available for others. Also release the locks
+     *    in the opposite order in which they were acquired to avoid
+     *    deadlocks. */
+    if (ExclusiveFileUnlock(&db_lock, true) != 0)
+    {
+        Log(LOG_LEVEL_ERR, "Failed to release the acquired lock for '%s'",
+            db_lock_file);
+    }
+
+    /* 5. Signal that the rotation is done (also closes fd_tstamp). */
+    if (ExclusiveFileUnlock(&tstamp_lock, true) != 0)
+    {
+        Log(LOG_LEVEL_ERR, "Failed to release the acquired lock for '%s'",
+            tstamp_file);
+    }
+
+    ThreadUnlock(cft_db_corruption_lock);
+    free(db_lock_file);
+    free(tstamp_file);
+    /* fd_db_lock and fd_tstamp are already closed by the calls to
+     * ExclusiveFileUnlock above. */
+
+    if (rotation_successful)
+    {
+        exit(EC_CORRUPTION_REPAIRED);
+    }
+    else
+    {
+        exit(EC_CORRUPTION_REPAIR_FAILED);
+    }
+#endif  /* _WIN32 */
+}
+
 DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id)
 {
     DBPriv *const db = xcalloc(1, sizeof(DBPriv));
@@ -604,7 +860,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id)
     int attempts = N_LMDB_EINVAL_RETRIES;
     while ((rc != 0) && (attempts-- > 0))
     {
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc != EINVAL)
         {
             Log(LOG_LEVEL_ERR, "Could not open database txn %s: %s",
@@ -625,7 +881,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id)
         goto err;
     }
     rc = mdb_open(txn, NULL, 0, &db->dbi);
-    CheckLMDBCorrupted(rc, db->env);
+    CheckLMDBUsable(rc, db->env);
     if (rc)
     {
         Log(LOG_LEVEL_ERR, "Could not open database dbi %s: %s",
@@ -634,7 +890,7 @@ DBPriv *DBPrivOpenDB(const char *const dbpath, const dbid id)
         goto err;
     }
     rc = mdb_txn_commit(txn);
-    CheckLMDBCorrupted(rc, db->env);
+    CheckLMDBUsable(rc, db->env);
     if (rc)
     {
         Log(LOG_LEVEL_ERR, "Could not commit database dbi %s: %s",
@@ -722,7 +978,7 @@ void DBPrivCommit(DBPriv *db)
     {
         assert(!db_txn->cursor_open);
         const int rc = mdb_txn_commit(db_txn->txn);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc != MDB_SUCCESS)
         {
             Log(LOG_LEVEL_ERR, "Could not commit database transaction to '%s': %s",
@@ -748,7 +1004,7 @@ bool DBPrivHasKey(DBPriv *db, const void *key, int key_size)
         mkey.mv_data = (void *) key;
         mkey.mv_size = key_size;
         rc = mdb_get(txn->txn, db->dbi, &mkey, &data);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc != 0 && rc != MDB_NOTFOUND)
         {
             Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s",
@@ -777,7 +1033,7 @@ int DBPrivGetValueSize(DBPriv *const db, const void *const key, const int key_si
         mkey.mv_data = (void *) key;
         mkey.mv_size = key_size;
         rc = mdb_get(txn->txn, db->dbi, &mkey, &data);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc && rc != MDB_NOTFOUND)
         {
             Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s",
@@ -813,7 +1069,7 @@ bool DBPrivRead(
         mkey.mv_data = (void *) key;
         mkey.mv_size = key_size;
         rc = mdb_get(txn->txn, db->dbi, &mkey, &data);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc == MDB_SUCCESS)
         {
             if (dest_size > data.mv_size)
@@ -855,7 +1111,7 @@ bool DBPrivWrite(
         data.mv_data = (void *)value;
         data.mv_size = value_size;
         rc = mdb_put(txn->txn, db->dbi, &mkey, &data, 0);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc != MDB_SUCCESS)
         {
             Log(LOG_LEVEL_ERR, "Could not write database entry to '%s': %s",
@@ -886,7 +1142,7 @@ bool DBPrivOverwrite(DBPriv *db, const char *key, int key_size, const void *valu
     mkey.mv_data = (void *) key;
     mkey.mv_size = key_size;
     rc = mdb_get(txn->txn, db->dbi, &mkey, &orig_data);
-    CheckLMDBCorrupted(rc, db->env);
+    CheckLMDBUsable(rc, db->env);
     if ((rc != MDB_SUCCESS) && (rc != MDB_NOTFOUND))
     {
         Log(LOG_LEVEL_ERR, "Could not read database entry from '%s': %s",
@@ -927,7 +1183,7 @@ bool DBPrivOverwrite(DBPriv *db, const char *key, int key_size, const void *valu
     new_data.mv_data = (void *)value;
     new_data.mv_size = value_size;
     rc = mdb_put(txn->txn, db->dbi, &mkey, &new_data, 0);
-    CheckLMDBCorrupted(rc, db->env);
+    CheckLMDBUsable(rc, db->env);
     if (rc != MDB_SUCCESS)
     {
         Log(LOG_LEVEL_ERR, "Could not write database entry to '%s': %s",
@@ -953,7 +1209,7 @@ bool DBPrivDelete(DBPriv *const db, const void *const key, const int key_size)
         mkey.mv_data = (void *) key;
         mkey.mv_size = key_size;
         rc = mdb_del(txn->txn, db->dbi, &mkey, NULL);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc == MDB_NOTFOUND)
         {
             Log(LOG_LEVEL_DEBUG, "Entry not found in '%s': %s",
@@ -982,7 +1238,7 @@ DBCursorPriv *DBPrivOpenCursor(DBPriv *const db)
     {
         assert(!txn->cursor_open);
         rc = mdb_cursor_open(txn->txn, db->dbi, &mc);
-        CheckLMDBCorrupted(rc, db->env);
+        CheckLMDBUsable(rc, db->env);
         if (rc == MDB_SUCCESS)
         {
             cursor = xcalloc(1, sizeof(DBCursorPriv));
@@ -1022,7 +1278,7 @@ bool DBPrivAdvanceCursor(
     }
 
     int rc = mdb_cursor_get(cursor->mc, &mkey, &data, MDB_NEXT);
-    CheckLMDBCorrupted(rc, cursor->db->env);
+    CheckLMDBUsable(rc, cursor->db->env);
     if (rc == MDB_SUCCESS)
     {
         // Align second buffer to 64-bit boundary, to avoid alignment errors on
@@ -1061,7 +1317,7 @@ bool DBPrivAdvanceCursor(
         {
             mkey.mv_data = *key;
             rc = mdb_cursor_get(cursor->mc, &mkey, NULL, MDB_SET);
-            CheckLMDBCorrupted(rc, cursor->db->env);
+            CheckLMDBUsable(rc, cursor->db->env);
             // TODO: Should the return value be checked?
         }
         cursor->pending_delete = false;
@@ -1074,7 +1330,7 @@ bool DBPrivDeleteCursorEntry(DBCursorPriv *const cursor)
     assert(cursor != NULL);
 
     int rc = mdb_cursor_get(cursor->mc, &cursor->delkey, NULL, MDB_GET_CURRENT);
-    CheckLMDBCorrupted(rc, cursor->db->env);
+    CheckLMDBUsable(rc, cursor->db->env);
     if (rc == MDB_SUCCESS)
     {
         cursor->pending_delete = true;
@@ -1102,7 +1358,7 @@ bool DBPrivWriteCursorEntry(
         curkey.mv_size = sizeof(cursor->curkv);
 
         rc = mdb_cursor_put(cursor->mc, &curkey, &data, MDB_CURRENT);
-        CheckLMDBCorrupted(rc, cursor->db->env);
+        CheckLMDBUsable(rc, cursor->db->env);
         if (rc != MDB_SUCCESS)
         {
             Log(LOG_LEVEL_ERR, "Could not write cursor entry to '%s': %s",

From 71a6bdd7e367c4669e991e1c980173113ae76215 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Wed, 15 Nov 2023 14:07:33 +0100
Subject: [PATCH 8/9] Add --test-write to cf-check repair

`cf-check diagnose` supports the `--test-write` option to check
if we can write into the given files. `cf-check repair` uses the
same code to identify files that need repairing so it should
provide the same option.

Ticket: CFE-3375
Changelog: `cf-check repair` now supports the `--test-write`
           option to check if DBs can be written to as part of
           identifying DBs that need repairing
(cherry picked from commit 5ed5585f61ac35a5a2fd4cbb89c06c542e292d70)
---
 cf-check/repair.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/cf-check/repair.c b/cf-check/repair.c
index 77e3f95aeb..bf2c784c16 100644
--- a/cf-check/repair.c
+++ b/cf-check/repair.c
@@ -38,7 +38,9 @@ static void print_usage(void)
 {
     printf("Usage: cf-check repair [-f] [FILE ...]\n");
     printf("Example: cf-check repair /var/cfengine/state/cf_lastseen.lmdb\n");
-    printf("Options: -f|--force repair LMDB files that look OK ");
+    printf("Options:\n"
+           "-f|--force repair LMDB files that look OK\n"
+           "-w|--test-write test writing when checking files\n");
 }
 
 int remove_files(Seq *files)
@@ -281,7 +283,7 @@ int rotate_lmdb_file(const char *file, int fd_tstamp)
     return ret;
 }
 
-int repair_lmdb_files(Seq *files, bool force)
+static int repair_lmdb_files(Seq *files, bool force, bool test_write)
 {
     assert(files != NULL);
     assert(SeqLength(files) > 0);
@@ -293,7 +295,7 @@ int repair_lmdb_files(Seq *files, bool force)
     }
     else
     {
-        const int corruptions = diagnose_files(files, &corrupt, false, false, false);
+        const int corruptions = diagnose_files(files, &corrupt, false, false, test_write);
         if (corruptions != 0)
         {
             assert(corrupt != NULL);
@@ -350,15 +352,19 @@ int repair_lmdb_files(Seq *files, bool force)
 
 int repair_main(int argc, const char *const *const argv)
 {
-    size_t offset = 1;
     bool force = false;
-    if (argc > 1 && argv[1] != NULL && argv[1][0] == '-')
+    bool test_write = false;
+    int i = 1;
+    for (; (i < argc) && (argv[i] != NULL) && (argv[i][0] == '-'); i++)
     {
-        if (StringMatchesOption(argv[1], "--force", "-f"))
+        if (StringMatchesOption(argv[i], "--force", "-f"))
         {
-            offset++;
             force = true;
         }
+        else if (StringMatchesOption(argv[i], "--test-write", "-w"))
+        {
+            test_write = true;
+        }
         else
         {
             print_usage();
@@ -366,13 +372,18 @@ int repair_main(int argc, const char *const *const argv)
             return 1;
         }
     }
+    if (force && test_write)
+    {
+        Log(LOG_LEVEL_WARNING, "Ignoring --test-write due to --force skipping DB checks");
+    }
+    size_t offset = i;
     Seq *files = argv_to_lmdb_files(argc, argv, offset);
     if (files == NULL || SeqLength(files) == 0)
     {
         Log(LOG_LEVEL_ERR, "No database files to repair");
         return 1;
     }
-    const int ret = repair_lmdb_files(files, force);
+    const int ret = repair_lmdb_files(files, force, test_write);
     SeqDestroy(files);
     return ret;
 }
@@ -397,7 +408,7 @@ int repair_lmdb_default(bool force)
         Log(LOG_LEVEL_INFO, "Skipping local database repair, no lmdb files");
         return 0;
     }
-    const int ret = repair_lmdb_files(files, force);
+    const int ret = repair_lmdb_files(files, force, false);
     SeqDestroy(files);
 
     if (ret != 0)

From 81fdd8e9e0167e7a7f1855326396f180ba21cec5 Mon Sep 17 00:00:00 2001
From: Vratislav Podzimek <vratislav.podzimek@northern.tech>
Date: Wed, 15 Nov 2023 14:19:39 +0100
Subject: [PATCH 9/9] Show DB usage as part of DB status in cf-check diagnose

Plus the information if we think the DB needs rotation or
not. Almost full LMDBs are problematic and should be moved out of
the way.

Ticket: none
Changelog: `cf-check diagnose` now shows DB usage and a hint if
           rotation is required
(cherry picked from commit 6141736b27e882690dbbde45ad0833ddf60435bf)
---
 cf-check/diagnose.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/cf-check/diagnose.c b/cf-check/diagnose.c
index a7c621b746..c04444adb2 100644
--- a/cf-check/diagnose.c
+++ b/cf-check/diagnose.c
@@ -614,18 +614,26 @@ size_t diagnose_files(
 
         if (symlink_target != NULL)
         {
+            int usage;
+            bool needs_rotation = lmdb_file_needs_rotation(symlink_target, &usage);
             Log(LOG_LEVEL_INFO,
-                "Status of '%s' -> '%s': %s\n",
+                "Status of '%s' -> '%s': %s [%d%% usage%s]\n",
                 symlink,
                 symlink_target,
-                CF_CHECK_STRING(r));
+                CF_CHECK_STRING(r),
+                usage,
+                needs_rotation ? ", needs rotation" : "");
         }
         else
         {
+            int usage;
+            bool needs_rotation = lmdb_file_needs_rotation(filename, &usage);
             Log(LOG_LEVEL_INFO,
-                "Status of '%s': %s\n",
+                "Status of '%s': %s [%d%% usage%s]\n",
                 filename,
-                CF_CHECK_STRING(r));
+                CF_CHECK_STRING(r),
+                usage,
+                needs_rotation ? ", needs rotation" : "");
         }