diff --git a/TODO b/TODO index b5297290..6cdd6a70 100644 --- a/TODO +++ b/TODO @@ -39,3 +39,9 @@ LATER: * make "casync stat" work on a directory with a subpath * save/restore xfs/ext4 projid * tweak chunker: shift cut to last "marker". +* define sane errors we can show user messages about +* introduce a --best-effort mode when replaying, which means we'll ignore what we can't apply +* add "--cache-auto" mode, that works like "--cache" but finds a suitable place for the cache automatically +* use CaNameTable logic in seed, so that we can safely regenerate *any* chunk +* when building the cache, also build a seed +* make sure that "casync list" on a directory doesn't read any files diff --git a/meson.build b/meson.build index f550b84f..fdfd0b3e 100644 --- a/meson.build +++ b/meson.build @@ -33,6 +33,7 @@ c_args = ''' -Werror=return-type -Werror=incompatible-pointer-types -Werror=shadow + -Werror=int-conversion -Wstrict-prototypes -Wredundant-decls -Wmissing-noreturn @@ -272,6 +273,14 @@ test_fuse = find_program(test_fuse_sh) test('test-fuse.sh', test_fuse, timeout : 30 * 60) +test_cache_sh = configure_file( + output : 'test-cache.sh', + input : 'test/test-cache.sh.in', + configuration : substs) +test_cache = find_program(test_cache_sh) +test('test-cache.sh', test_cache, + timeout : 30 * 60) + udev_rule = configure_file( output : '75-casync.rules', input : 'src/75-casync.rules.in', diff --git a/src/cacache.c b/src/cacache.c new file mode 100644 index 00000000..df6738c4 --- /dev/null +++ b/src/cacache.c @@ -0,0 +1,442 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include +#include +#include +#include +#include + +#include "cacache.h" +#include "chattr.h" +#include "realloc-buffer.h" + +/* #undef EINVAL */ +/* #define EINVAL __LINE__ */ + +struct CaCache { + unsigned n_ref; + int fd; + char *path; + CaDigestType digest_type; + CaDigest *digest; +}; + +static CaDigestType ca_cache_digest_type(CaCache *c) { + assert_se(c); + + return c->digest_type >= 0 ? 
c->digest_type : CA_DIGEST_DEFAULT; +} + +CaCache *ca_cache_new(void) { + CaCache *c; + + c = new0(CaCache, 1); + if (!c) + return NULL; + + c->n_ref = 1; + c->fd = -1; + c->digest_type = _CA_DIGEST_TYPE_INVALID; + + return c; +} + +CaCache *ca_cache_unref(CaCache *c) { + if (!c) + return NULL; + + assert_se(c->n_ref > 0); + c->n_ref--; + + if (c->n_ref > 0) + return NULL; + + free(c->path); + safe_close(c->fd); + ca_digest_free(c->digest); + + return mfree(c); +} + +CaCache *ca_cache_ref(CaCache *c) { + if (!c) + return NULL; + + assert_se(c->n_ref > 0); + c->n_ref++; + + return c; +} + +int ca_cache_set_digest_type(CaCache *c, CaDigestType type) { + if (!c) + return -EINVAL; + if (type < 0) + return -EINVAL; + if (type >= _CA_DIGEST_TYPE_MAX) + return -EINVAL; + + if (c->digest) + return -EBUSY; + + c->digest_type = type; + return 0; +} + +int ca_cache_set_fd(CaCache *c, int fd) { + if (!c) + return -EINVAL; + if (fd < 0) + return -EINVAL; + + if (c->fd >= 0) + return -EBUSY; + if (c->path) + return -EBUSY; + + c->fd = fd; + return 0; +} + +int ca_cache_set_path(CaCache *c, const char *path) { + + if (!c) + return -EINVAL; + if (!path) + return -EINVAL; + + if (c->fd >= 0) + return -EBUSY; + if (c->path) + return -EBUSY; + + c->path = strdup(path); + if (!c->path) + return -ENOMEM; + + return 0; +} + +static int ca_cache_open(CaCache *c) { + int r; + + if (!c) + return -EINVAL; + if (c->fd >= 0) + return 0; + if (!c->path) + return -EUNATCH; + + (void) mkdir(c->path, 0777); + + c->fd = open(c->path, O_RDONLY|O_CLOEXEC|O_DIRECTORY); + if (c->fd < 0) + return -errno; + + /* We never want the cache to be saved, it's entirely redundant after all, let's hence set the FS_NODUMP_FL + * flag on it */ + r = mask_attr_fd(c->fd, FS_NODUMP_FL, FS_NODUMP_FL); + if (r < 0) + log_debug_errno(r, "Failed to set FS_NODUMP_PL flag on cache directory: %m"); + + return 0; +} + +int ca_cache_get(CaCache *c, CaLocation *location, CaChunkID *ret_chunk_id, CaOrigin **ret_origin) { + 
_cleanup_(ca_location_unrefp) CaLocation *patched_location = NULL; + _cleanup_(realloc_buffer_free) ReallocBuffer buffer = {}; + _cleanup_(ca_origin_unrefp) CaOrigin *origin = NULL; + char path[CA_CHUNK_ID_PATH_SIZE(NULL, ".cachi")]; + _cleanup_free_ char *dest = NULL; + _cleanup_(safe_closep) int fd = -1; + CaChunkID key_id, chunk_id; + int r; + + if (!c) + return -EINVAL; + if (!location) + return -EINVAL; + + assert_se(location->mtime != UINT64_MAX); + + r = ca_cache_open(c); + if (r < 0) + return r; + + if (!c->digest) { + r = ca_digest_new(ca_cache_digest_type(c), &c->digest); + if (r < 0) + return r; + } + + r = ca_location_id_make(c->digest, location, false, &key_id); + if (r < 0) + return r; + + ca_chunk_id_format_path(NULL, &key_id, ".cachi", path); + + r = readlinkat_malloc(c->fd, path, &dest); + if (r == -EINVAL) { + + /* Not a symlink? read as file then */ + fd = openat(c->fd, path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + r = realloc_buffer_read_target(&buffer, fd, CA_CHUNK_ID_SIZE); + if (r < 0) + return r; + if (r == 0) /* file too short */ + return -EINVAL; + + /* If this is a file, it begins with the binary chunk ID. Copy it out, and move ahead */ + memcpy(&chunk_id, realloc_buffer_data(&buffer), CA_CHUNK_ID_SIZE); + r = realloc_buffer_advance(&buffer, CA_CHUNK_ID_SIZE); + if (r < 0) + return r; + + } else if (r < 0) /* Any other error? */ + return r; + else { + const char *p, *q; + + /* A symlink? then add it to our buffer */ + if (!realloc_buffer_donate(&buffer, dest, strlen(dest)+1)) + return -ENOMEM; + dest = NULL; + + if (realloc_buffer_size(&buffer) < CA_CHUNK_ID_SIZE*2+1) + return -EINVAL; + + p = realloc_buffer_data(&buffer); + if (p[CA_CHUNK_ID_SIZE*2] != ':') + return -EINVAL; + + /* If this is a symlink, its target begins with an ASCII formatted chunk ID. 
Copy it out, and move ahead */ + q = strndupa(p, CA_CHUNK_ID_SIZE*2); + if (!ca_chunk_id_parse(q, &chunk_id)) + return -EINVAL; + + r = realloc_buffer_advance(&buffer, CA_CHUNK_ID_SIZE*2+1); + if (r < 0) + return r; + } + + r = ca_origin_new(&origin); + if (r < 0) + return r; + + for (;;) { + _cleanup_(ca_location_unrefp) CaLocation *item = NULL; + const char *p, *e; + size_t l; + + l = realloc_buffer_size(&buffer); + p = realloc_buffer_data(&buffer); + + e = memchr(p, 0, l); + if (!e) { + /* No NUL byte? If we can read more, try to do so. Otherwise, the file is truncated */ + if (fd >= 0) { + r = realloc_buffer_read(&buffer, fd); + if (r < 0) + return r; + if (r > 0) + continue; + } + + if (l == 0) /* EOF? */ + break; + + /* Truncated! */ + return -EINVAL; + } + + r = ca_location_parse(p, &item); + if (r < 0) + return r; + + /* In locations from the cache both size and mtime must be initialized */ + if (item->size == UINT64_MAX) + return -EINVAL; + if (item->mtime == UINT64_MAX) + return -EINVAL; + + r = realloc_buffer_advance(&buffer, e - p + 1); + if (r < 0) + return r; + + r = ca_origin_put(origin, item); + if (r < 0) + return r; + } + + /* The origin we read from the cache can't be empty */ + if (ca_origin_bytes(origin) == 0) + return -EINVAL; + + /* The first item of the origin must match our lookup key. If it doesn't something's bad. 
*/ + if (!ca_location_equal(location, ca_origin_get(origin, 0), false)) + return -EINVAL; + + if (ret_chunk_id) + *ret_chunk_id = chunk_id; + + if (ret_origin) { + *ret_origin = origin; + origin = NULL; + } + + return 0; +} + +int ca_cache_put(CaCache *c, CaOrigin *origin, const CaChunkID *chunk_id) { + _cleanup_(realloc_buffer_free) ReallocBuffer buffer = {}; + char path[CA_CHUNK_ID_PATH_SIZE(NULL, ".cachi")]; + _cleanup_(unlink_and_freep) char *temp = NULL; + _cleanup_(safe_closep) int fd = -1; + CaLocation *first_location; + CaChunkID key_id; + const char *four; + size_t i; + int r; + + if (!c) + return -EINVAL; + if (!origin) + return -EINVAL; + if (!chunk_id) + return -EINVAL; + + first_location = ca_origin_get(origin, 0); + if (!first_location) + return -EINVAL; + + r = ca_cache_open(c); + if (r < 0) + return r; + + if (!c->digest) { + r = ca_digest_new(ca_cache_digest_type(c), &c->digest); + if (r < 0) + return r; + } + + r = ca_location_id_make(c->digest, first_location, false, &key_id); + if (r < 0) + return r; + + ca_chunk_id_format_path(NULL, &key_id, ".cachi", path); + + four = strndupa(path, 4); + (void) mkdirat(c->fd, four, 0755); + + if (ca_origin_items(origin) == 1) { + const char *f; + char *p; + + /* If there's only a single item, then let's try to create this as symlink, as it is the cheapest + * option */ + + f = ca_location_format(first_location); + if (!f) + return -ENOMEM; + + p = newa(char, CA_CHUNK_ID_SIZE*2 + 1 + strlen(f) + 1); + + ca_chunk_id_format(chunk_id, p); + p[CA_CHUNK_ID_SIZE*2] = ':'; + strcpy(p + CA_CHUNK_ID_SIZE*2+1, f); + + if (symlinkat(p, c->fd, path) < 0) { + if (errno == EEXIST) + return 0; + + if (errno != ENAMETOOLONG) + return -errno; + } else + return 1; + + } else { + /* Check if there's already a file for this, before we create one */ + if (faccessat(c->fd, path, F_OK, AT_SYMLINK_NOFOLLOW) >= 0) + return 0; + } + + r = tempfn_random(path, &temp); + if (r < 0) + return r; + + fd = openat(c->fd, temp, 
O_CREAT|O_WRONLY|O_CLOEXEC|O_EXCL|O_NOFOLLOW|O_EXCL, 0666); + if (fd < 0) + return -errno; + + if (!realloc_buffer_append(&buffer, chunk_id, sizeof(*chunk_id))) + return -ENOMEM; + + for (i = 0; i < ca_origin_items(origin); i++) { + CaLocation *l; + const char *f; + + r = realloc_buffer_write_maybe(&buffer, fd); + if (r < 0) + return r; + + l = ca_origin_get(origin, i); + if (!l) + return -EINVAL; + + f = ca_location_format(l); + if (!f) + return -ENOMEM; + + if (!realloc_buffer_append(&buffer, f, strlen(f) + 1)) + return -ENOMEM; + } + + r = realloc_buffer_write(&buffer, fd); + if (r < 0) + return r; + + if (renameat(c->fd, temp, c->fd, path) < 0) + return -errno; + + temp = mfree(temp); + return 1; +} + +int ca_cache_remove(CaCache *c, CaLocation *location) { + char path[CA_CHUNK_ID_PATH_SIZE(NULL, ".cachi")]; + CaChunkID key_id; + const char *four; + int r; + + if (!c) + return -EINVAL; + if (!location) + return -EINVAL; + + r = ca_cache_open(c); + if (r < 0) + return r; + + if (!c->digest) { + r = ca_digest_new(ca_cache_digest_type(c), &c->digest); + if (r < 0) + return r; + } + + r = ca_location_id_make(c->digest, location, false, &key_id); + if (r < 0) + return r; + + ca_chunk_id_format_path(NULL, &key_id, ".cachi", path); + + if (unlinkat(c->fd, path, 0) < 0) + return -errno; + + four = strndupa(path, 4); + (void) unlinkat(c->fd, four, AT_REMOVEDIR); + + return 0; +} diff --git a/src/cacache.h b/src/cacache.h new file mode 100644 index 00000000..34030570 --- /dev/null +++ b/src/cacache.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef foocacachehfoo +#define foocacachehfoo + +#include "cachunkid.h" +#include "calocation.h" +#include "caorigin.h" +#include "util.h" + +/* Implements a cache, that given a location returns a chunk ID plus a matching origin object encoding where the chunk + * is sourced from. 
This is used to speed up generation of chunks: when we looked at a specific chunk in an earlier + * run, then there's no need to regenerate its hash again as long as the origin inodes haven't changed */ + +typedef struct CaCache CaCache; + +CaCache *ca_cache_new(void); +CaCache *ca_cache_unref(CaCache *c); +CaCache *ca_cache_ref(CaCache *c); + +int ca_cache_set_digest_type(CaCache *c, CaDigestType type); + +int ca_cache_set_fd(CaCache *c, int fd); +int ca_cache_set_path(CaCache *c, const char *path); + +int ca_cache_get(CaCache *c, CaLocation *location, CaChunkID *ret_chunk_id, CaOrigin **ret_origin); +int ca_cache_put(CaCache *c, CaOrigin *origin, const CaChunkID *chunk_id); +int ca_cache_remove(CaCache *c, CaLocation *location); + +DEFINE_TRIVIAL_CLEANUP_FUNC(CaCache*, ca_cache_unref); + +#endif diff --git a/src/cachunk.c b/src/cachunk.c index 2ce97f5d..7e603b83 100644 --- a/src/cachunk.c +++ b/src/cachunk.c @@ -12,36 +12,6 @@ #include "def.h" #include "util.h" -#define CHUNK_PATH_SIZE(prefix, suffix) \ - (strlen_null(prefix) + 4 + 1 + CA_CHUNK_ID_FORMAT_MAX + strlen_null(suffix)) - -static char* ca_format_chunk_path( - const char *prefix, - const CaChunkID *chunkid, - const char *suffix, - char buffer[]) { - - size_t n; - - assert(chunkid); - assert(buffer); - - if (prefix) { - n = strlen(prefix); - memcpy(buffer, prefix, n); - } else - n = 0; - - ca_chunk_id_format(chunkid, buffer + n + 4 + 1); - memcpy(buffer + n, buffer + n + 4 + 1, 4); - buffer[n + 4] = '/'; - - if (suffix) - strcpy(buffer + n + 4 + 1 + CA_CHUNK_ID_FORMAT_MAX - 1, suffix); - - return buffer; -} - int ca_load_fd(int fd, ReallocBuffer *buffer) { uint64_t count = 0; @@ -548,7 +518,7 @@ int ca_decompress(const void *data, size_t size, ReallocBuffer *buffer) { int ca_chunk_file_open(int chunk_fd, const char *prefix, const CaChunkID *chunkid, const char *suffix, int flags) { - char path[CHUNK_PATH_SIZE(prefix, suffix)]; + char path[CA_CHUNK_ID_PATH_SIZE(prefix, suffix)]; bool made = false; char 
*slash = NULL; int r, fd; @@ -560,7 +530,7 @@ int ca_chunk_file_open(int chunk_fd, const char *prefix, const CaChunkID *chunki if (!chunkid) return -EINVAL; - ca_format_chunk_path(prefix, chunkid, suffix, path); + ca_chunk_id_format_path(prefix, chunkid, suffix, path); if ((flags & O_CREAT) == O_CREAT) { assert_se(slash = strrchr(path, '/')); @@ -593,14 +563,14 @@ int ca_chunk_file_open(int chunk_fd, const char *prefix, const CaChunkID *chunki } static int ca_chunk_file_access(int chunk_fd, const char *prefix, const CaChunkID *chunkid, const char *suffix) { - char path[CHUNK_PATH_SIZE(prefix, suffix)]; + char path[CA_CHUNK_ID_PATH_SIZE(prefix, suffix)]; if (chunk_fd < 0 && chunk_fd != AT_FDCWD) return -EINVAL; if (!chunkid) return -EINVAL; - ca_format_chunk_path(prefix, chunkid, suffix, path); + ca_chunk_id_format_path(prefix, chunkid, suffix, path); if (faccessat(chunk_fd, path, F_OK, AT_SYMLINK_NOFOLLOW) < 0) return errno == ENOENT ? 0 : -errno; @@ -609,14 +579,14 @@ static int ca_chunk_file_access(int chunk_fd, const char *prefix, const CaChunkI } static int ca_chunk_file_unlink(int chunk_fd, const char *prefix, const CaChunkID *chunkid, const char *suffix) { - char path[CHUNK_PATH_SIZE(prefix, suffix)], *slash; + char path[CA_CHUNK_ID_PATH_SIZE(prefix, suffix)], *slash; if (chunk_fd < 0 && chunk_fd != AT_FDCWD) return -EINVAL; if (!chunkid) return -EINVAL; - ca_format_chunk_path(prefix, chunkid, suffix, path); + ca_chunk_id_format_path(prefix, chunkid, suffix, path); if (unlinkat(chunk_fd, path, 0) < 0) return -errno; @@ -631,7 +601,7 @@ static int ca_chunk_file_unlink(int chunk_fd, const char *prefix, const CaChunkI } static int ca_chunk_file_rename(int chunk_fd, const char *prefix, const CaChunkID *chunkid, const char *old_suffix, const char *new_suffix) { - char old_path[CHUNK_PATH_SIZE(prefix, old_suffix)], new_path[CHUNK_PATH_SIZE(prefix, new_suffix)]; + char old_path[CA_CHUNK_ID_PATH_SIZE(prefix, old_suffix)], new_path[CA_CHUNK_ID_PATH_SIZE(prefix, 
new_suffix)]; int r; if (chunk_fd < 0 && chunk_fd != AT_FDCWD) @@ -639,8 +609,8 @@ static int ca_chunk_file_rename(int chunk_fd, const char *prefix, const CaChunkI if (!chunkid) return -EINVAL; - ca_format_chunk_path(prefix, chunkid, old_suffix, old_path); - ca_format_chunk_path(prefix, chunkid, new_suffix, new_path); + ca_chunk_id_format_path(prefix, chunkid, old_suffix, old_path); + ca_chunk_id_format_path(prefix, chunkid, new_suffix, new_path); r = rename_noreplace(chunk_fd, old_path, chunk_fd, new_path); if (r < 0) @@ -782,7 +752,7 @@ int ca_chunk_file_save( } int ca_chunk_file_mark_missing(int chunk_fd, const char *prefix, const CaChunkID *chunkid) { - char path[CHUNK_PATH_SIZE(prefix, NULL)]; + char path[CA_CHUNK_ID_PATH_SIZE(prefix, NULL)]; bool made = false; char *slash; int r; @@ -798,7 +768,7 @@ int ca_chunk_file_mark_missing(int chunk_fd, const char *prefix, const CaChunkID if (r > 0) return -EEXIST; - ca_format_chunk_path(prefix, chunkid, NULL, path); + ca_chunk_id_format_path(prefix, chunkid, NULL, path); assert_se(slash = strrchr(path, '/')); *slash = 0; diff --git a/src/cachunkid.c b/src/cachunkid.c index 69bd85f2..ddfa378f 100644 --- a/src/cachunkid.c +++ b/src/cachunkid.c @@ -5,6 +5,7 @@ #include "cachunk.h" #include "cachunkid.h" +#include "log.h" static char encode_char(uint8_t x) { x &= 0xF; @@ -83,3 +84,30 @@ int ca_chunk_id_make(CaDigest *digest, const void *p, size_t l, CaChunkID *ret) memcpy(ret, ca_digest_read(digest), sizeof(CaChunkID)); return 0; } + +char* ca_chunk_id_format_path( + const char *prefix, + const CaChunkID *chunkid, + const char *suffix, + char buffer[]) { + + size_t n; + + assert(chunkid); + assert(buffer); + + if (prefix) { + n = strlen(prefix); + memcpy(buffer, prefix, n); + } else + n = 0; + + ca_chunk_id_format(chunkid, buffer + n + 4 + 1); + memcpy(buffer + n, buffer + n + 4 + 1, 4); + buffer[n + 4] = '/'; + + if (suffix) + strcpy(buffer + n + 4 + 1 + CA_CHUNK_ID_FORMAT_MAX - 1, suffix); + + return buffer; +} diff 
--git a/src/cachunkid.h b/src/cachunkid.h index afc7433d..4fc762f9 100644 --- a/src/cachunkid.h +++ b/src/cachunkid.h @@ -48,4 +48,9 @@ static inline bool ca_chunk_id_is_null(const CaChunkID *a) { int ca_chunk_id_make(CaDigest *digest, const void *p, size_t l, CaChunkID *ret); +#define CA_CHUNK_ID_PATH_SIZE(prefix, suffix) \ + (strlen_null(prefix) + 4 + 1 + CA_CHUNK_ID_FORMAT_MAX + strlen_null(suffix)) + +char* ca_chunk_id_format_path(const char *prefix, const CaChunkID *chunkid, const char *suffix, char buffer[]); + #endif diff --git a/src/cacompression.c b/src/cacompression.c index dc6a2172..dbbd2d53 100644 --- a/src/cacompression.c +++ b/src/cacompression.c @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ -#include "util.h" #include "cacompression.h" +#include "util.h" static const char* const table[_CA_COMPRESSION_TYPE_MAX] = { [CA_COMPRESSION_XZ] = "xz", diff --git a/src/cadecoder.c b/src/cadecoder.c index 09000e9f..dea6f797 100644 --- a/src/cadecoder.c +++ b/src/cadecoder.c @@ -3462,8 +3462,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo if (child->fd >= 0) r = fstat(child->fd, &st); else { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); r = fstatat(dir_fd, name, &st, AT_SYMLINK_NOFOLLOW); } @@ -3550,8 +3549,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo if (child->fd >= 0) r = fchown(child->fd, uid, gid); else { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); r = fchownat(dir_fd, name, uid, gid, AT_SYMLINK_NOFOLLOW); } @@ -3583,8 +3581,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo if (child->fd >= 0) r = fchmod(child->fd, new_mode); else { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); r = fchmodat(dir_fd, name, new_mode, AT_SYMLINK_NOFOLLOW); } @@ -3599,8 +3596,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo if (child->fd >= 0) r = fchmod(child->fd, read_le64(&child->entry->mode) & 
07777); else { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); r = fchmodat(dir_fd, name, read_le64(&child->entry->mode) & 07777, AT_SYMLINK_NOFOLLOW); } @@ -3615,8 +3611,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo acl_t new_acl; if (child->fd < 0) { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); path_fd = openat(dir_fd, name, O_RDONLY|O_CLOEXEC|O_NOFOLLOW|O_PATH); if (path_fd < 0) @@ -3835,8 +3830,7 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo if (child->fd >= 0) r = futimens(child->fd, ts); else { - if (!n) - return -EINVAL; + assert(dir_fd >= 0); r = utimensat(dir_fd, name, ts, AT_SYMLINK_NOFOLLOW); } @@ -3909,6 +3903,30 @@ static int ca_decoder_finalize_child(CaDecoder *d, CaDecoderNode *n, CaDecoderNo } } + if (d->replay_feature_flags & (CA_FORMAT_WITH_SEC_TIME|CA_FORMAT_WITH_USEC_TIME|CA_FORMAT_WITH_NSEC_TIME|CA_FORMAT_WITH_2SEC_TIME)) { + uint64_t granularity; + + /* If we have restored the time validate it after all uses, since the backing file system might not + * provide the granularity we need, but we shouldn't permit that since we care about + * reproducibility. 
*/ + + if (child->fd >= 0) + r = fstat(child->fd, &st); + else { + assert(dir_fd >= 0); + r = fstatat(dir_fd, name, &st, AT_SYMLINK_NOFOLLOW); + } + if (r < 0) + return -errno; + + r = ca_feature_flags_time_granularity_nsec(d->replay_feature_flags, &granularity); + if (r < 0) + return r; + + if (timespec_to_nsec(st.st_mtim) / granularity != read_le64(&child->entry->mtime) / granularity) + return -EOPNOTSUPP; + } + return 0; } diff --git a/src/cadigest.h b/src/cadigest.h index 08a499f6..8b41b1c2 100644 --- a/src/cadigest.h +++ b/src/cadigest.h @@ -5,6 +5,7 @@ #include #include +#include typedef struct CaDigest CaDigest; @@ -23,6 +24,20 @@ int ca_digest_ensure_allocated(CaDigest **d, CaDigestType t); void ca_digest_write(CaDigest *d, const void *p, size_t l); +static inline void ca_digest_write_u8(CaDigest *d, uint8_t u) { + ca_digest_write(d, &u, sizeof(u)); +} + +static inline void ca_digest_write_u32(CaDigest *d, uint32_t u) { + u = htole32(u); + ca_digest_write(d, &u, sizeof(u)); +} + +static inline void ca_digest_write_u64(CaDigest *d, uint64_t u) { + u = htole64(u); + ca_digest_write(d, &u, sizeof(u)); +} + const void* ca_digest_read(CaDigest *d); void ca_digest_reset(CaDigest *d); diff --git a/src/caencoder.c b/src/caencoder.c index 50eb75f2..4dbdc6cc 100644 --- a/src/caencoder.c +++ b/src/caencoder.c @@ -30,6 +30,7 @@ #include "caformat-util.h" #include "caformat.h" #include "camakebst.h" +#include "canametable.h" #include "cautil.h" #include "chattr.h" #include "def.h" @@ -60,12 +61,6 @@ typedef struct CaEncoderACLEntry { }; } CaEncoderACLEntry; -typedef struct CaEncoderNameTable { - uint64_t hash; - uint64_t start_offset; - uint64_t end_offset; -} CaEncoderNameTable; - typedef struct CaEncoderNode { int fd; struct stat stat; @@ -125,15 +120,12 @@ typedef struct CaEncoderNode { bool selinux_label_valid:1; char *selinux_label; - /* The offset in the archive */ - uint64_t entry_offset; + /* The FS_IOC_GETVERSION generation */ + int generation; + int 
generation_valid; /* tri-state */ /* If this is a directory: file name lookup data */ - CaEncoderNameTable *name_table; - size_t n_name_table; - size_t n_name_table_allocated; - uint64_t previous_name_table_offset; - bool name_table_incomplete; + CaNameTable *name_table; /* For detecting mount boundaries */ int mount_id; @@ -170,6 +162,7 @@ struct CaEncoder { uint64_t archive_offset; uint64_t payload_offset; + uint64_t skipped_bytes; uid_t cached_uid; gid_t cached_gid; @@ -278,14 +271,10 @@ static void ca_encoder_node_free(CaEncoderNode *n) { n->stat.st_mode = 0; - n->name_table = mfree(n->name_table); - n->n_name_table = n->n_name_table_allocated = 0; - n->previous_name_table_offset = UINT64_MAX; - n->name_table_incomplete = false; - - n->entry_offset = UINT64_MAX; + n->name_table = ca_name_table_unref(n->name_table); n->mount_id = -1; + n->generation_valid = 1; } CaEncoder *ca_encoder_unref(CaEncoder *e) { @@ -381,9 +370,8 @@ int ca_encoder_set_base_fd(CaEncoder *e, int fd) { .acl_default_group_obj_permissions = UINT64_MAX, .acl_default_other_permissions = UINT64_MAX, .acl_default_mask_permissions = UINT64_MAX, - .previous_name_table_offset = UINT64_MAX, - .entry_offset = UINT64_MAX, .mount_id = -1, + .generation_valid = -1, }; e->n_nodes = 1; @@ -715,6 +703,32 @@ static int ca_encoder_node_read_selinux_label( #endif } +static int ca_encoder_node_read_generation( + CaEncoder *e, + CaEncoderNode *n) { + + assert(e); + assert(n); + + if (!S_ISDIR(n->stat.st_mode) && !S_ISREG(n->stat.st_mode)) + return 0; + if (n->fd < 0) + return -EBADFD; + if (n->generation_valid >= 0) /* Already read? 
*/ + return 0; + + if (ioctl(n->fd, FS_IOC_GETVERSION, &n->generation) < 0) { + + if (!IN_SET(errno, ENOTTY, ENOSYS, EBADF, EOPNOTSUPP, EINVAL)) + return -errno; + + n->generation_valid = false; + } else + n->generation_valid = true; + + return 0; +} + static int compare_xattr(const void *a, const void *b) { const CaEncoderExtendedAttribute *x = a, *y = b; @@ -1590,8 +1604,8 @@ static CaEncoderNode* ca_encoder_init_child(CaEncoder *e) { .acl_default_group_obj_permissions = UINT64_MAX, .acl_default_other_permissions = UINT64_MAX, .acl_default_mask_permissions = UINT64_MAX, - .entry_offset = UINT64_MAX, .mount_id = -1, + .generation_valid = -1, }; return n; @@ -1802,6 +1816,106 @@ static void ca_encoder_enter_state(CaEncoder *e, CaEncoderState state) { e->payload_offset = 0; } +static int ca_encoder_initialize_name_table(CaEncoder *e, CaEncoderNode *n, uint64_t sub) { + CaEncoderNode *parent; + + assert(e); + assert(n); + assert(!n->name_table); + + /* Name tables only make sense for directories */ + if (!S_ISDIR(n->stat.st_mode)) + return 0; + + /* We can't set up a name table if we don't know our position */ + if (e->archive_offset == UINT64_MAX) + return 0; + if (e->archive_offset < sub) + return -EINVAL; + + /* Link up parent's filename table */ + parent = ca_encoder_node_parent_of(e, n); + if (parent && !parent->name_table) + return 0; /* Parent has no name table, hence there's no point for us either */ + + n->name_table = ca_name_table_new(); + if (!n->name_table) + return -ENOMEM; + + n->name_table->entry_offset = e->archive_offset - sub; + n->name_table->parent = parent ? 
ca_name_table_ref(parent->name_table) : NULL; + + return 1; +} + +static int ca_encoder_add_name_table_item(CaEncoder *e, CaEncoderNode *n, const char *name) { + CaNameItem *item; + int r; + + assert(e); + assert(n); + assert(S_ISDIR(n->stat.st_mode)); + assert(name); + + if (!n->name_table) + return 0; + + if (e->archive_offset == UINT64_MAX) { + n->name_table = ca_name_table_unref(n->name_table); + return 0; + } + + assert(ca_name_table_items(n->name_table) == 0 || + ca_name_table_last(n->name_table)->end_offset != UINT64_MAX); + + r = ca_name_table_add(&n->name_table, &item); + if (r < 0) + return r; + + *item = (CaNameItem) { + .hash = siphash24(name, strlen(name), (const uint8_t[16]) CA_FORMAT_GOODBYE_HASH_KEY), + .start_offset = e->archive_offset, + .end_offset = UINT64_MAX, + }; + + return 0; +} + +static int ca_encoder_update_name_table_end_offset(CaEncoder *e, CaEncoderNode *n) { + CaNameItem *item; + int r; + + assert(e); + assert(n); + assert(S_ISDIR(n->stat.st_mode)); + + if (!n->name_table) + return 0; + + if (e->archive_offset == UINT64_MAX) { + n->name_table = ca_name_table_unref(n->name_table); + return 0; + } + + if (ca_name_table_items(n->name_table) == 0) + return 0; + + r = ca_name_table_make_writable(&n->name_table, 0); + if (r < 0) + return r; + + item = ca_name_table_last(n->name_table); + assert(item); + + assert(item->start_offset != UINT64_MAX); + assert(item->end_offset == UINT64_MAX); + + assert(item->start_offset <= e->archive_offset); + item->end_offset = e->archive_offset; + + return 0; +} + static int ca_encoder_step_node(CaEncoder *e, CaEncoderNode *n) { int r; @@ -1843,6 +1957,10 @@ static int ca_encoder_step_node(CaEncoder *e, CaEncoderNode *n) { if (r < 0) return r; + r = ca_encoder_initialize_name_table(e, n, 0); + if (r < 0) + return r; + ca_encoder_enter_state(e, CA_ENCODER_ENTRY); return CA_ENCODER_NEXT_FILE; @@ -1906,14 +2024,9 @@ static int ca_encoder_step_node(CaEncoder *e, CaEncoderNode *n) { case CA_ENCODER_NEXT_DIRENT: 
- if (!n->name_table_incomplete) { - assert(n->n_name_table > 0); - - if (e->archive_offset == UINT64_MAX) - n->name_table_incomplete = true; - else - n->name_table[n->n_name_table-1].end_offset = e->archive_offset; - } + r = ca_encoder_update_name_table_end_offset(e, n); + if (r < 0) + return r; n->dirent_idx++; @@ -1931,13 +2044,6 @@ static int ca_encoder_step_node(CaEncoder *e, CaEncoderNode *n) { for (;;) { de = ca_encoder_node_current_dirent(n); if (!de) { - if (!n->name_table_incomplete && n->n_name_table > 0) { - if (e->archive_offset == UINT64_MAX) - n->name_table_incomplete = true; - else - n->name_table[n->n_name_table-1].end_offset = e->archive_offset; - } - ca_encoder_enter_state(e, CA_ENCODER_GOODBYE); return CA_ENCODER_DATA; } @@ -1957,20 +2063,9 @@ static int ca_encoder_step_node(CaEncoder *e, CaEncoderNode *n) { n->dirent_idx++; } - if (!n->name_table_incomplete) { - - if (e->archive_offset == UINT64_MAX) - n->name_table_incomplete = true; - else { - if (!GREEDY_REALLOC(n->name_table, n->n_name_table_allocated, n->n_name_table+1)) - return -ENOMEM; - - n->name_table[n->n_name_table++] = (CaEncoderNameTable) { - .start_offset = e->archive_offset, - .hash = siphash24(de->d_name, strlen(de->d_name), (const uint8_t[16]) CA_FORMAT_GOODBYE_HASH_KEY), - }; - } - } + r = ca_encoder_add_name_table_item(e, n, de->d_name); + if (r < 0) + return r; ca_encoder_enter_state(e, CA_ENCODER_FILENAME); return CA_ENCODER_DATA; @@ -2028,6 +2123,7 @@ static void ca_encoder_advance_buffer(CaEncoder *e) { assert(e); + /* Flush generated buffer */ sz = realloc_buffer_size(&e->buffer); e->payload_offset += sz; @@ -2035,6 +2131,14 @@ static void ca_encoder_advance_buffer(CaEncoder *e) { e->archive_offset += sz; realloc_buffer_empty(&e->buffer); + + /* Flush skipped bytes */ + e->payload_offset += e->skipped_bytes; + + if (e->archive_offset != UINT64_MAX) + e->archive_offset += e->skipped_bytes; + + e->skipped_bytes = 0; } int ca_encoder_step(CaEncoder *e) { @@ -2055,7 +2159,7 
@@ int ca_encoder_step(CaEncoder *e) { return ca_encoder_step_node(e, n); } -static int ca_encoder_get_payload_data(CaEncoder *e, CaEncoderNode *n) { +static int ca_encoder_get_payload_data(CaEncoder *e, CaEncoderNode *n, uint64_t suggested_size) { uint64_t size; ssize_t m; size_t k; @@ -2066,6 +2170,8 @@ static int ca_encoder_get_payload_data(CaEncoder *e, CaEncoderNode *n) { assert(n); assert(S_ISREG(n->stat.st_mode) || S_ISBLK(n->stat.st_mode)); assert(e->state == CA_ENCODER_IN_PAYLOAD); + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); r = ca_encoder_node_get_payload_size(n, &size); if (r < 0) @@ -2074,16 +2180,15 @@ static int ca_encoder_get_payload_data(CaEncoder *e, CaEncoderNode *n) { if (e->payload_offset >= size) /* at EOF? */ return 0; - if (realloc_buffer_size(&e->buffer) > 0) /* already in buffer? */ - return 1; - k = (size_t) MIN(BUFFER_SIZE, size - e->payload_offset); + if (suggested_size != UINT64_MAX && k > suggested_size) + k = suggested_size; p = realloc_buffer_acquire(&e->buffer, k); if (!p) return -ENOMEM; - m = read(n->fd, p, k); + m = pread(n->fd, p, k, e->payload_offset); if (m < 0) { r = -errno; goto fail; @@ -2100,15 +2205,47 @@ static int ca_encoder_get_payload_data(CaEncoder *e, CaEncoderNode *n) { return r; } +static int ca_encoder_skip_payload_data(CaEncoder *e, CaEncoderNode *n, uint64_t suggested_size) { + uint64_t size, d; + int r; + + assert(e); + assert(n); + assert(S_ISREG(n->stat.st_mode) || S_ISBLK(n->stat.st_mode)); + assert(e->state == CA_ENCODER_IN_PAYLOAD); + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); + + /* This is much like ca_encoder_get_payload_data() but we don't actually try to read anything from disk, but + * instead skip right to the end of the file we are currently looking at. This is used if the caller is not + * actually interested in the payload, but just wants to proceed to whatever comes next. 
*/ + + r = ca_encoder_node_get_payload_size(n, &size); + if (r < 0) + return r; + + if (e->payload_offset >= size) /* at EOF? */ + return 0; + + d = size - e->payload_offset; + if (suggested_size != UINT64_MAX && d > suggested_size) + d = suggested_size; + if (d > SIZE_MAX) + d = SIZE_MAX; + + e->skipped_bytes = d; + return 1; +} + static int ca_encoder_get_filename_data(CaEncoder *e, const struct dirent *de) { CaFormatFilename *filename; size_t size; assert(e); assert(de); - - if (realloc_buffer_size(&e->buffer) > 0) - return 1; + assert(e->state == CA_ENCODER_FILENAME); + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); size = offsetof(CaFormatFilename, name) + strlen(de->d_name) + 1; @@ -2251,9 +2388,8 @@ static int ca_encoder_get_entry_data(CaEncoder *e, CaEncoderNode *n) { assert(e); assert(n); assert(e->state == CA_ENCODER_ENTRY); - - if (realloc_buffer_size(&e->buffer) > 0) /* Already generated */ - return 1; + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); r = ca_encoder_node_read_chattr(e, n); if (r < 0) @@ -2528,55 +2664,36 @@ static int ca_encoder_get_entry_data(CaEncoder *e, CaEncoderNode *n) { /* fprintf(stderr, "entry at %" PRIu64 " (%s)\n", e->archive_offset, entry->name); */ - n->entry_offset = e->archive_offset; - return 1; } -static int name_table_compare(const void *a, const void *b) { - const CaEncoderNameTable *x = a, *y = b; - - if (x->hash < y->hash) - return -1; - if (x->hash > y->hash) - return 1; - - if (x->start_offset < y->start_offset) - return -1; - if (x->start_offset > y->start_offset) - return 1; - - return 0; -} - static int ca_encoder_get_goodbye_data(CaEncoder *e, CaEncoderNode *n) { - const CaEncoderNameTable *table; - _cleanup_free_ CaEncoderNameTable *bst = NULL; + _cleanup_(ca_name_table_unrefp) CaNameTable *bst = NULL; CaFormatGoodbye *g; CaFormatGoodbyeTail *tail; size_t size, i; + uint64_t start_offset; + int r; assert(e); assert(n); 
assert(S_ISDIR(n->stat.st_mode)); assert(e->state == CA_ENCODER_GOODBYE); + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); - assert(sizeof(CaFormatGoodbyeTail) == sizeof(CaFormatGoodbyeItem)); - - if (realloc_buffer_size(&e->buffer) > 0) /* Already generated */ - return 1; + assert_cc(sizeof(CaFormatGoodbyeTail) == sizeof(CaFormatGoodbyeItem)); /* If we got here through a seek we don't know the correct addresses of the directory entries, hence can't * correctly generate the name GOODBYE record. */ - if (n->name_table_incomplete) + if (!n->name_table) return -ENOLINK; - /* Similar, if we don't know the entry offset */ - if (n->entry_offset == UINT64_MAX) + if (e->archive_offset == UINT64_MAX) return -ENOLINK; size = offsetof(CaFormatGoodbye, items) + - sizeof(CaFormatGoodbyeItem) * n->n_name_table + + sizeof(CaFormatGoodbyeItem) * ca_name_table_items(n->name_table) + sizeof(CaFormatGoodbyeTail); g = realloc_buffer_acquire(&e->buffer, size); @@ -2588,38 +2705,35 @@ static int ca_encoder_get_goodbye_data(CaEncoder *e, CaEncoderNode *n) { .size = htole64(size), }; - if (n->n_name_table <= 1) - table = n->name_table; - else { - qsort(n->name_table, n->n_name_table, sizeof(CaEncoderNameTable), name_table_compare); - - bst = new(CaEncoderNameTable, n->n_name_table); - if (!bst) - return -ENOMEM; + r = ca_name_table_make_bst(n->name_table, &bst); + if (r < 0) + return r; - ca_make_bst(n->name_table, n->n_name_table, sizeof(CaEncoderNameTable), bst); + if (e->archive_offset < e->payload_offset) + return -EINVAL; - table = bst; - } + /* After seeking we might be pointing into the middle of a GOODBYE record. Let's calculate the actual start + * position of the GOODBYE record by subtracting the offset into the record. 
*/ + start_offset = e->archive_offset - e->payload_offset; - for (i = 0; i < n->n_name_table; i++) { - assert(table[i].start_offset < e->archive_offset); - assert(table[i].end_offset <= e->archive_offset); - assert(table[i].start_offset < table[i].end_offset); + for (i = 0; i < bst->n_items; i++) { + assert(bst->items[i].start_offset < start_offset); + assert(bst->items[i].end_offset <= start_offset); + assert(bst->items[i].start_offset < bst->items[i].end_offset); g->items[i] = (CaFormatGoodbyeItem) { - .offset = htole64(e->archive_offset - table[i].start_offset), - .size = htole64(table[i].end_offset - table[i].start_offset), - .hash = htole64(table[i].hash), + .offset = htole64(start_offset - bst->items[i].start_offset), + .size = htole64(bst->items[i].end_offset - bst->items[i].start_offset), + .hash = htole64(bst->items[i].hash), }; } - assert(n->entry_offset != UINT64_MAX); - assert(n->entry_offset < e->archive_offset); + assert(bst->entry_offset != UINT64_MAX); + assert(bst->entry_offset < start_offset); /* Write the tail */ - tail = (CaFormatGoodbyeTail*) (g->items + n->n_name_table); - write_le64(&tail->entry_offset, e->archive_offset - n->entry_offset); + tail = (CaFormatGoodbyeTail*) (g->items + bst->n_items); + write_le64(&tail->entry_offset, start_offset - bst->entry_offset); write_le64(&tail->size, size); write_le64(&tail->marker, CA_FORMAT_GOODBYE_TAIL_MARKER); @@ -2642,22 +2756,43 @@ static int ca_encoder_write_digest(CaEncoder *e, CaDigest **digest, const void * return 0; } -int ca_encoder_get_data(CaEncoder *e, const void **ret, size_t *ret_size) { +int ca_encoder_get_data( + CaEncoder *e, + uint64_t suggested_size, + const void **ret, + size_t *ret_size) { + bool skip_applied = false; CaEncoderNode *n; + bool really_want_hardlink_digest, really_want_payload_digest; int r; if (!e) return -EINVAL; - if (!ret) - return -EINVAL; - if (!ret_size) - return -EINVAL; + + /* A previous call in the same iteration was done with ret == NULL, and hence we 
decided to skip bytes, but the + * new call wants to see data for the same iteration after all? That's not supported! */ + if (e->skipped_bytes > 0 && ret) + return -EKEYREVOKED; + + if (realloc_buffer_size(&e->buffer) > 0 || + e->skipped_bytes > 0) /* already data in buffer, if so return it again? */ + goto done; n = ca_encoder_current_node(e); if (!n) return -EUNATCH; + really_want_hardlink_digest = + e->want_hardlink_digest && + !e->hardlink_digest_invalid && + IN_SET(e->state, CA_ENCODER_ENTRY, CA_ENCODER_IN_PAYLOAD); + + really_want_payload_digest = + e->want_payload_digest && + !e->payload_digest_invalid && + e->state == CA_ENCODER_IN_PAYLOAD; + switch (e->state) { case CA_ENCODER_ENTRY: @@ -2670,9 +2805,14 @@ int ca_encoder_get_data(CaEncoder *e, const void **ret, size_t *ret_size) { case CA_ENCODER_IN_PAYLOAD: assert(S_ISREG(n->stat.st_mode) || S_ISBLK(n->stat.st_mode)); - skip_applied = true; + if (!ret && !e->want_archive_digest && !really_want_hardlink_digest && !really_want_payload_digest) + /* OK, we can shortcut this, as neither the caller is interested in this data, nor do we need + * it for digest calculation. In this case let's just skip the whole shebang. 
*/ + r = ca_encoder_skip_payload_data(e, n, suggested_size); + else + r = ca_encoder_get_payload_data(e, n, suggested_size); - r = ca_encoder_get_payload_data(e, n); + skip_applied = true; break; case CA_ENCODER_FILENAME: { @@ -2697,7 +2837,6 @@ int ca_encoder_get_data(CaEncoder *e, const void **ret, size_t *ret_size) { default: return -ENODATA; } - if (r < 0) return r; if (!skip_applied && r > 0) { @@ -2710,20 +2849,30 @@ int ca_encoder_get_data(CaEncoder *e, const void **ret, size_t *ret_size) { } if (r == 0) { /* EOF */ - *ret = NULL; - *ret_size = 0; + + assert(realloc_buffer_size(&e->buffer) == 0); + assert(e->skipped_bytes == 0); + + if (ret) + *ret = NULL; + if (ret_size) + *ret_size = 0; return 0; } if (e->want_archive_digest) ca_encoder_write_digest(e, &e->archive_digest, realloc_buffer_data(&e->buffer), realloc_buffer_size(&e->buffer)); - if (e->want_hardlink_digest && !e->hardlink_digest_invalid && IN_SET(e->state, CA_ENCODER_ENTRY, CA_ENCODER_IN_PAYLOAD)) + if (really_want_hardlink_digest) ca_encoder_write_digest(e, &e->hardlink_digest, realloc_buffer_data(&e->buffer), realloc_buffer_size(&e->buffer)); - if (e->want_payload_digest && !e->payload_digest_invalid && e->state == CA_ENCODER_IN_PAYLOAD) + if (really_want_payload_digest) ca_encoder_write_digest(e, &e->payload_digest, realloc_buffer_data(&e->buffer), realloc_buffer_size(&e->buffer)); - *ret = realloc_buffer_data(&e->buffer); - *ret_size = realloc_buffer_size(&e->buffer); +done: + if (ret) + *ret = realloc_buffer_data(&e->buffer); + + if (ret_size) + *ret_size = realloc_buffer_size(&e->buffer) + e->skipped_bytes; return 1; } @@ -2785,6 +2934,7 @@ static int ca_encoder_node_path(CaEncoder *e, CaEncoderNode *node, char **ret) { *ret = p; p = NULL; + return 0; } @@ -3163,9 +3313,9 @@ int ca_encoder_current_archive_offset(CaEncoder *e, uint64_t *ret) { } int ca_encoder_current_location(CaEncoder *e, uint64_t add, CaLocation **ret) { - CaLocationDesignator designator; - CaEncoderNode *node; + 
CaEncoderNode *node, *name_table_node; _cleanup_free_ char *path = NULL; + CaLocationDesignator designator; CaLocation *l; int r; @@ -3181,18 +3331,34 @@ int ca_encoder_current_location(CaEncoder *e, uint64_t add, CaLocation **ret) { switch (e->state) { case CA_ENCODER_ENTRY: + /* In CA_ENCODER_ENTRY state, we add the parent's name table to the location. For two reasons: our own + * name table will be empty anyway this early. And we generate ENTRY records for all files not just + * directories, but name tables apply only to directories. By not encoding the name table here we can + * thus treat all file types the same way. */ + + name_table_node = ca_encoder_node_parent_of(e, node); designator = CA_LOCATION_ENTRY; break; case CA_ENCODER_IN_PAYLOAD: assert(S_ISREG(node->stat.st_mode) || S_ISBLK(node->stat.st_mode)); + /* In CA_ENCODER_IN_PAYLOAD state we also use the parent's name table. Using our own makes no sense, as + * we won't have any name table on our own node, as only directories have that, and in this state we + * can't be in a directory */ + + name_table_node = ca_encoder_node_parent_of(e, node); designator = CA_LOCATION_PAYLOAD; break; case CA_ENCODER_FILENAME: assert(S_ISDIR(node->stat.st_mode)); + name_table_node = node; + + /* Here's a tweak: in CA_ENCODER_FILENAME state we actually encode the child's data, as our current + * node might be the directory, but we need to serialize at which directory entry within it we + * currently are. 
*/ node = ca_encoder_current_child_node(e); if (!node) return -EUNATCH; @@ -3203,6 +3369,7 @@ int ca_encoder_current_location(CaEncoder *e, uint64_t add, CaLocation **ret) { case CA_ENCODER_GOODBYE: assert(S_ISDIR(node->stat.st_mode)); + name_table_node = node; designator = CA_LOCATION_GOODBYE; break; @@ -3214,12 +3381,26 @@ int ca_encoder_current_location(CaEncoder *e, uint64_t add, CaLocation **ret) { if (r < 0 && r != -ENOTDIR) return r; + r = ca_encoder_node_read_generation(e, node); + if (r < 0) + return r; + r = ca_location_new(path, designator, e->payload_offset + add, UINT64_MAX, &l); if (r < 0) return r; + l->mtime = MAX(timespec_to_nsec(node->stat.st_mtim), + timespec_to_nsec(node->stat.st_ctim)); + l->inode = node->stat.st_ino; + l->generation_valid = node->generation_valid > 0; + l->generation = node->generation; + + l->name_table = name_table_node ? ca_name_table_ref(name_table_node->name_table) : NULL; + + if (e->archive_offset != UINT64_MAX) + l->archive_offset = e->archive_offset + add; + *ret = l; - path = NULL; return 0; } @@ -3243,7 +3424,7 @@ static int ca_encoder_node_seek_child(CaEncoder *e, CaEncoderNode *n, const char if (r < 0) return r; - n->name_table_incomplete = true; + n->name_table = ca_name_table_unref(n->name_table); de = ca_encoder_node_current_dirent(n); if (de && streq(name, de->d_name)) { @@ -3321,8 +3502,16 @@ static int ca_encoder_seek_path_and_enter(CaEncoder *e, const char *path) { assert(e); - if (isempty(path)) + if (isempty(path)) { + CaEncoderNode *node; + + node = ca_encoder_current_node(e); + if (!node) + return -EUNATCH; + + node->name_table = ca_name_table_unref(node->name_table); return 0; + } r = ca_encoder_seek_path(e, path); if (r < 0) @@ -3331,6 +3520,35 @@ static int ca_encoder_seek_path_and_enter(CaEncoder *e, const char *path) { return ca_encoder_enter_child(e); } +static int ca_encoder_node_install_name_table(CaEncoder *e, CaEncoderNode *node, CaNameTable *t) { + assert(e); + assert(node); + 
assert(S_ISDIR(node->stat.st_mode)); + assert(t); + + for (;;) { + ca_name_table_unref(node->name_table); + node->name_table = ca_name_table_ref(t); + + node = ca_encoder_node_parent_of(e, node); + if (!node) + break; + + t = t->parent; + if (!t) { + log_debug("Name table chain ended prematurely."); + return -ESPIPE; + } + } + + if (t->parent) { + log_debug("Name table chain too long."); + return -ESPIPE; + } + + return 0; +} + int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { CaEncoderNode *node; int r; @@ -3352,7 +3570,6 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { switch (location->designator) { case CA_LOCATION_ENTRY: - r = ca_encoder_seek_path_and_enter(e, location->path); if (r < 0) return r; @@ -3361,13 +3578,30 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { assert(node); node->dirent_idx = 0; - ca_encoder_enter_state(e, CA_ENCODER_ENTRY); - e->payload_offset = location->offset; - e->archive_offset = UINT64_MAX; + e->archive_offset = location->archive_offset; + + if (location->name_table) { + CaEncoderNode *parent; + + parent = ca_encoder_node_parent_of(e, node); + if (!parent) + return -ENXIO; + + /* Install the name table included in the location for our parents… */ + r = ca_encoder_node_install_name_table(e, parent, location->name_table); + if (r < 0) + return r; + } + + /* …and a new one for ourselves (if we are a directory that is). */ + r = ca_encoder_initialize_name_table(e, node, location->offset); + if (r < 0) + return r; realloc_buffer_empty(&e->buffer); + e->skipped_bytes = 0; ca_digest_reset(e->archive_digest); @@ -3380,7 +3614,7 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { if (e->want_hardlink_digest && !e->hardlink_digest_invalid) ca_digest_reset(e->hardlink_digest); - return CA_ENCODER_DATA; + return location->offset > 0 ? 
CA_ENCODER_DATA : CA_ENCODER_NEXT_FILE; case CA_LOCATION_PAYLOAD: { uint64_t size; @@ -3402,18 +3636,27 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { if (location->offset >= size) return -ENXIO; - if (lseek(node->fd, location->offset, SEEK_SET) == (off_t) -1) - return -errno; - ca_encoder_enter_state(e, CA_ENCODER_IN_PAYLOAD); e->payload_offset = location->offset; + e->archive_offset = location->archive_offset; - if (CA_ENCODER_AT_ROOT(e)) + if (e->archive_offset == UINT64_MAX && CA_ENCODER_AT_ROOT(e)) e->archive_offset = location->offset; - else - e->archive_offset = UINT64_MAX; + + if (location->name_table) { + CaEncoderNode *parent; + + parent = ca_encoder_node_parent_of(e, node); + if (!parent) + return -ENXIO; + + r = ca_encoder_node_install_name_table(e, parent, location->name_table); + if (r < 0) + return r; + } realloc_buffer_empty(&e->buffer); + e->skipped_bytes = 0; ca_digest_reset(e->archive_digest); @@ -3438,11 +3681,17 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { return -ENOTDIR; ca_encoder_enter_state(e, CA_ENCODER_FILENAME); - e->payload_offset = location->offset; - e->archive_offset = UINT64_MAX; + e->archive_offset = location->archive_offset; + + if (location->name_table) { + r = ca_encoder_node_install_name_table(e, node, location->name_table); + if (r < 0) + return r; + } realloc_buffer_empty(&e->buffer); + e->skipped_bytes = 0; ca_digest_reset(e->archive_digest); @@ -3465,13 +3714,18 @@ int ca_encoder_seek_location(CaEncoder *e, CaLocation *location) { return r; node->dirent_idx = node->n_dirents; - node->name_table_incomplete = node->n_dirents > 0; ca_encoder_enter_state(e, CA_ENCODER_GOODBYE); - e->payload_offset = location->offset; - e->archive_offset = UINT64_MAX; + e->archive_offset = location->archive_offset; + + if (location->name_table) { + r = ca_encoder_node_install_name_table(e, node, location->name_table); + if (r < 0) + return r; + } realloc_buffer_empty(&e->buffer); + 
e->skipped_bytes = 0; ca_digest_reset(e->archive_digest); diff --git a/src/caencoder.h b/src/caencoder.h index f14677b2..104ea7c5 100644 --- a/src/caencoder.h +++ b/src/caencoder.h @@ -39,7 +39,7 @@ int ca_encoder_get_base_fd(CaEncoder *e); int ca_encoder_step(CaEncoder *e); /* Output: archive stream data */ -int ca_encoder_get_data(CaEncoder *e, const void **ret, size_t *ret_size); +int ca_encoder_get_data(CaEncoder *e, uint64_t suggested_size, const void **ret, size_t *ret_size); int ca_encoder_current_path(CaEncoder *e, char **ret); int ca_encoder_current_mode(CaEncoder *d, mode_t *ret); diff --git a/src/caindex.c b/src/caindex.c index cea472b1..1ee74b73 100644 --- a/src/caindex.c +++ b/src/caindex.c @@ -1089,6 +1089,8 @@ int ca_index_seek(CaIndex *i, uint64_t offset, uint64_t *ret_skip) { if (r < 0) return r; + i->previous_chunk_offset = first_chunk_end; + if (ret_skip) *ret_skip = offset - first_chunk_end; diff --git a/src/calocation.c b/src/calocation.c index e404d24e..c6f5a1e3 100644 --- a/src/calocation.c +++ b/src/calocation.c @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ +#include #include +#include +#include #include "calocation.h" #include "util.h" @@ -36,10 +39,12 @@ int ca_location_new( } } + l->n_ref = 1; l->designator = designator; l->offset = designator == CA_LOCATION_VOID ? 
0 : offset; l->size = size; - l->n_ref = 1; + l->mtime = UINT64_MAX; + l->archive_offset = UINT64_MAX; *ret = l; return 0; @@ -59,6 +64,7 @@ CaLocation* ca_location_unref(CaLocation *l) { free(l->formatted); ca_file_root_unref(l->root); + ca_name_table_unref(l->name_table); return mfree(l); } @@ -73,46 +79,138 @@ CaLocation* ca_location_ref(CaLocation *l) { return l; } +int ca_location_copy(CaLocation *l, CaLocation **ret) { + CaLocation *copy = NULL; + int r; + + if (!ret) + return -EINVAL; + + if (!l) { + *ret = NULL; + return 0; + } + + r = ca_location_new(l->path, l->designator, l->offset, l->size, &copy); + if (r < 0) + return r; + + copy->root = ca_file_root_ref(l->root); + + copy->mtime = l->mtime; + copy->inode = l->inode; + copy->generation_valid = l->generation_valid; + copy->generation = l->generation; + copy->name_table = ca_name_table_ref(l->name_table); + copy->archive_offset = l->archive_offset; + + *ret = copy; + + return 1; +} + const char* ca_location_format(CaLocation *l) { + _cleanup_free_ char *v = NULL; + if (!l) return NULL; - if (!l->formatted) { + if (l->formatted) + return l->formatted; + + if (l->size == UINT64_MAX) { + if (asprintf(&v, "%s+%c%" PRIu64, strempty(l->path), (char) l->designator, l->offset) < 0) + return NULL; + } else { + if (asprintf(&v, "%s+%c%" PRIu64 ":%" PRIu64, strempty(l->path), (char) l->designator, l->offset, l->size) < 0) + return NULL; + } - if (l->size == UINT64_MAX) { - if (asprintf(&l->formatted, "%s+%c%" PRIu64, strempty(l->path), (char) l->designator, l->offset) < 0) + if (l->mtime != UINT64_MAX) { + if (l->generation_valid) { + /* Note that the kernel suggests the generation is an "int", i.e. a signed entity. We'll format + * it as unsigned here however, to avoid another "-" in the formatted version. 
*/ + if (asprintf(&l->formatted, "%s@%" PRIu64 ".%" PRIu64 ".%u", v, l->inode, l->mtime, (unsigned) l->generation) < 0) return NULL; } else { - if (asprintf(&l->formatted, "%s+%c%" PRIu64 ":%" PRIu64, strempty(l->path), (char) l->designator, l->offset, l->size) < 0) + if (asprintf(&l->formatted, "%s@%" PRIu64 ".%" PRIu64, v, l->inode, l->mtime) < 0) return NULL; } + } else { + l->formatted = v; + v = NULL; } return l->formatted; } int ca_location_parse(const char *text, CaLocation **ret) { - uint64_t offset, size; - const char *e, *c; + uint64_t offset, size, mtime = UINT64_MAX, inode = 0; + const char *q, *e, *c, *u; CaLocation *l; char *n; - int r; + int r, generation = 0; + bool generation_valid = false; if (!text) return -EINVAL; if (!ret) return -EINVAL; - e = strrchr(text, '+'); - if (!e) + u = strrchr(text, '+'); + if (!u) return -EINVAL; - if (e != text && text[0] == '/') + if (u != text && text[0] == '/') /* Don't allow absolute paths */ return -EINVAL; - if (!CA_LOCATION_DESIGNATOR_VALID(e[1])) + if (!CA_LOCATION_DESIGNATOR_VALID(u[1])) return -EINVAL; + /* The '@' suffix is optional */ + q = strchr(u, '@'); + if (q) { + const char *z, *k; + + e = strndupa(u, q - u); + q++; + + /* There are either two or three dot separated parts after the '@' */ + z = strchr(q, '.'); + if (!z) + return -EINVAL; + + /* The first part is the inode number */ + q = strndupa(q, z - q); + r = safe_atou64(q, &inode); + if (r < 0) + return r; + z++; + + k = strchr(z, '.'); + if (k) { + unsigned g; + + z = strndupa(z, k - z); + + /* The third part is the generation, if there is one */ + r = safe_atou(k + 1, &g); + if (r < 0) + return r; + + generation = (int) g; + generation_valid = true; + } + + /* The second part is the mtime */ + r = safe_atou64(z, &mtime); + if (r < 0) + return r; + if (mtime == UINT64_MAX) + return -EINVAL; + } else + e = u; + c = strchr(e+2, ':'); if (c) { const char *a; @@ -137,10 +235,10 @@ int ca_location_parse(const char *text, CaLocation **ret) { if 
(size != UINT64_MAX && offset + size < offset) return -EINVAL; - if (e == text) + if (u == text) n = NULL; else { - n = strndup(text, e - text); + n = strndup(text, u - text); if (!n) return -ENOMEM; } @@ -161,13 +259,17 @@ int ca_location_parse(const char *text, CaLocation **ret) { l->size = size; l->designator = e[1]; l->n_ref = 1; + l->inode = inode; + l->mtime = mtime; + l->generation = generation; + l->generation_valid = generation_valid; + l->archive_offset = UINT64_MAX; *ret = l; return 0; } int ca_location_patch_size(CaLocation **l, uint64_t size) { - CaLocation *n; int r; /* Since we consider CaLocation objects immutable, let's duplicate the object, unless we are the only owner of it */ @@ -180,26 +282,24 @@ int ca_location_patch_size(CaLocation **l, uint64_t size) { if ((*l)->size == size) return 0; - if ((*l)->n_ref == 1) { - (*l)->size = size; + if ((*l)->n_ref == 1) (*l)->formatted = mfree((*l)->formatted); - return 1; - } - - r = ca_location_new((*l)->path, (*l)->designator, (*l)->offset, size, &n); - if (r < 0) - return r; + else { + CaLocation *n; - n->root = ca_file_root_ref((*l)->root); + r = ca_location_copy(*l, &n); + if (r < 0) + return r; - ca_location_unref(*l); - *l = n; + ca_location_unref(*l); + *l = n; + } + (*l)->size = size; return 1; } int ca_location_patch_root(CaLocation **l, CaFileRoot *root) { - CaLocation *copy; int r; if (!l) @@ -210,29 +310,24 @@ int ca_location_patch_root(CaLocation **l, CaFileRoot *root) { if ((*l)->root == root) return 0; - if ((*l)->n_ref == 1) { - ca_file_root_unref((*l)->root); - (*l)->root = ca_file_root_ref(root); - return 0; - } + if ((*l)->n_ref != 1) { + CaLocation *n; - r = ca_location_new((*l)->path, - (*l)->designator, - (*l)->offset, - (*l)->size, - &copy); - if (r < 0) - return r; + r = ca_location_copy(*l, &n); + if (r < 0) + return r; - copy->root = ca_file_root_ref(root); + ca_location_unref(*l); + *l = n; + } - ca_location_unref(*l); - *l = copy; + ca_file_root_unref((*l)->root); + (*l)->root = 
ca_file_root_ref(root); return 0; } + int ca_location_advance(CaLocation **l, uint64_t n_bytes) { - CaLocation *n; int r; if (!l) @@ -243,33 +338,35 @@ int ca_location_advance(CaLocation **l, uint64_t n_bytes) { if (n_bytes == 0) return 0; - if ((*l)->size == UINT64_MAX) - return -ESPIPE; - if (n_bytes > (*l)->size) + if ((*l)->size != UINT64_MAX && n_bytes > (*l)->size) return -ESPIPE; - if ((*l)->n_ref == 1) { - if ((*l)->designator != CA_LOCATION_VOID) - (*l)->offset += n_bytes; - (*l)->size -= n_bytes; + if ((*l)->n_ref == 1) (*l)->formatted = mfree((*l)->formatted); - return 1; + else { + CaLocation *n; + + r = ca_location_copy(*l, &n); + if (r < 0) + return r; + + ca_location_unref(*l); + *l = n; } - r = ca_location_new((*l)->path, (*l)->designator, (*l)->offset + n_bytes, (*l)->size - n_bytes, &n); - if (r < 0) - return r; + if ((*l)->designator != CA_LOCATION_VOID) + (*l)->offset += n_bytes; - n->root = ca_file_root_ref((*l)->root); + if ((*l)->archive_offset != UINT64_MAX) + (*l)->archive_offset += n_bytes; - ca_location_unref(*l); - *l = n; + if ((*l)->size != UINT64_MAX) + (*l)->size -= n_bytes; return 1; } int ca_location_merge(CaLocation **a, CaLocation *b) { - CaLocation *copy; int r; if (!a) @@ -295,29 +392,40 @@ int ca_location_merge(CaLocation **a, CaLocation *b) { if ((*a)->designator != CA_LOCATION_VOID && (*a)->offset + (*a)->size != b->offset) return 0; - if ((*a)->n_ref == 1) { - (*a)->size += b->size; - (*a)->formatted = mfree((*a)->formatted); - return 1; + if ((*a)->mtime != b->mtime) + return 0; + + if ((*a)->mtime != UINT64_MAX) { + + if ((*a)->inode != b->inode) + return 0; + + if ((*a)->generation_valid != b->generation_valid) + return 0; + + if ((*a)->generation_valid && (*a)->generation != b->generation) + return 0; } - r = ca_location_new((*a)->path, - (*a)->designator, - (*a)->offset, - (*a)->size + b->size, - &copy); - if (r < 0) - return r; + if ((*a)->n_ref == 1) + (*a)->formatted = mfree((*a)->formatted); + else { + CaLocation *n; - 
copy->root = ca_file_root_ref((*a)->root); + r = ca_location_copy(*a, &n); + if (r < 0) + return r; - ca_location_unref(*a); - *a = copy; + ca_location_unref(*a); + *a = n; + } + (*a)->size += b->size; return 1; } int ca_location_open(CaLocation *l) { + _cleanup_(safe_closep) int fd = -1; int r; if (!l) @@ -332,9 +440,9 @@ int ca_location_open(CaLocation *l) { if (l->root->fd >= 0) { if (isempty(l->path)) - r = fcntl(l->root->fd, F_DUPFD_CLOEXEC, 3); + fd = fcntl(l->root->fd, F_DUPFD_CLOEXEC, 3); else - r = openat(l->root->fd, l->path, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); + fd = openat(l->root->fd, l->path, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); } else { const char *p; @@ -348,11 +456,113 @@ int ca_location_open(CaLocation *l) { else p = strjoina(l->root->path, "/", l->path); - r = open(p, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); + fd = open(p, O_CLOEXEC|O_RDONLY|O_NOCTTY|O_NOFOLLOW); } - - if (r < 0) + if (fd < 0) return -errno; + if (l->mtime != UINT64_MAX) { + struct stat st; + uint64_t n; + + /* Ensure inode, mtime and generation still match */ + + if (fstat(fd, &st) < 0) + return -errno; + + if (st.st_ino != l->inode) + return -ESTALE; + + n = MAX(timespec_to_nsec(st.st_mtim), timespec_to_nsec(st.st_ctim)); + if (l->mtime != n) + return -ESTALE; + + if (l->generation_valid) { + int v; + + if (ioctl(fd, FS_IOC_GETVERSION, &v) < 0) + return -ESTALE; /* If the fs doesn't support the FS_IOC_GETVERSION ioctl anymore, the file has changed */ + + if (v != l->generation) + return -ESTALE; + } + } + + r = fd; + fd = -1; + return r; } + +int ca_location_id_make(CaDigest *digest, CaLocation *l, bool include_size, CaChunkID *ret) { + if (!digest) + return -EINVAL; + if (!l) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (ca_digest_get_size(digest) != sizeof(CaChunkID)) + return -EINVAL; + + ca_digest_reset(digest); + + if (l->path) + ca_digest_write(digest, l->path, strlen(l->path)); + + ca_digest_write_u8(digest, (uint8_t) l->designator); + 
ca_digest_write_u64(digest, l->offset); + + if (include_size && l->size != UINT64_MAX) + ca_digest_write_u64(digest, l->size); + + if (l->mtime != UINT64_MAX) { + ca_digest_write_u64(digest, l->mtime); + ca_digest_write_u64(digest, l->inode); + + if (l->generation_valid) + ca_digest_write_u32(digest, (uint32_t) l->generation); + } + + memcpy(ret, ca_digest_read(digest), sizeof(CaChunkID)); + return 0; +} + +bool ca_location_equal(CaLocation *a, CaLocation *b, bool compare_size) { + if (a == b) + return true; + + if (!a || !b) + return false; + + if (a->designator != b->designator) + return false; + + if (a->offset != b->offset) + return false; + + if (!streq(strempty(a->path), strempty(b->path))) + return false; + + if (compare_size) + if (a->size != b->size) + return false; + + + if (a->mtime != b->mtime) + return false; + + if (a->mtime != UINT64_MAX) { + if (a->inode != b->inode) + return false; + + if (a->generation_valid != b->generation_valid) + return false; + + if (a->generation_valid) + if (a->generation != b->generation) + return false; + } + + return true; +} diff --git a/src/calocation.h b/src/calocation.h index e7ef3b88..b84969d6 100644 --- a/src/calocation.h +++ b/src/calocation.h @@ -6,7 +6,10 @@ #include #include +#include "cachunkid.h" +#include "cadigest.h" #include "cafileroot.h" +#include "canametable.h" #include "util.h" /* Describes the location of some data in a directory hierarchy. This is useful for cache management (i.e. remember @@ -34,21 +37,42 @@ static inline bool CA_LOCATION_DESIGNATOR_VALID(CaLocationDesignator d) { * once. When we change it we make copies. 
typedef struct CaLocation { unsigned n_ref; + + /* The following three fields are unconditionally part of the location */ CaLocationDesignator designator; char *path; uint64_t offset; - uint64_t size; /* if unspecified, may be UINT64_MAX */ + + /* The following two fields are optional */ + uint64_t size; /* if unspecified UINT64_MAX */ + CaFileRoot *root; /* if unspecified NULL */ + + /* The following are used to detect file changes, and are optional too */ + uint64_t mtime; + uint64_t inode; /* only set if mtime is != UINT64_MAX */ + int generation; /* only valid if generation_valid is true */ + bool generation_valid; + + /* The following encodes the file name tables so far built up for this node and all its parents, and is + * optional (in which case it is NULL) */ + CaNameTable *name_table; + + /* The archive offset at this location, if that's useful (if unspecified is UINT64_MAX) */ + uint64_t archive_offset; + + /* The formatted version of this location, if requested before */ char *formatted; - CaFileRoot *root; } CaLocation; int ca_location_new(const char *path, CaLocationDesignator designator, uint64_t offset, uint64_t size, CaLocation **ret); - #define ca_location_new_void(size, ret) ca_location_new(NULL, CA_LOCATION_VOID, 0, size, ret) +int ca_location_copy(CaLocation *l, CaLocation **ret); CaLocation* ca_location_unref(CaLocation *l); CaLocation* ca_location_ref(CaLocation *l); +DEFINE_TRIVIAL_CLEANUP_FUNC(CaLocation*, ca_location_unref); + const char* ca_location_format(CaLocation *l); int ca_location_parse(const char *text, CaLocation **ret); @@ -62,4 +86,8 @@ int ca_location_merge(CaLocation **a, CaLocation *b); int ca_location_open(CaLocation *l); +int ca_location_id_make(CaDigest *digest, CaLocation *l, bool include_size, CaChunkID *ret); + +bool ca_location_equal(CaLocation *a, CaLocation *b, bool compare_size); + #endif diff --git a/src/canametable.c b/src/canametable.c new file mode 100644 index 00000000..352e0a99 --- /dev/null +++ 
b/src/canametable.c @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#include "camakebst.h" +#include "canametable.h" +#include "realloc-buffer.h" + +#define START_ITEMS 16U + +CaNameTable *ca_name_table_new_size(size_t m) { + CaNameTable *t; + + if (m == (size_t) -1 || m < START_ITEMS) + m = START_ITEMS; + + t = malloc0(offsetof(CaNameTable, items) + m * sizeof(CaNameItem)); + if (!t) + return NULL; + + t->n_ref = 1; + t->entry_offset = UINT64_MAX; + + t->n_allocated = m; + + return t; +} + +CaNameTable *ca_name_table_ref(CaNameTable *t) { + if (!t) + return NULL; + + assert(t->n_ref > 0); + + t->n_ref++; + return t; +} + +CaNameTable *ca_name_table_unref(CaNameTable *t) { + if (!t) + return NULL; + + assert(t->n_ref > 0); + + t->n_ref--; + + if (t->n_ref > 0) + return NULL; + + ca_name_table_unref(t->parent); + free(t->formatted); + free(t); + + return NULL; +} + +int ca_name_table_make_writable(CaNameTable **t, size_t add) { + CaNameTable *nt; + size_t om, nm; + + if (!t) + return -EINVAL; + + if ((*t) && (*t)->n_ref == 1) { + size_t k; + + k = (*t)->n_items + add; + if (k < (*t)->n_items) + return -EOVERFLOW; + + if ((*t)->n_allocated >= k) + return 0; + } + + om = *t ? 
(*t)->n_items : 0; + nm = MAX(om * 2, START_ITEMS); + + if (nm < om+1) + return -EOVERFLOW; + + nt = malloc0(offsetof(CaNameTable, items) + nm * sizeof(CaNameItem)); + if (!nt) + return -ENOMEM; + + nt->n_ref = 1; + nt->entry_offset = *t ? (*t)->entry_offset : UINT64_MAX; /* *t may be NULL here: start from an empty table, same defaults as ca_name_table_new_size() */ + nt->n_items = om; + nt->n_allocated = nm; + nt->parent = *t ? ca_name_table_ref((*t)->parent) : NULL; + + if (*t) + memcpy(nt->items, (*t)->items, om * sizeof(CaNameItem)); + + ca_name_table_unref(*t); + *t = nt; + + return 0; +} + +int ca_name_table_add(CaNameTable **t, CaNameItem **ret) { + int r; + + r = ca_name_table_make_writable(t, 1); + if (r < 0) + return r; + + assert(*t); + assert((*t)->n_ref == 1); + assert((*t)->n_allocated > (*t)->n_items); + + (*t)->formatted = mfree((*t)->formatted); + + *ret = (*t)->items + (*t)->n_items ++; + return 0; +} + +char* ca_name_table_format(CaNameTable *t) { + _cleanup_(realloc_buffer_free) ReallocBuffer buffer = {}; + CaNameTable *p; + char nul = 0; + int r; + + if (!t) + return NULL; + + if (t->formatted) + return t->formatted; + + for (p = t; p; p = p->parent) { + size_t i; + + r = realloc_buffer_printf(&buffer, "O%" PRIx64, p->entry_offset); + if (r < 0) + return NULL; + + for (i = 0; i < p->n_items; i++) { + CaNameItem *item; + + item = p->items + i; + + r = realloc_buffer_printf(&buffer, "H%" PRIx64 "S%" PRIx64 "X%" PRIx64, + item->hash, item->start_offset, item->end_offset); + if (r < 0) + return NULL; + } + } + + if (!realloc_buffer_append(&buffer, &nul, sizeof(nul))) + return NULL; + + t->formatted = realloc_buffer_steal(&buffer); + return t->formatted; +} + +static int parse_one(const char **text, uint64_t *ret_hash, uint64_t *ret_start_offset, uint64_t *ret_end_offset) { + uint64_t hash, start_offset, end_offset; + const char *x, *c; + size_t z; + int r; + + assert(text); + assert(*text); + assert(ret_hash); + assert(ret_start_offset); + assert(ret_end_offset); + + x = 
*text; + + if (*x != 'H') + return -EINVAL; + x++; + + z = strspn(x, HEXDIGITS); + c =
strndupa(x, z); + r = safe_atox64(c, &hash); + if (r < 0) + return r; + + x += z; + + if (*x != 'S') + return -EINVAL; + x++; + + z = strspn(x, HEXDIGITS); + c = strndupa(x, z); + r = safe_atox64(c, &start_offset); + if (r < 0) + return r; + + x += z; + + if (*x != 'X') + return -EINVAL; + x++; + + z = strspn(x, HEXDIGITS); + c = strndupa(x, z); + r = safe_atox64(c, &end_offset); + if (r < 0) + return r; + + x += z; + + *text = x; + + *ret_hash = hash; + *ret_start_offset = start_offset; + *ret_end_offset = end_offset; + + return 0; +} + +int ca_name_table_parse(const char **text, CaNameTable **ret) { + _cleanup_(ca_name_table_unrefp) CaNameTable *nt = NULL; + const char *x, *eo; + size_t z; + int r; + + if (!text) + return -EINVAL; + if (!*text) + return -EINVAL; + if (!ret) + return -EINVAL; + + x = *text; + if (x[0] != 'O') + return -EINVAL; + x++; + + nt = ca_name_table_new(); + if (!nt) + return -ENOMEM; + + z = strspn(x, HEXDIGITS); + eo = strndupa(x, z); + r = safe_atox64(eo, &nt->entry_offset); + if (r < 0) + return r; + + x += z; + + for (;;) { + uint64_t hash, start_offset, end_offset; + CaNameItem *item; + + if (*x == 'O') { + r = ca_name_table_parse(&x, &nt->parent); + if (r < 0) + return r; + + break; + } + if (*x == 0) + break; + + r = parse_one(&x, &hash, &start_offset, &end_offset); + if (r < 0) + return r; + + r = ca_name_table_add(&nt, &item); + if (r < 0) + return r; + + item->hash = hash; + item->start_offset = start_offset; + item->end_offset= end_offset; + } + + *text = x; + + *ret = nt; + nt = NULL; + + return 0; +} + +static int name_table_compare(const void *a, const void *b) { + const CaNameItem *x = a, *y = b; + + if (x->hash < y->hash) + return -1; + if (x->hash > y->hash) + return 1; + + if (x->start_offset < y->start_offset) + return -1; + if (x->start_offset > y->start_offset) + return 1; + + return 0; +} + +int ca_name_table_make_bst(CaNameTable *t, CaNameTable **ret) { + _cleanup_free_ CaNameItem *buffer = NULL; + CaNameTable *nt = 
NULL; + size_t m; + + /* Reorders the specified name table to become a binary search tree. */ + + m = ca_name_table_items(t); + if (m <= 1) { + *ret = ca_name_table_ref(t); + return 0; + } + + buffer = new(CaNameItem, m); + if (!buffer) + return -ENOMEM; + + memcpy(buffer, t->items, m * sizeof(CaNameItem)); + qsort(buffer, m, sizeof(CaNameItem), name_table_compare); + + nt = ca_name_table_new_size(m); + if (!nt) + return -ENOMEM; + + nt->entry_offset = t->entry_offset; + nt->n_items = t->n_items; + + ca_make_bst(buffer, t->n_items, sizeof(CaNameItem), nt->items); + + *ret = nt; + return 0; +} + +int ca_name_table_dump(FILE *f, CaNameTable *t) { + size_t i; + + if (!f) + f = stderr; + + for (i = 0; i < ca_name_table_items(t); i++) + fprintf(f, "%5zu %016"PRIx64" %16"PRIx64" → %16"PRIx64" (%" PRIu64 ")\n", + i, + t->items[i].hash, + t->items[i].start_offset, + t->items[i].end_offset, + t->items[i].end_offset - t->items[i].start_offset); + + return 0; +} + +int ca_name_table_dump_recursive(FILE *f, CaNameTable *t) { + int r; + + if (!f) + f = stderr; + + while (t) { + r = ca_name_table_dump(f, t); + if (r < 0) + return r; + + t = t->parent; + fputs("LEVEL\n", f); + } + + fputs("TOP\n", f); + return 0; +} diff --git a/src/canametable.h b/src/canametable.h new file mode 100644 index 00000000..fd6e5221 --- /dev/null +++ b/src/canametable.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef foocanametablehfoo +#define foocanametablehfoo + +#include +#include + +#include "util.h" + +/* A name table object. Contains a list of CaNameItem that carry the hash of a filename plus the start and end archive + * offsets of the serializations of these files. The object is considered immutable, unless only a single reference is + * taken. The object also contains a reference to the same table of the parent node. This way it encodes reliably the + * current context of the directory serializations of all nodes and their parents at any moment in time. 
*/ + +typedef struct CaNameItem { + uint64_t hash; /* hash of the filename */ + uint64_t start_offset; /* start offset of the node's serialization in the archive stream */ + uint64_t end_offset; /* end offset */ +} CaNameItem; + +typedef struct CaNameTable { + unsigned n_ref; + struct CaNameTable *parent; /* Parent's name table chained up */ + + uint64_t entry_offset; + + size_t n_items; + size_t n_allocated; + + char *formatted; + + CaNameItem items[]; +} CaNameTable; + +CaNameTable *ca_name_table_new_size(size_t m); +#define ca_name_table_new() ca_name_table_new_size((size_t) -1) + +CaNameTable *ca_name_table_ref(CaNameTable *t); +CaNameTable *ca_name_table_unref(CaNameTable *t); + +DEFINE_TRIVIAL_CLEANUP_FUNC(CaNameTable*, ca_name_table_unref); + +int ca_name_table_make_writable(CaNameTable **t, size_t add); +int ca_name_table_add(CaNameTable **t, CaNameItem **ret); + +char* ca_name_table_format(CaNameTable *t); +int ca_name_table_parse(const char **text, CaNameTable **ret); + +static inline size_t ca_name_table_items(CaNameTable *t) { + return t ? 
t->n_items : 0; +} + +static inline CaNameItem* ca_name_table_get(CaNameTable *t, size_t i) { + if (i >= ca_name_table_items(t)) + return NULL; + + return t->items + i; +} + +static inline CaNameItem *ca_name_table_last(CaNameTable *t) { + size_t n; + + n = ca_name_table_items(t); + if (n <= 0) + return NULL; + + return t->items + n - 1; +} + +int ca_name_table_make_bst(CaNameTable *t, CaNameTable **ret); + +int ca_name_table_dump(FILE *f, CaNameTable *t); +int ca_name_table_dump_recursive(FILE *f, CaNameTable *t); + +#endif diff --git a/src/caorigin.c b/src/caorigin.c index 4c7f8bc7..82698118 100644 --- a/src/caorigin.c +++ b/src/caorigin.c @@ -58,6 +58,8 @@ int ca_origin_put(CaOrigin *origin, CaLocation *location) { return -EINVAL; if (origin->n_items == 0) { + assert(origin->n_bytes == 0); + origin->first = ca_location_ref(location); origin->n_items = 1; origin->n_bytes = location->size; @@ -257,7 +259,7 @@ int ca_origin_dump(FILE *f, CaOrigin *origin) { if (!f) f = stderr; - for (i = 0; i < origin->n_items; i++) { + for (i = 0; i < ca_origin_items(origin); i++) { const char *c; c = ca_location_format(ca_origin_get(origin, i)); @@ -319,3 +321,26 @@ int ca_origin_put_void(CaOrigin *origin, uint64_t n_bytes) { return 0; } + +int ca_origin_extract_bytes(CaOrigin *origin, uint64_t n_bytes, CaOrigin **ret) { + _cleanup_(ca_origin_unrefp) CaOrigin *t = NULL; + int r; + + if (n_bytes == 0) { + *ret = NULL; + return 0; + } + + r = ca_origin_new(&t); + if (r < 0) + return r; + + r = ca_origin_concat(t, origin, n_bytes); + if (r < 0) + return r; + + *ret = t; + t = NULL; + + return 0; +} diff --git a/src/caorigin.h b/src/caorigin.h index 045bca75..26cbee2c 100644 --- a/src/caorigin.h +++ b/src/caorigin.h @@ -30,6 +30,8 @@ int ca_origin_put_void(CaOrigin *origin, uint64_t n_bytes); int ca_origin_advance_items(CaOrigin *origin, size_t n_drop); int ca_origin_advance_bytes(CaOrigin *origin, uint64_t n_bytes); +int ca_origin_extract_bytes(CaOrigin *origin, uint64_t n_bytes, 
CaOrigin **ret); + int ca_origin_dump(FILE *f, CaOrigin *origin); static inline size_t ca_origin_items(CaOrigin *origin) { @@ -40,4 +42,6 @@ static inline uint64_t ca_origin_bytes(CaOrigin *origin) { return origin ? origin->n_bytes : 0; } +DEFINE_TRIVIAL_CLEANUP_FUNC(CaOrigin*, ca_origin_unref); + #endif diff --git a/src/caseed.c b/src/caseed.c index 6aec6b3f..d212fb58 100644 --- a/src/caseed.c +++ b/src/caseed.c @@ -215,7 +215,7 @@ static int ca_seed_open(CaSeed *s) { (void) mkdir(s->cache_path, 0777); - s->cache_fd = open(s->cache_path, O_RDONLY|O_CLOEXEC|O_DIRECTORY|O_NOCTTY); + s->cache_fd = open(s->cache_path, O_RDONLY|O_CLOEXEC|O_DIRECTORY); if (s->cache_fd < 0) return -errno; } @@ -289,7 +289,7 @@ static int ca_seed_cache_chunks(CaSeed *s) { assert(s); - r = ca_encoder_get_data(s->encoder, &p, &l); + r = ca_encoder_get_data(s->encoder, UINT64_MAX, &p, &l); if (r == -ENODATA) return 0; if (r < 0) @@ -580,7 +580,7 @@ int ca_seed_get(CaSeed *s, size_t w; uint64_t m; - r = ca_encoder_get_data(s->encoder, &q, &w); + r = ca_encoder_get_data(s->encoder, UINT64_MAX, &q, &w); if (r == -ENODATA) break; if (r < 0) diff --git a/src/casync-tool.c b/src/casync-tool.c index 521166ad..3d6e284e 100644 --- a/src/casync-tool.c +++ b/src/casync-tool.c @@ -55,6 +55,7 @@ static bool arg_seed_output = true; static char *arg_store = NULL; static char **arg_extra_stores = NULL; static char **arg_seeds = NULL; +static char *arg_cache = NULL; static size_t arg_chunk_size_min = 0; static size_t arg_chunk_size_avg = 0; static size_t arg_chunk_size_max = 0; @@ -93,6 +94,7 @@ static void help(void) { " --compression=COMPRESSION\n" " Pick compression algorithm (zstd, xz or gzip)\n" " --seed=PATH Additional file or directory to use as seed\n" + " --cache=PATH Directory to use as encoder cache\n" " --rate-limit-bps=LIMIT Maximum bandwidth in bytes/s for remote\n" " communication\n" " --exclude-nodump=no Don't exclude files with chattr(1)'s +d 'nodump'\n" @@ -289,6 +291,7 @@ static int 
parse_argv(int argc, char *argv[]) { ARG_EXTRA_STORE, ARG_CHUNK_SIZE, ARG_SEED, + ARG_CACHE, ARG_RATE_LIMIT_BPS, ARG_WITH, ARG_WITHOUT, @@ -319,6 +322,7 @@ static int parse_argv(int argc, char *argv[]) { { "extra-store", required_argument, NULL, ARG_EXTRA_STORE }, { "chunk-size", required_argument, NULL, ARG_CHUNK_SIZE }, { "seed", required_argument, NULL, ARG_SEED }, + { "cache", required_argument, NULL, ARG_CACHE }, { "rate-limit-bps", required_argument, NULL, ARG_RATE_LIMIT_BPS }, { "with", required_argument, NULL, ARG_WITH }, { "without", required_argument, NULL, ARG_WITHOUT }, @@ -368,17 +372,12 @@ static int parse_argv(int argc, char *argv[]) { arg_dry_run = true; break; - case ARG_STORE: { - char *p; - - p = strdup(optarg); - if (!p) + case ARG_STORE: + r = free_and_strdup(&arg_store, optarg); + if (r < 0) return log_oom(); - free(arg_store); - arg_store = p; break; - } case ARG_EXTRA_STORE: @@ -406,6 +405,13 @@ static int parse_argv(int argc, char *argv[]) { break; + case ARG_CACHE: + r = free_and_strdup(&arg_cache, optarg); + if (r < 0) + return log_oom(); + + break; + case ARG_RATE_LIMIT_BPS: r = parse_size(optarg, &arg_rate_limit_bps); if (r < 0) @@ -823,7 +829,8 @@ static int verbose_print_path(CaSync *s, const char *verb) { } static int verbose_print_done_make(CaSync *s) { - uint64_t n_chunks = UINT64_MAX, size = UINT64_MAX, n_reused = UINT64_MAX, covering; + uint64_t n_chunks = UINT64_MAX, size = UINT64_MAX, n_reused = UINT64_MAX, covering, + n_cache_hits = UINT64_MAX, n_cache_misses = UINT64_MAX, n_cache_invalidated = UINT64_MAX, n_cache_added = UINT64_MAX; char buffer[FORMAT_BYTES_MAX]; int r; @@ -851,7 +858,7 @@ static int verbose_print_done_make(CaSync *s) { if (r < 0) return log_error_errno(r, "Failed to format feature flags: %m"); - log_error("Selected feature flags not actually applicable to backing file systems: %s", strnone(t)); + log_info("Selected feature flags not actually applicable to backing file systems: %s", strnone(t)); } } @@ 
-873,14 +880,33 @@ static int verbose_print_done_make(CaSync *s) { log_info("Number of chunks: %" PRIu64, n_chunks); if (n_reused != UINT64_MAX) { if (n_chunks != UINT64_MAX && n_chunks > 0) - log_info("Reused chunks: %"PRIu64 " (%"PRIu64 "%%)", + log_info("Reused (non-cached) chunks: %"PRIu64 " (%"PRIu64 "%%)", n_reused, n_reused * 100U / n_chunks); else - log_info("Reused chunks: %" PRIu64, n_reused); + log_info("Reused (non-cached) chunks: %" PRIu64, n_reused); } if (size != UINT64_MAX && n_chunks != UINT64_MAX) - log_error("Effective average chunk size: %s", format_bytes(buffer, sizeof(buffer), size / n_chunks)); + log_info("Effective average chunk size: %s", format_bytes(buffer, sizeof(buffer), size / n_chunks)); + + r = ca_sync_current_cache_hits(s, &n_cache_hits); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read number of cache hits: %m"); + + r = ca_sync_current_cache_misses(s, &n_cache_misses); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read number of cache misses: %m"); + + r = ca_sync_current_cache_invalidated(s, &n_cache_invalidated); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read number of invalidated cache items: %m"); + + r = ca_sync_current_cache_added(s, &n_cache_added); + if (r < 0 && r != -ENODATA) + return log_error_errno(r, "Failed to read number of added cache items: %m"); + + if (n_cache_hits != UINT64_MAX && n_cache_misses != UINT64_MAX && n_cache_invalidated != UINT64_MAX && n_cache_added != UINT64_MAX) + log_info("Cache hits: %" PRIu64 ", misses: %" PRIu64 ", invalidated: %" PRIu64 ", added: %" PRIu64, n_cache_hits, n_cache_misses, n_cache_invalidated, n_cache_added); return 1; } @@ -1185,10 +1211,16 @@ static int verb_make(int argc, char *argv[]) { if (r < 0) return r; - r = ca_sync_enable_archive_digest(s, true); + r = ca_sync_enable_archive_digest(s, !arg_cache); if (r < 0) return log_error_errno(r, "Failed to enable archive digest: %m"); + if (arg_cache) { + 
r = ca_sync_set_cache_path(s, arg_cache); + if (r < 0) + return log_error_errno(r, "Failed to set cache: %m"); + } + (void) send_notify("READY=1"); for (;;) { @@ -1209,8 +1241,12 @@ static int verb_make(int argc, char *argv[]) { verbose_print_done_make(s); - assert_se(ca_sync_get_archive_digest(s, &digest) >= 0); - printf("%s\n", ca_chunk_id_format(&digest, t)); + r = ca_sync_get_archive_digest(s, &digest); + if (r >= 0) + printf("%s\n", ca_chunk_id_format(&digest, t)); + else if (r != -ENOMEDIUM) + return log_debug_errno(r, "Failed to query archive digest: %m"); + return 0; } @@ -3748,6 +3784,7 @@ int main(int argc, char *argv[]) { finish: free(arg_store); + free(arg_cache); strv_free(arg_extra_stores); strv_free(arg_seeds); diff --git a/src/casync.c b/src/casync.c index 73d88112..81cbe476 100644 --- a/src/casync.c +++ b/src/casync.c @@ -4,6 +4,7 @@ #include #include +#include "cacache.h" #include "cachunk.h" #include "cachunker.h" #include "cadecoder.h" @@ -34,6 +35,16 @@ typedef enum CaDirection { CA_SYNC_DECODE, } CaDirection; +/* In which state regarding cache check and validation are we? 
*/ +typedef enum CaCacheState { + CA_SYNC_CACHE_OFF, /* Possibly write to the cache, but don't consult it */ + CA_SYNC_CACHE_CHECK, /* We are at a position where it might make sense to check the cache */ + CA_SYNC_CACHE_VERIFY, /* We found a chunk in the cache, and are validating if it still matches disk */ + CA_SYNC_CACHE_FAILED, /* We found a chunk in the cache, but it turned out not to match disk, now seek back where the cache experiment started */ + CA_SYNC_CACHE_SUCCEEDED, /* We found a chunk in the cache, and it checked out */ + CA_SYNC_CACHE_IDLE, /* We are not at a position where it might make sense to check the cache */ +} CaCacheState; + typedef struct CaSync { CaDirection direction; bool started; @@ -42,6 +53,7 @@ typedef struct CaSync { CaDecoder *decoder; CaChunker chunker; + CaChunker original_chunker; /* A copy of the full chunker state from the beginning, which we can use when we need to reset things */ CaIndex *index; CaRemote *remote_index; @@ -67,6 +79,13 @@ typedef struct CaSync { size_t current_seed; /* The seed we are currently indexing */ bool index_flags_propagated; + CaCache *cache; + CaCacheState cache_state; + CaChunkID current_cache_chunk_id; + uint64_t current_cache_chunk_size; + CaOrigin *current_cache_origin; + CaLocation *current_cache_start_location; + int base_fd; int boundary_fd; int archive_fd; @@ -83,6 +102,8 @@ typedef struct CaSync { ReallocBuffer archive_buffer; ReallocBuffer compress_buffer; + CaOrigin *buffer_origin; + CaDigest *chunk_digest; bool archive_eof; @@ -97,6 +118,11 @@ typedef struct CaSync { uint64_t n_reused_chunks; uint64_t n_prefetched_chunks; + uint64_t n_cache_hits; + uint64_t n_cache_misses; + uint64_t n_cache_invalidated; + uint64_t n_cache_added; + uint64_t archive_size; uint64_t chunk_skip; @@ -408,6 +434,14 @@ int ca_sync_set_uid_range(CaSync *s, uid_t u) { return 0; } +static void ca_sync_reset_cache_data(CaSync *s) { + assert(s); + + /* Resets the cache data, i.e. 
the cached item we are currently processing. */ + s->current_cache_start_location = ca_location_unref(s->current_cache_start_location); + s->current_cache_origin = ca_origin_unref(s->current_cache_origin); +} + CaSync *ca_sync_unref(CaSync *s) { size_t i; @@ -432,6 +466,9 @@ CaSync *ca_sync_unref(CaSync *s) { ca_seed_unref(s->seeds[i]); free(s->seeds); + ca_sync_reset_cache_data(s); + ca_cache_unref(s->cache); + safe_close(s->base_fd); safe_close(s->boundary_fd); safe_close(s->archive_fd); @@ -459,6 +496,8 @@ CaSync *ca_sync_unref(CaSync *s) { realloc_buffer_free(&s->archive_buffer); realloc_buffer_free(&s->compress_buffer); + ca_origin_unref(s->buffer_origin); + ca_file_root_unref(s->archive_root); ca_digest_free(s->chunk_digest); @@ -1162,6 +1201,80 @@ int ca_sync_add_seed_path(CaSync *s, const char *path) { return 0; } +int ca_sync_set_cache_fd(CaSync *s, int fd) { + _cleanup_(ca_cache_unrefp) CaCache *cache = NULL; + int r; + + if (!s) + return -EINVAL; + if (fd < 0) + return -EINVAL; + + if (s->cache) + return -EBUSY; + + cache = ca_cache_new(); + if (!cache) + return -ENOMEM; + + r = ca_cache_set_fd(cache, fd); + if (r < 0) + return r; + + s->cache = cache; + cache = NULL; + + return 0; +} + +int ca_sync_set_cache_path(CaSync *s, const char *path) { + + _cleanup_(ca_cache_unrefp) CaCache *cache = NULL; + int r; + + if (!s) + return -EINVAL; + if (!path) + return -EINVAL; + + if (s->cache) + return -EBUSY; + + cache = ca_cache_new(); + if (!cache) + return -ENOMEM; + + r = ca_cache_set_path(cache, path); + if (r < 0) + return r; + + s->cache = cache; + cache = NULL; + + return 0; +} + +static bool ca_sync_use_cache(CaSync *s) { + assert(s); + + /* Returns true if we can use the cache when reading. This only works if we don't generate an archive or + * archive digest and if we actually have a cache configured. 
*/ + + if (s->direction != CA_SYNC_ENCODE) + return false; + + if (s->archive_fd >= 0) + return false; + + if (s->remote_archive) + return false; + + if (s->archive_digest || s->payload_digest || s->hardlink_digest) + return false; + + return !!s->cache; +} + static int ca_sync_start(CaSync *s) { size_t i; int r; @@ -1378,8 +1491,7 @@ static int ca_sync_start(CaSync *s) { } if (s->encoder) { - /* If we are writing an index file we need the archive digest unconditionally, as it is included in its end */ - r = ca_encoder_enable_archive_digest(s->encoder, s->archive_digest || s->index); + r = ca_encoder_enable_archive_digest(s->encoder, s->archive_digest); if (r < 0) return r; @@ -1390,6 +1502,8 @@ static int ca_sync_start(CaSync *s) { r = ca_encoder_enable_hardlink_digest(s->encoder, s->hardlink_digest); if (r < 0) return r; + + s->original_chunker = s->chunker; } if (s->decoder) { @@ -1464,6 +1578,13 @@ static int ca_sync_start(CaSync *s) { return r; } + if (s->cache) { + r = ca_cache_set_digest_type(s->cache, ca_feature_flags_to_digest_type(s->feature_flags)); + if (r < 0) + return r; + } + + s->cache_state = ca_sync_use_cache(s) ? CA_SYNC_CACHE_CHECK : CA_SYNC_CACHE_OFF; s->started = true; return 1; @@ -1489,12 +1610,17 @@ static int ca_sync_write_remote_archive(CaSync *s, const void *p, size_t l) { return ca_remote_put_archive(s->remote_archive, p, l); } -static int ca_sync_write_one_chunk(CaSync *s, const void *p, size_t l) { +static int ca_sync_write_one_chunk(CaSync *s, const void *p, size_t l, CaOrigin *origin) { CaChunkID id; int r; assert(s); assert(p || l == 0); + assert(!origin || ca_origin_bytes(origin) == l); + + /* Processes a single chunk we just generated. Writes it to our wstore, our cache store, and our cache. Also + * writes a record about it into the index. Note that if we hit the cache ca_sync_write_one_cached_chunk() is + * called instead. 
*/ r = ca_sync_make_chunk_id(s, p, l, &id); if (r < 0) @@ -1522,19 +1648,47 @@ static int ca_sync_write_one_chunk(CaSync *s, const void *p, size_t l) { return r; } + if (s->cache) { + log_debug("Adding cache entry %s.", ca_location_format(ca_origin_get(origin, 0))); + + r = ca_cache_put(s->cache, origin, &id); + if (r < 0) + return r; + if (r > 0) + s->n_cache_added++; + } + + if (ca_sync_use_cache(s)) + s->cache_state = CA_SYNC_CACHE_CHECK; + return 0; } -static int ca_sync_write_chunks(CaSync *s, const void *p, size_t l) { +static int ca_sync_write_chunks(CaSync *s, const void *p, size_t l, CaLocation *location) { int r; assert(s); assert(p || l == 0); + /* Splits up the data that was just generated into chunks, and calls ca_sync_write_one_chunk() for it */ + if (!s->wstore && !s->cache_store && !s->index) return 0; + if (location) { + if (!s->buffer_origin) { + r = ca_origin_new(&s->buffer_origin); + if (r < 0) + return r; + } + + r = ca_origin_put(s->buffer_origin, location); + if (r < 0) + return r; + } + while (l > 0) { + _cleanup_(ca_origin_unrefp) CaOrigin *chunk_origin = NULL; const void *chunk; size_t chunk_size, k; @@ -1556,7 +1710,22 @@ static int ca_sync_write_chunks(CaSync *s, const void *p, size_t l) { chunk_size = realloc_buffer_size(&s->buffer); } - r = ca_sync_write_one_chunk(s, chunk, chunk_size); + if (s->buffer_origin) { + if (chunk_size == ca_origin_bytes(s->buffer_origin)) { + chunk_origin = s->buffer_origin; + s->buffer_origin = NULL; + } else { + r = ca_origin_extract_bytes(s->buffer_origin, chunk_size, &chunk_origin); + if (r < 0) + return r; + + r = ca_origin_advance_bytes(s->buffer_origin, chunk_size); + if (r < 0) + return r; + } + } + + r = ca_sync_write_one_chunk(s, chunk, chunk_size, chunk_origin); if (r < 0) return r; @@ -1577,12 +1746,11 @@ static int ca_sync_write_final_chunk(CaSync *s) { if (!s->wstore && !s->cache_store && !s->index) return 0; - if (realloc_buffer_size(&s->buffer) == 0) - return 0; - - r = 
ca_sync_write_one_chunk(s, realloc_buffer_data(&s->buffer), realloc_buffer_size(&s->buffer)); - if (r < 0) - return r; + if (realloc_buffer_size(&s->buffer) > 0) { + r = ca_sync_write_one_chunk(s, realloc_buffer_data(&s->buffer), realloc_buffer_size(&s->buffer), s->buffer_origin); + if (r < 0) + return r; + } if (s->index) { r = ca_index_write_eof(s->index); @@ -1597,6 +1765,25 @@ static int ca_sync_write_final_chunk(CaSync *s) { return 0; } +static int ca_sync_write_one_cached_chunk(CaSync *s, const CaChunkID *id, uint64_t size, CaLocation *location) { + int r; + + assert(s); + assert(id); + + /* Much like ca_sync_write_one_chunk(), but is called when we are using the cache and had a cache hit */ + + s->n_written_chunks ++; + + if (s->index) { + r = ca_index_write_chunk(s->index, id, size); + if (r < 0) + return r; + } + + return 0; +} + static int ca_sync_install_archive(CaSync *s) { assert(s); @@ -1622,8 +1809,36 @@ static int ca_sync_write_remote_archive_eof(CaSync *s) { return ca_remote_put_archive_eof(s->remote_archive); } +static int ca_sync_cache_get(CaSync *s, CaLocation *location) { + int r; + + assert(s); + assert(location); + + assert(s->cache); + assert(!s->current_cache_start_location); + assert(!s->current_cache_origin); + + r = ca_cache_get(s->cache, location, &s->current_cache_chunk_id, &s->current_cache_origin); + if (r == -ENOENT) { /* No luck, no cached entry about this, let's generate new data then */ + log_debug("Cache miss at %s.", ca_location_format(location)); + s->n_cache_misses++; + return r; + } + if (r < 0) + return log_debug_errno(r, "Failed to acquire item from cache: %m"); + + log_debug("Yay, cache hit at %s.", ca_location_format(location)); + + s->current_cache_start_location = ca_location_ref(location); + s->current_cache_chunk_size = ca_origin_bytes(s->current_cache_origin); + + return r; +} + static int ca_sync_step_encode(CaSync *s) { int r, step; + bool next_step; assert(s); @@ -1642,14 +1857,181 @@ static int 
ca_sync_step_encode(CaSync *s) { return CA_SYNC_POLL; } - step = ca_encoder_step(s->encoder); - if (step < 0) - return step; + switch (s->cache_state) { + + case CA_SYNC_CACHE_OFF: + case CA_SYNC_CACHE_IDLE: + next_step = true; + break; + + case CA_SYNC_CACHE_CHECK: { + CaLocation *location; + + assert(!s->current_cache_start_location); + assert(!s->current_cache_origin); + + /* Let's see if there's already data in the buffer. If so, let's see if we have a cached entry for + * it. */ + if (realloc_buffer_size(&s->buffer) == 0) { + /* Nope, nothing new. Let's generate some new data then */ + next_step = true; + break; + } + + assert_se(location = ca_origin_get(s->buffer_origin, 0)); + + /* A previous iteration already read stuff into the buffer, however it wasn't enough for a whole + * chunk. Let's see if instead of generating further data the cache can help us. */ + + r = ca_sync_cache_get(s, location); + if (r == -ENOENT) { /* No luck */ + s->cache_state = CA_SYNC_CACHE_IDLE; + next_step = true; + break; + } + if (r < 0) + return r; + + /* Yippieh! */ + s->cache_state = CA_SYNC_CACHE_VERIFY; + return CA_SYNC_STEP; + } + + case CA_SYNC_CACHE_VERIFY: { + CaLocation *a, *b; + + assert(s->current_cache_start_location); + assert(s->current_cache_origin); + + if (ca_origin_bytes(s->current_cache_origin) == 0) { + /* Nice! All checked out! */ + log_debug("Cached block checked out (on buffer)."); + s->cache_state = CA_SYNC_CACHE_SUCCEEDED; + return CA_SYNC_STEP; + } + + /* Let's see if there's data in the buffer we need to verify first. */ + if (realloc_buffer_size(&s->buffer) == 0) { + /* Nope there is not, let's generate some new data then */ + next_step = true; + break; + } + + assert_se(a = ca_origin_get(s->current_cache_origin, 0)); + assert_se(b = ca_origin_get(s->buffer_origin, 0)); + + if (ca_location_equal(a, b, false)) { + uint64_t sz; + + /* Yay, this location checked out. Let's advance both our buffer (and its origin), and the + * cache origin. 
*/ + + assert_se(a->size != UINT64_MAX); + assert_se(a->size != 0); + + assert_se(b->size != UINT64_MAX); + assert_se(b->size != 0); + + sz = MIN(a->size, b->size); + + r = ca_origin_advance_bytes(s->current_cache_origin, sz); + if (r < 0) + return r; + + r = ca_origin_advance_bytes(s->buffer_origin, sz); + if (r < 0) + return r; + + r = realloc_buffer_advance(&s->buffer, sz); + if (r < 0) + return r; + + return CA_SYNC_STEP; + } + + /* Ah, dang, this didn't check out. Let's revert back. */ + log_debug("Cache item out of date, location didn't match (on buffer)."); + s->cache_state = CA_SYNC_CACHE_FAILED; + return CA_SYNC_STEP; + } + + case CA_SYNC_CACHE_SUCCEEDED: + assert(s->current_cache_start_location); + assert(s->current_cache_origin); + + /* In the previous iteration we tried to use the cache, and it did work out. Yay! Let's now + * write the cached item, and seek where we are supposed to continue */ + + log_debug("Succeeded with cached block!"); + + r = ca_sync_write_one_cached_chunk(s, &s->current_cache_chunk_id, s->current_cache_chunk_size, s->current_cache_start_location); + if (r < 0) + return log_debug_errno(r, "Failed to write cached item to index: %m"); + + s->n_cache_hits++; + + /* Reset the cached item we are operating on */ + ca_sync_reset_cache_data(s); + s->cache_state = CA_SYNC_CACHE_CHECK; + + return CA_SYNC_STEP; + + case CA_SYNC_CACHE_FAILED: + assert(s->current_cache_start_location); + assert(s->current_cache_origin); + + /* In the previous iteration we tried to use the cache, but this didn't work out, the stuff on disk + * didn't match our cache anymore. In this iteration we'll hence seek back to where our cache adventure + * started, and read off disk again. But we'll remove the bogus cache entry first, so that this doesn't + * happen again, ever. 
*/ + + log_debug("Cached block didn't check out, seeking back to %s.", ca_location_format(s->current_cache_start_location)); + + r = ca_cache_remove(s->cache, s->current_cache_start_location); + if (r < 0 && r != -ENOENT) + return log_debug_errno(r, "Failed to remove cache item: %m"); + + s->n_cache_invalidated++; + + step = ca_encoder_seek_location(s->encoder, s->current_cache_start_location); + if (step < 0) + return log_debug_errno(step, "Failed to seek encoder: %m"); + + /* Reset the cached item we are operating on */ + ca_sync_reset_cache_data(s); + s->cache_state = CA_SYNC_CACHE_IDLE; + + /* Clear whatever is currently in the buffer */ + realloc_buffer_empty(&s->buffer); + s->buffer_origin = ca_origin_unref(s->buffer_origin); + + s->chunker = s->original_chunker; + + next_step = false; + break; + } + + if (next_step) { + step = ca_encoder_step(s->encoder); + if (step < 0) + return log_debug_errno(step, "Failed to run encoder step: %m"); + } switch (step) { case CA_ENCODER_FINISHED: + if (s->cache_state == CA_SYNC_CACHE_VERIFY) { + /* What? There's still a cache item pending that didn't finish yet? If so, it's invalid. 
Let's + * treat it as miss */ + + log_debug("Cache item out of date, reached EOF too early."); + s->cache_state = CA_SYNC_CACHE_FAILED; + return CA_SYNC_STEP; + } + + assert(IN_SET(s->cache_state, CA_SYNC_CACHE_OFF, CA_SYNC_CACHE_CHECK, CA_SYNC_CACHE_IDLE)); + r = ca_sync_write_final_chunk(s); if (r < 0) return r; @@ -1671,36 +2053,197 @@ static int ca_sync_step_encode(CaSync *s) { return CA_SYNC_FINISHED; case CA_ENCODER_NEXT_FILE: - case CA_ENCODER_DONE_FILE: case CA_ENCODER_PAYLOAD: case CA_ENCODER_DATA: { + _cleanup_(ca_location_unrefp) CaLocation *location = NULL; const void *p; - size_t l; + size_t l, extra_offset = 0; - r = ca_encoder_get_data(s->encoder, &p, &l); - if (r >= 0) { + switch (s->cache_state) { + + case CA_SYNC_CACHE_OFF: + case CA_SYNC_CACHE_IDLE: + break; + + case CA_SYNC_CACHE_CHECK: { + /* Let's check if the cache provides an item for our current location */ - r = ca_sync_write_chunks(s, p, l); + assert(!s->current_cache_start_location); + assert(!s->current_cache_origin); + + assert(realloc_buffer_size(&s->buffer) == 0); + assert(ca_origin_bytes(s->buffer_origin) == 0); + + /* Let's figure out the location we are at right now */ + r = ca_encoder_current_location(s->encoder, 0, &location); + if (r < 0) + return log_debug_errno(r, "Failed to acquire encoder location: %m"); + + /* Hmm, let's check the cache now then, maybe we can use that and avoid the disk accesses for + the new data? */ + r = ca_sync_cache_get(s, location); + if (r == -ENOENT) { /* No luck, let's generate new data then. */ + s->cache_state = CA_SYNC_CACHE_IDLE; + break; + } if (r < 0) return r; - r = ca_sync_write_archive(s, p, l); + /* This worked! Let's proceed with verifying what we just got. 
*/ + s->cache_state = CA_SYNC_CACHE_VERIFY; + } _fallthrough_; + + case CA_SYNC_CACHE_VERIFY: { + CaLocation *cached_location; + uint64_t sz; + size_t data_size; + + assert(s->current_cache_start_location); + assert(s->current_cache_origin); + + assert(realloc_buffer_size(&s->buffer) == 0); + assert(ca_origin_bytes(s->buffer_origin) == 0); + + if (!location) { + /* Let's figure out the location we are at right now */ + r = ca_encoder_current_location(s->encoder, 0, &location); + if (r < 0) + return log_debug_errno(r, "Failed to acquire encoder location: %m"); + } + + assert_se(cached_location = ca_origin_get(s->current_cache_origin, 0)); + + if (!ca_location_equal(location, cached_location, false)) { + /* We are not where we should be. Bummer. */ + log_debug("Cache item out of date, location didn't match (on encoder). %s != %s", + ca_location_format(location), ca_location_format(cached_location)); + s->cache_state = CA_SYNC_CACHE_FAILED; + return CA_SYNC_STEP; + } + + assert_se(cached_location->size != UINT64_MAX); + assert_se(cached_location->size != 0); + + /* Generate the data if necessary, but clarify that we are not actually interested, by passing NULL */ + r = ca_encoder_get_data(s->encoder, cached_location->size, NULL, &data_size); if (r < 0) - return r; + return log_debug_errno(r, "Failed to skip initial data: %m"); + + for (;;) { + if (data_size <= cached_location->size) { + + /* Less or equal data than we expected was generated. Let's advance the cache origin, + * and retry */ + + r = ca_origin_advance_bytes(s->current_cache_origin, data_size); + if (r < 0) + return r; + + goto done; + } + + /* Hmm, so more data than we expected was generated. When this happens in the middle of + * our cache verification that's a problem, as in that case the cache data didn't match + * reality. However, if that happens at the end of our validation, it's OK, however we + * need to do something useful with the remainder. 
*/ + + if (ca_origin_items(s->current_cache_origin) > 1) { + /* Not at the end. Bummer. */ + log_debug("Cache item out of date, size didn't match."); + s->cache_state = CA_SYNC_CACHE_FAILED; + return CA_SYNC_STEP; + } + + log_debug("Chunk matched, writing out."); + + /* Yay, we are at the end of our cache entry. That's excellent! */ + + r = ca_sync_write_one_cached_chunk(s, &s->current_cache_chunk_id, s->current_cache_chunk_size, s->current_cache_start_location); + if (r < 0) + return log_debug_errno(r, "Failed to write cached item to index: %m"); + + sz = cached_location->size; + + s->n_cache_hits++; + ca_sync_reset_cache_data(s); + + /* So we have some more data to handle. Let's see if we have a hit for that one too. */ + + r = ca_location_advance(&location, sz); + if (r < 0) + return r; + data_size -= sz; + + r = ca_sync_cache_get(s, location); + if (r == -ENOENT) { + s->cache_state = CA_SYNC_CACHE_IDLE; + + /* So, this is not available in the cache, we hence need to generate data here. Hence we + * need to call ca_encoder_get_data() again, but this time ask for real data. And then + * we need to skip what we already used. */ + extra_offset = sz; + break; + } + if (r < 0) + return r; + + assert_se(cached_location = ca_origin_get(s->current_cache_origin, 0)); + } + + assert_se(extra_offset > 0); + break; + } - r = ca_sync_write_remote_archive(s, p, l); + case CA_SYNC_CACHE_FAILED: + case CA_SYNC_CACHE_SUCCEEDED: + assert_not_reached("Unexpected cache state"); + } + + r = ca_encoder_get_data(s->encoder, UINT64_MAX, &p, &l); + if (r < 0) + return log_debug_errno(r, "Failed to acquire data: %m"); + + /* Apply the extra offset, if there's any */ + p = (const uint8_t*) p + extra_offset; + l -= extra_offset; + + if (s->cache) { + /* When caching is enabled, query current encoder location, so that we can generate a cache entry + * for it. Note that the location might already be initialized, and if it is the 'extra_offset' + * is already applied to it. 
*/ + + if (!location) { + r = ca_encoder_current_location(s->encoder, 0, &location); + if (r < 0) + return r; + } + + r = ca_location_patch_size(&location, l); if (r < 0) return r; + } - } else if (r != -ENODATA) + r = ca_sync_write_chunks(s, p, l, location); + if (r < 0) + return r; + + r = ca_sync_write_archive(s, p, l); + if (r < 0) return r; + r = ca_sync_write_remote_archive(s, p, l); + if (r < 0) + return r; + + done: return step == CA_ENCODER_NEXT_FILE ? CA_SYNC_NEXT_FILE : - step == CA_ENCODER_DONE_FILE ? CA_SYNC_DONE_FILE : step == CA_ENCODER_PAYLOAD ? CA_SYNC_PAYLOAD : CA_SYNC_STEP; } + case CA_ENCODER_DONE_FILE: + return CA_SYNC_DONE_FILE; + default: assert(false); } @@ -1755,16 +2298,22 @@ static int ca_sync_process_decoder_request(CaSync *s) { if (r == -EAGAIN) /* Not enough data */ return CA_SYNC_POLL; if (r < 0) - return r; + return log_debug_errno(r, "Failed to read index chunk: %m"); if (r == 0) { /* EOF */ r = ca_decoder_put_eof(s->decoder); if (r < 0) - return r; + return log_debug_errno(r, "Failed to put decoder EOF: %m"); return CA_SYNC_STEP; } + /* Let's check if the chunk size was properly determined. */ + if (s->next_chunk_size == UINT64_MAX) { + log_debug("Couldn't determine chunk size."); + return -ESPIPE; + } + s->next_chunk_valid = true; } @@ -1780,7 +2329,7 @@ static int ca_sync_process_decoder_request(CaSync *s) { if (r == -EALREADY) /* Don't have this right now, but it was already enqueued. */ return CA_SYNC_POLL; if (r < 0) - return r; + return log_debug_errno(r, "Failed to acquire chunk: %m"); if (s->next_chunk_size != UINT64_MAX && /* next_chunk_size will be -1 if we just seeked in the index file */ s->next_chunk_size != chunk_size) { ca_origin_unref(origin); @@ -1794,6 +2343,7 @@ static int ca_sync_process_decoder_request(CaSync *s) { * many bytes as necessary */ if (s->chunk_skip >= chunk_size) { ca_origin_unref(origin); + log_debug("Skip size larger than chunk. (%" PRIu64 " vs. 
%" PRIu64 ")", s->chunk_skip, chunk_size); return -EINVAL; } @@ -1806,7 +2356,7 @@ static int ca_sync_process_decoder_request(CaSync *s) { r = ca_decoder_put_data(s->decoder, p, chunk_size, origin); ca_origin_unref(origin); if (r < 0) - return r; + return log_debug_errno(r, "Decoder didn't accept chunk: %m"); return CA_SYNC_STEP; } @@ -1821,7 +2371,7 @@ static int ca_sync_process_decoder_request(CaSync *s) { n = read(s->archive_fd, p, BUFFER_SIZE); if (n < 0) - return -errno; + return log_debug_errno(errno, "Failed to read archive: %m"); assert((size_t) n <= BUFFER_SIZE); @@ -1829,7 +2379,7 @@ static int ca_sync_process_decoder_request(CaSync *s) { r = ca_decoder_put_eof(s->decoder); if (r < 0) - return r; + return log_debug_errno(r, "Failed to put decoder EOF: %m"); } else { CaOrigin *origin; @@ -1914,19 +2464,19 @@ static int ca_sync_process_decoder_seek(CaSync *s) { r = ca_decoder_get_seek_offset(s->decoder, &offset); if (r < 0) - return r; + return log_debug_errno(r, "Failed to get seek offset: %m"); if (s->index) { r = ca_index_seek(s->index, offset, &s->chunk_skip); if (r < 0) - return r; + return log_debug_errno(r, "Failed to seek in index: %m"); } else if (s->archive_fd >= 0) { off_t f; f = lseek(s->archive_fd, (off_t) offset, SEEK_SET); if (f == (off_t) -1) - return -errno; + return log_debug_errno(errno, "Failed to seek in archive: %m"); } else return -EOPNOTSUPP; @@ -2028,7 +2578,7 @@ static int ca_sync_step_decode(CaSync *s) { step = ca_decoder_step(s->decoder); if (step < 0) - return step; + return log_debug_errno(step, "Failed to run decoder step: %m"); switch (step) { @@ -2237,7 +2787,18 @@ static int ca_sync_remote_prefetch(CaSync *s) { requested ++; } - r = ca_index_set_position(s->index, saved); + if (saved > 0) { + /* Let's not just seek back to where we came from, but one earlier, and read it again, so that the + * previous offset is known, so that the size of the next chunk can be determined properly */ + + r = ca_index_set_position(s->index, 
saved-1); + if (r < 0) + return r; + + r = ca_index_read_chunk(s->index, NULL, NULL, NULL); + } else + r = ca_index_set_position(s->index, saved); + if (r < 0) return r; @@ -2652,11 +3213,11 @@ int ca_sync_get_local( r = ca_seed_get(s->seeds[i], chunk_id, &p, &l, ret_origin ? &origin : NULL); if (r == -ESTALE) { - log_info("Chunk cache is not up-to-date, ignoring."); + log_debug("Chunk cache is not up-to-date, ignoring."); continue; } if (r == -ENOLINK) { - log_info("Can't reproduce name table for GOODBYE record, ignoring."); + log_debug("Can't reproduce name table for GOODBYE record, ignoring."); continue; } if (r == -ENOENT) @@ -3436,7 +3997,7 @@ int ca_sync_get_payload(CaSync *s, const void **ret, size_t *ret_size) { if (s->decoder) return ca_decoder_get_payload(s->decoder, ret, ret_size); else if (s->encoder) - return ca_encoder_get_data(s->encoder, ret, ret_size); + return ca_encoder_get_data(s->encoder, UINT64_MAX, ret, ret_size); return -ENOTTY; } @@ -3536,8 +4097,7 @@ int ca_sync_enable_archive_digest(CaSync *s, bool b) { return 0; if (s->encoder) { - /* If we are writing an index file we need the archive digest unconditionally, as it is included in its end */ - r = ca_encoder_enable_archive_digest(s->encoder, b || s->index); + r = ca_encoder_enable_archive_digest(s->encoder, b); if (r < 0) return r; } @@ -3787,3 +4347,63 @@ int ca_sync_set_compression_type(CaSync *s, CaCompressionType compression) { s->compression_type = compression; return 0; } + +int ca_sync_current_cache_hits(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->cache) + return -ENODATA; + + *ret = s->n_cache_hits; + return 0; +} + +int ca_sync_current_cache_misses(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->cache) + return -ENODATA; + + *ret = s->n_cache_misses; + return 0; +} 
+ +int ca_sync_current_cache_invalidated(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->cache) + return -ENODATA; + + *ret = s->n_cache_invalidated; + return 0; +} + +int ca_sync_current_cache_added(CaSync *s, uint64_t *ret) { + if (!s) + return -EINVAL; + if (!ret) + return -EINVAL; + + if (s->direction != CA_SYNC_ENCODE) + return -ENODATA; + if (!s->cache) + return -ENODATA; + + *ret = s->n_cache_added; + return 0; +} diff --git a/src/casync.h b/src/casync.h index 4aa9067c..e6a54b21 100644 --- a/src/casync.h +++ b/src/casync.h @@ -88,6 +88,10 @@ int ca_sync_add_store_auto(CaSync *sync, const char *locator); int ca_sync_add_seed_fd(CaSync *sync, int fd); int ca_sync_add_seed_path(CaSync *sync, const char *path); +/* Path to use as cache */ +int ca_sync_set_cache_fd(CaSync *sync, int fd); +int ca_sync_set_cache_path(CaSync *sync, const char *path); + int ca_sync_step(CaSync *sync); int ca_sync_poll(CaSync *s, uint64_t timeout_nsec, const sigset_t *ss); @@ -152,4 +156,9 @@ int ca_sync_get_local_request_bytes(CaSync *s, uint64_t *ret); int ca_sync_get_remote_requests(CaSync *s, uint64_t *ret); int ca_sync_get_remote_request_bytes(CaSync *s, uint64_t *ret); +int ca_sync_current_cache_hits(CaSync *s, uint64_t *ret); +int ca_sync_current_cache_misses(CaSync *s, uint64_t *ret); +int ca_sync_current_cache_invalidated(CaSync *s, uint64_t *ret); +int ca_sync_current_cache_added(CaSync *s, uint64_t *ret); + #endif diff --git a/src/gcc-macro.h b/src/gcc-macro.h new file mode 100644 index 00000000..615dd55f --- /dev/null +++ b/src/gcc-macro.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: LGPL-2.1+ */ + +#ifndef foogccmacrohfoo +#define foogccmacrohfoo + +#define _printf_(a,b) __attribute__ ((format (printf, a, b))) +#define _sentinel_ __attribute__ ((sentinel)) +#define _unused_ __attribute__ ((unused)) +#define _likely_(x) (__builtin_expect(!!(x),1)) +#define 
_unlikely_(x) (__builtin_expect(!!(x),0)) +#define _malloc_ __attribute__ ((malloc)) +#define _pure_ __attribute__ ((pure)) +#define _packed_ __attribute__ ((packed)) +#define _const_ __attribute__ ((const)) +#ifdef __clang__ +# define _alloc_(...) +#else +# define _alloc_(...) __attribute__ ((alloc_size(__VA_ARGS__))) +#endif +#if __GNUC__ >= 7 +#define _fallthrough_ __attribute__((fallthrough)) +#else +#define _fallthrough_ +#endif + +#endif diff --git a/src/log.c b/src/log.c index 5958c4f2..a977f7cd 100644 --- a/src/log.c +++ b/src/log.c @@ -2,11 +2,44 @@ #include #include +#include +#include #include "log.h" #include "util.h" -static int log_errorv( +static int get_log_level(void) { + static int cached_log_level = -1; + + if (cached_log_level < 0) { + const char *e; + + cached_log_level = LOG_INFO; + + e = getenv("CASYNC_LOG_LEVEL"); + if (e) { + if (STR_IN_SET(e, "emerg", "emergency", "0")) + cached_log_level = LOG_EMERG; + else if (STR_IN_SET(e, "alert", "1")) + cached_log_level = LOG_ALERT; + else if (STR_IN_SET(e, "crit", "critical", "2")) + cached_log_level = LOG_CRIT; + else if (STR_IN_SET(e, "err", "error", "3")) + cached_log_level = LOG_ERR; + else if (STR_IN_SET(e, "warn", "warning", "4")) + cached_log_level = LOG_WARNING; + else if (STR_IN_SET(e, "notice", "5")) + cached_log_level = LOG_NOTICE; + else if (STR_IN_SET(e, "debug", "7")) + cached_log_level = LOG_DEBUG; + } + } + + return cached_log_level; +} + +static int log_fullv( + int level, int error, const char *format, va_list ap) { @@ -14,7 +47,11 @@ static int log_errorv( int orig_errno = errno; const char *fmt; - fmt = strjoina(format, "\n"); + if (level > get_log_level()) + return -abs(error); + + /* Only append a newline if the caller did not supply one; fmt must be assigned on both paths */ + fmt = endswith(format, "\n") ? format : strjoina(format, "\n"); if (error != 0) errno = abs(error); @@ -29,7 +66,7 @@ int log_info_errno(int error, const char* format, ...) 
{ int r; va_start(ap, format); - r = log_errorv(error, format, ap); + r = log_fullv(LOG_INFO, error, format, ap); va_end(ap); return r; @@ -40,7 +77,18 @@ int log_error_errno(int error, const char* format, ...) { int r; va_start(ap, format); - r = log_errorv(error, format, ap); + r = log_fullv(LOG_ERR, error, format, ap); + va_end(ap); + + return r; +} + +int log_debug_errno(int error, const char* format, ...) { + va_list ap; + int r; + + va_start(ap, format); + r = log_fullv(LOG_DEBUG, error, format, ap); va_end(ap); return r; diff --git a/src/log.h b/src/log.h index 6d4ae30a..24661ce1 100644 --- a/src/log.h +++ b/src/log.h @@ -6,10 +6,15 @@ #include #include -int log_info_errno(int error, const char* fmt, ...); -int log_error_errno(int error, const char* format, ...); +#include "gcc-macro.h" + +int log_info_errno(int error, const char* fmt, ...) _printf_(2,3); +int log_error_errno(int error, const char* fmt, ...) _printf_(2,3); +int log_debug_errno(int error, const char* fmt, ...) _printf_(2,3); + #define log_info(fmt, ...) log_info_errno(0, fmt, ##__VA_ARGS__) #define log_error(fmt, ...) log_error_errno(0, fmt, ##__VA_ARGS__) +#define log_debug(fmt, ...) 
log_debug_errno(0, fmt, ##__VA_ARGS__) static inline int log_oom(void) { log_error("Out of memory"); @@ -19,15 +24,15 @@ static inline int log_oom(void) { #define assert_se(x) \ do { \ if (!(x)) { \ - log_error("%s:%d (%s): assertion failed:" #x "\n", \ - __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + log_error("%s:%d (%s): assertion failed: %s", \ + __FILE__, __LINE__, __PRETTY_FUNCTION__, #x); \ abort(); \ } \ } while(false) #define assert_not_reached(x) \ do { \ - log_error("%s:%d (%s): unreachable code reached:" x "\n", \ - __FILE__, __LINE__, __PRETTY_FUNCTION__); \ + log_error("%s:%d (%s): unreachable code reached: %s", \ + __FILE__, __LINE__, __PRETTY_FUNCTION__, x); \ abort(); \ } while(false) diff --git a/src/meson.build b/src/meson.build index b6933bdf..40013798 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,11 +1,15 @@ # SPDX-License-Identifier: LGPL-2.1+ -log_sources = files(''' +util_sources = files(''' log.c log.h + util.c + util.h '''.split()) libshared_sources = files(''' + cacache.c + cacache.h cachunk.c cachunk.h cachunker.c @@ -32,6 +36,8 @@ libshared_sources = files(''' calocation.h camakebst.c camakebst.h + canametable.c + canametable.h canbd.c canbd.h caorigin.c @@ -60,6 +66,7 @@ libshared_sources = files(''' fssize.h gc.c gc.h + gcc-macro.h hash-funcs.c hash-funcs.h hashmap.c @@ -79,14 +86,12 @@ libshared_sources = files(''' set.h siphash24.c siphash24.h - util.c - util.h '''.split()) libshared = static_library( 'shared', libshared_sources, - log_sources) + util_sources) casync_sources = files(''' casync-tool.c diff --git a/src/realloc-buffer.c b/src/realloc-buffer.c index 1d39adae..c25fd0dd 100644 --- a/src/realloc-buffer.c +++ b/src/realloc-buffer.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: LGPL-2.1+ */ #include +#include #include #include "def.h" @@ -216,7 +217,7 @@ int realloc_buffer_truncate(ReallocBuffer *b, size_t sz) { return 0; } -int realloc_buffer_read(ReallocBuffer *b, int fd) { +int 
realloc_buffer_read_size(ReallocBuffer *b, int fd, size_t add) { ssize_t l; void *p; @@ -225,20 +226,48 @@ int realloc_buffer_read(ReallocBuffer *b, int fd) { if (fd < 0) return -EINVAL; - p = realloc_buffer_extend(b, BUFFER_SIZE); + if (add == (size_t) -1 || add < BUFFER_SIZE) + add = BUFFER_SIZE; + + p = realloc_buffer_extend(b, add); if (!p) return -ENOMEM; - l = read(fd, p, BUFFER_SIZE); + l = read(fd, p, add); if (l < 0) { - realloc_buffer_shorten(b, BUFFER_SIZE); + realloc_buffer_shorten(b, add); return -errno; } - realloc_buffer_shorten(b, BUFFER_SIZE - l); + realloc_buffer_shorten(b, add - l); return l > 0; } +int realloc_buffer_read_target(ReallocBuffer *b, int fd, size_t target_size) { + int r; + + if (!b) + return -EINVAL; + + /* Reads data from the specified fd until the buffer contains at least target_size bytes. Returns > 0 if the + * size is reached, 0 if not (due to EOF) */ + + for (;;) { + size_t c; + + c = realloc_buffer_size(b); + if (c >= target_size) + return 1; + + r = realloc_buffer_read_size(b, fd, target_size - c); + if (r < 0) + return r; + + if (r == 0) + return 0; + } +} + void* realloc_buffer_steal(ReallocBuffer *b) { void *p; @@ -260,3 +289,106 @@ void* realloc_buffer_steal(ReallocBuffer *b) { return p; } + +void* realloc_buffer_donate(ReallocBuffer *b, void *p, size_t size) { + if (!b) + return NULL; + + if (realloc_buffer_size(b) > 0) { + void *ret; + + ret = realloc_buffer_append(b, p, size); + if (!ret) + return NULL; + + free(p); + return ret; + } + + free(b->data); + b->data = p; + b->start = 0; + b->allocated = b->end = size; + + return p; +} + +int realloc_buffer_write(ReallocBuffer *b, int fd) { + size_t done = 0; + int r; + + if (!b) + return -EINVAL; + if (fd < 0) + return -EINVAL; + + for (;;) { + size_t l; + ssize_t n; + + l = realloc_buffer_size(b); + if (l <= 0) + return done > 0; + + n = write(fd, realloc_buffer_data(b), l); + if (n < 0) + return -errno; + + r = realloc_buffer_advance(b, (size_t) n); + if (r < 0) + 
return r; + + done += (size_t) n; + } +} + +int realloc_buffer_write_maybe(ReallocBuffer *b, int fd) { + + if (!b) + return -EINVAL; + + /* Much like realloc_buffer_write(), but only write things if we collected a certain amount of data */ + + if (realloc_buffer_size(b) < BUFFER_SIZE) + return 0; + + return realloc_buffer_write(b, fd); +} + +int realloc_buffer_printf(ReallocBuffer *b, const char *fmt, ...) { + va_list ap; + size_t m = 64; + + if (!b) + return -EINVAL; + if (!fmt) + return -EINVAL; + + for (;;) { + size_t mm; + void *p; + int n; + + p = realloc_buffer_extend(b, m); + if (!p) + return -ENOMEM; + + va_start(ap, fmt); + n = vsnprintf(p, m, fmt, ap); + va_end(ap); + + if (n < (int) m) { + realloc_buffer_shorten(b, m - n); + return 0; + } + + realloc_buffer_shorten(b, m); + + mm = 2 * m; + if (mm < m) + return -EOVERFLOW; + + m = mm; + } + +} diff --git a/src/realloc-buffer.h b/src/realloc-buffer.h index acdb88e1..a7209143 100644 --- a/src/realloc-buffer.h +++ b/src/realloc-buffer.h @@ -66,8 +66,21 @@ int realloc_buffer_advance(ReallocBuffer *b, size_t sz); int realloc_buffer_shorten(ReallocBuffer *b, size_t sz); int realloc_buffer_truncate(ReallocBuffer *b, size_t sz); -int realloc_buffer_read(ReallocBuffer *b, int fd); +int realloc_buffer_read_size(ReallocBuffer *b, int fd, size_t add); + +static inline int realloc_buffer_read(ReallocBuffer *b, int fd) { + return realloc_buffer_read_size(b, fd, (size_t) -1); +} + +int realloc_buffer_write(ReallocBuffer *b, int fd); +int realloc_buffer_write_maybe(ReallocBuffer *b, int fd); + +int realloc_buffer_read_target(ReallocBuffer *b, int fd, size_t target_size); void* realloc_buffer_steal(ReallocBuffer *b); +void* realloc_buffer_donate(ReallocBuffer *b, void *p, size_t size); + +int realloc_buffer_printf(ReallocBuffer *b, const char *fmt, ...) 
_printf_(2,3); + #endif diff --git a/src/util.c b/src/util.c index ee706a99..072e6373 100644 --- a/src/util.c +++ b/src/util.c @@ -806,8 +806,31 @@ int safe_atollu(const char *s, long long unsigned *ret_llu) { return 0; } +int safe_atollx(const char *s, long long unsigned *ret_llu) { + char *x = NULL; + unsigned long long l; + + assert(s); + + s += strspn(s, WHITESPACE); + + errno = 0; + l = strtoull(s, &x, 16); + if (errno > 0) + return -errno; + if (!x || x == s || *x != 0) + return -EINVAL; + if (*s == '-') + return -ERANGE; + + if (ret_llu) + *ret_llu = l; + + return 0; +} + int readlinkat_malloc(int fd, const char *p, char **ret) { - size_t l = 100; + size_t l = 256; int r; assert(p); @@ -1407,3 +1430,26 @@ int is_dir(const char* path, bool follow) { return !!S_ISDIR(st.st_mode); } + +int free_and_strdup(char **p, const char *s) { + char *t; + + assert(p); + + /* Replaces a string pointer with an strdup()ed new string, possibly freeing the old one. */ + + if (streq_ptr(*p, s)) + return 0; + + if (s) { + t = strdup(s); + if (!t) + return -ENOMEM; + } else + t = NULL; + + free(*p); + *p = t; + + return 1; +} diff --git a/src/util.h b/src/util.h index 73d28aa1..3168bec6 100644 --- a/src/util.h +++ b/src/util.h @@ -21,6 +21,7 @@ #include #include +#include "gcc-macro.h" #include "log.h" #define new(t, n) ((t*) malloc((n) * sizeof(t))) @@ -202,20 +203,6 @@ static inline uint64_t random_u64(void) { #define random_bytes(p, n) dev_urandom(p, n); -#define _sentinel_ __attribute__ ((sentinel)) -#define _unused_ __attribute__ ((unused)) -#define _likely_(x) (__builtin_expect(!!(x),1)) -#define _unlikely_(x) (__builtin_expect(!!(x),0)) -#define _malloc_ __attribute__ ((malloc)) -#define _pure_ __attribute__ ((pure)) -#define _packed_ __attribute__ ((packed)) -#define _const_ __attribute__ ((const)) -#ifdef __clang__ -# define _alloc_(...) -#else -# define _alloc_(...) 
__attribute__ ((alloc_size(__VA_ARGS__))) -#endif - #define assert_cc(expr) static_assert(expr, #expr); #define CASE_F(X) case X: @@ -316,6 +303,7 @@ int safe_atoi(const char *s, int *ret_i); int safe_atou(const char *s, unsigned *ret_u); int safe_atollu(const char *s, unsigned long long *ret_u); +int safe_atollx(const char *s, unsigned long long *ret_u); static inline int safe_atou64(const char *s, uint64_t *ret_u) { return safe_atollu(s, (unsigned long long*) ret_u); @@ -325,6 +313,10 @@ static inline int safe_atou32(const char *s, uint32_t *ret_u) { return safe_atou(s, (unsigned*) ret_u); } +static inline int safe_atox64(const char *s, uint64_t *ret_u) { + return safe_atollx(s, (unsigned long long*) ret_u); +} + int readlinkat_malloc(int fd, const char *p, char **ret); int readlink_malloc(const char *p, char **ret); @@ -448,6 +440,7 @@ static inline int parse_gid(const char *s, gid_t *ret_gid) { #define ALPHABET_UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ" #define ALPHABET ALPHABET_LOWER ALPHABET_UPPER #define DIGITS "0123456789" +#define HEXDIGITS DIGITS "ABCDEF" "abcdef" /* This is a bit more restricted than RFC3986 */ #define URL_PROTOCOL_FIRST ALPHABET_LOWER @@ -765,7 +758,8 @@ int is_dir(const char* path, bool follow); #define DEFINE_TRIVIAL_CLEANUP_FUNC(type, func) \ static inline void func##p(type *p) { \ - func(*p); \ + if (*p) \ + func(*p); \ } \ struct __useless_struct_to_allow_trailing_semicolon__ @@ -775,4 +769,20 @@ static inline void freep(void *p) { #define _cleanup_free_ _cleanup_(freep) +static inline void unlink_and_free(char *p) { + int saved_errno; + + if (!p) + return; + + saved_errno = errno; + (void) unlink(p); + errno = saved_errno; + + free(p); +} +DEFINE_TRIVIAL_CLEANUP_FUNC(char*, unlink_and_free); + +int free_and_strdup(char **p, const char *s); + #endif diff --git a/test/meson.build b/test/meson.build index 2a148fcc..5d24950f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -2,5 +2,5 @@ notify_wait_sources = [ files('notify-wait.c'), - 
log_sources, + util_sources, ] diff --git a/test/test-cache.sh.in b/test/test-cache.sh.in new file mode 100755 index 00000000..d14debd8 --- /dev/null +++ b/test/test-cache.sh.in @@ -0,0 +1,76 @@ +#!/bin/bash -ex +# SPDX-License-Identifier: LGPL-2.1+ + +SCRATCH_DIR=${TMPDIR:-/var/tmp}/test-casync.$RANDOM +mkdir -p $SCRATCH_DIR/src + +if [ `id -u` == 0 ] ; then + cp -aT @top_srcdir@ $SCRATCH_DIR/src/casync +else + # If we lack privileges we use rsync rather than cp to copy, as it will just skip over device nodes + rsync -a @top_srcdir@/ $SCRATCH_DIR/src/casync +fi + +cd $SCRATCH_DIR/src + +@top_builddir@/casync list > $SCRATCH_DIR/test.list +@top_builddir@/casync mtree > $SCRATCH_DIR/test.mtree +@top_builddir@/casync digest > $SCRATCH_DIR/test.digest + +# Make three versions of the caidx: one with no cache, one with an unpopulated cache, and one with a populated cache +@top_builddir@/casync make $SCRATCH_DIR/test1.caidx +@top_builddir@/casync make --cache $SCRATCH_DIR/test.cache $SCRATCH_DIR/test2.caidx +@top_builddir@/casync make --cache $SCRATCH_DIR/test.cache $SCRATCH_DIR/test3.caidx + +cmp $SCRATCH_DIR/test1.caidx $SCRATCH_DIR/test2.caidx +cmp $SCRATCH_DIR/test1.caidx $SCRATCH_DIR/test3.caidx + +for f in test1 test2 test3 ; do + @top_builddir@/casync list $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.list + @top_builddir@/casync mtree $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.mtree + @top_builddir@/casync digest $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.digest + + diff -q $SCRATCH_DIR/test.list $SCRATCH_DIR/$f.list + diff -q $SCRATCH_DIR/test.mtree $SCRATCH_DIR/$f.mtree + diff -q $SCRATCH_DIR/test.digest $SCRATCH_DIR/$f.digest +done + +# Make some changes: one in the beginning, one in the middle, one at the end +mkdir casync/000-early +touch casync/000-early/testa +mkdir casync/ggg-middle +touch casync/ggg-middle/testb +mkdir casync/zzz-late +touch casync/zzz-late/testc + +# Also remove a file +rm casync/NEWS + +# And update an existing one +echo xxx >> casync/README.md + 
+@top_builddir@/casync list > $SCRATCH_DIR/testx.list +@top_builddir@/casync mtree > $SCRATCH_DIR/testx.mtree +@top_builddir@/casync digest > $SCRATCH_DIR/testx.digest + +@top_builddir@/casync make $SCRATCH_DIR/test4.caidx +@top_builddir@/casync make --cache $SCRATCH_DIR/test.cache $SCRATCH_DIR/test5.caidx +@top_builddir@/casync make --cache $SCRATCH_DIR/test.cache $SCRATCH_DIR/test6.caidx +@top_builddir@/casync make $SCRATCH_DIR/test7.caidx + +cmp $SCRATCH_DIR/test4.caidx $SCRATCH_DIR/test5.caidx +cmp $SCRATCH_DIR/test4.caidx $SCRATCH_DIR/test6.caidx +cmp $SCRATCH_DIR/test4.caidx $SCRATCH_DIR/test7.caidx + +for f in test4 test5 test6 test7 ; do + @top_builddir@/casync list $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.list + @top_builddir@/casync mtree $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.mtree + @top_builddir@/casync digest $SCRATCH_DIR/$f.caidx > $SCRATCH_DIR/$f.digest + + diff -q $SCRATCH_DIR/testx.list $SCRATCH_DIR/$f.list + diff -q $SCRATCH_DIR/testx.mtree $SCRATCH_DIR/$f.mtree + diff -q $SCRATCH_DIR/testx.digest $SCRATCH_DIR/$f.digest +done + +chmod -R u+rwx $SCRATCH_DIR +rm -rf $SCRATCH_DIR diff --git a/test/test-caencoder.c b/test/test-caencoder.c index c221082d..a37690a3 100644 --- a/test/test-caencoder.c +++ b/test/test-caencoder.c @@ -82,7 +82,7 @@ static int encode(int dfd, int fd) { size_t sz; ssize_t n; - r = ca_encoder_get_data(e, &p, &sz); + r = ca_encoder_get_data(e, UINT64_MAX, &p, &sz); if (r == -ENODATA) break; if (r < 0)