From 468042d228f28ddea0529bd85b009cd0ac6b52b0 Mon Sep 17 00:00:00 2001 From: Wez Furlong Date: Tue, 24 May 2016 15:53:34 -0700 Subject: [PATCH] use art to optimize pending list construction Summary: In the recursive delete case we will receive notifications working up to the root of a tree from its leaves. Our existing pending list has very limited aggregation of events and will schedule a stat call for each deleted node. It is more efficient if we can lop off a portion of the tree when we add a recursive element at a higher location in the tree. That's what this diff does. It makes an individual add operation a bit more expensive than it was previously, but the cost is tiny compared to the savings we get in terms of filesystem activity. The included benchmark demonstrates this with a synthetic tree containing around 260k nodes; these are added to a pending list first from the root to the leaves, and secondly from the leaves to the root. As part of this, we also generate a call to `w_lstat` to approximate the activity of our `stat_path` function. Prior to switching on the ART assisted index, both directions of insert would occupy approx. 2 seconds of wall time on my macbook. This represents the best case hot cache time for the `w_lstat` calls because the benchmark repeatedly examines the same file. With this diff we can prune the list of 260k nodes down to 12 nodes, and we will discover the pruned nodes as part of the recursive tree walk that will follow this in a real world situation. Test Plan: `make integration` (enabled -fsanitize=address to prove that our memory access is good, too). 
The benchmark results with this diff (previously, these would take 2 seconds each): ``` # took 0.011s to insert 12 items into pending coll # took 0.251s to reverse insert 12 items into pending coll ``` I also tried a pretty aggressive test where I would alternate between running: ``` hg sparse -X fooboid hg st watchman-diag | arc paste hg sparse --reset ``` (where fooboid is a large subdirectory of a large repo) --- Makefile.am | 17 +++ configure.ac | 1 + pending.c | 212 ++++++++++++++++++++++++++++++++---- root.c | 4 +- string.c | 26 +++++ tests/pending_test.c | 144 ++++++++++++++++++++++++ thirdparty/libart/src/art.c | 9 ++ watchman.h | 6 +- winbuild/Makefile | 2 +- 9 files changed, 393 insertions(+), 28 deletions(-) create mode 100644 tests/pending_test.c diff --git a/Makefile.am b/Makefile.am index b63172e9e7c1d..f3957e57d2831 100644 --- a/Makefile.am +++ b/Makefile.am @@ -121,6 +121,7 @@ TESTS = \ tests/argv.t \ tests/bser.t \ tests/ignore.t \ + tests/pending.t \ tests/log.t \ tests/wildmatch.t noinst_PROGRAMS = $(TESTS) @@ -219,6 +220,22 @@ tests_ignore_t_SOURCES = \ string.c \ log.c +tests_pending_t_CPPFLAGS = $(THIRDPARTY_CPPFLAGS) +tests_pending_t_LDADD = $(ART_LIB) $(TAP_LIB) $(JSON_LIB) +tests_pending_t_SOURCES = \ + tests/pending_test.c \ + watcher/helper.c \ + hash.c \ + ht.c \ + ignore.c \ + pending.c \ + expflags.c \ + opendir.c \ + cfg.c \ + time.c \ + string.c \ + log.c + tests_log_t_CPPFLAGS = $(THIRDPARTY_CPPFLAGS) tests_log_t_LDADD = $(JSON_LIB) $(TAP_LIB) tests_log_t_SOURCES = \ diff --git a/configure.ac b/configure.ac index 05a795c9178a8..c96e7d70153ee 100644 --- a/configure.ac +++ b/configure.ac @@ -256,6 +256,7 @@ AC_CHECK_HEADERS(CoreServices/CoreServices.h, [ ]) AC_CHECK_FUNCS(backtrace backtrace_symbols backtrace_symbols_fd) AC_CHECK_FUNCS(sys_siglist) +AC_CHECK_FUNCS(memmem) if test -n "$ac_cv_header_sys_statvfs_h"; then AC_CHECK_MEMBERS([struct statvfs.f_fstypename,struct statvfs.f_basetype], diff --git a/pending.c b/pending.c index 
c514562875f53..bdd49f7ee0ac5 100644 --- a/pending.c +++ b/pending.c @@ -10,6 +10,54 @@ static const struct flag_map kflags[] = { {0, NULL}, }; +// Since the tree has no internal knowledge about path structures, when we +// search for "foo/bar" it may return a prefix match for an existing node +// with the key "foo/bard". We use this function to test whether the string +// exactly matches the input ("foo/bar") or whether it has a slash as the next +// character after the common prefix ("foo/bar/" as a prefix). +static bool is_path_prefix(const char *path, size_t path_len, + size_t common_prefix) { + if (common_prefix == path_len) { + return true; + } + + if (common_prefix > path_len) { + return false; + } + + return path[common_prefix] == WATCHMAN_DIR_SEP +#ifdef _WIN32 + // Windows allows both kinds of slashes + || path[common_prefix] == '/' +#endif + ; +} + +// Helper to un-doubly-link a pending item. +static inline void unlink_item(struct watchman_pending_collection *coll, + struct watchman_pending_fs *p) { + if (coll->pending == p) { + coll->pending = p->next; + } + if (p->prev) { + p->prev->next = p->next; + } + if (p->next) { + p->next->prev = p->prev; + } +} + +// Helper to doubly-link a pending item to the head of a collection. 
+static inline void link_head(struct watchman_pending_collection *coll, + struct watchman_pending_fs *p) { + p->prev = NULL; + p->next = coll->pending; + if (coll->pending) { + coll->pending->prev = p; + } + coll->pending = p; +} + /* Free a pending_fs node */ void w_pending_fs_free(struct watchman_pending_fs *p) { w_string_delref(p->path); @@ -20,23 +68,20 @@ void w_pending_fs_free(struct watchman_pending_fs *p) { bool w_pending_coll_init(struct watchman_pending_collection *coll) { coll->pending = NULL; coll->pinged = false; - coll->pending_uniq = w_ht_new(WATCHMAN_BATCH_LIMIT, &w_ht_string_funcs); - if (!coll->pending_uniq) { - return false; - } if (pthread_mutex_init(&coll->lock, NULL)) { return false; } if (pthread_cond_init(&coll->cond, NULL)) { return false; } + art_tree_init(&coll->tree); return true; } /* destroy a pending_coll */ void w_pending_coll_destroy(struct watchman_pending_collection *coll) { w_pending_coll_drain(coll); - w_ht_free(coll->pending_uniq); + art_tree_destroy(&coll->tree); pthread_mutex_destroy(&coll->lock); pthread_cond_destroy(&coll->cond); } @@ -49,7 +94,8 @@ void w_pending_coll_drain(struct watchman_pending_collection *coll) { w_pending_fs_free(p); } - w_ht_free_entries(coll->pending_uniq); + art_tree_destroy(&coll->tree); + art_tree_init(&coll->tree); } /* compute a deadline on entry, then obtain the collection lock @@ -98,14 +144,122 @@ void w_pending_coll_unlock(struct watchman_pending_collection *coll) { } } -static inline void consolidate_item(struct watchman_pending_fs *p, - int flags) { +// Deletion is a bit awkward in this radix tree implementation. +// We can't recursively delete a given prefix as a built-in operation +// and it is non-trivial to add that functionality right now. +// When we lop-off a portion of a tree that we're going to analyze +// recursively, we have to iterate each leaf and explicitly delete +// that leaf. 
+// Since deletion invalidates the iteration state we have to signal +// to stop iteration after each deletion and then retry the prefix +// deletion. +// We use this kid_context state to pass down the required information +// to the iterator callback so that we adjust the overall state correctly. +struct kid_context { + w_string_t *root; + struct watchman_pending_collection *coll; +}; + +// We need to guarantee that we never collapse a cookie notification +// out of the pending list, because we absolutely must observe it coming +// in via the kernel notification mechanism in order for synchronization +// to be correct. +// Since we don't have a w_root_t available, we can't tell what the +// precise cookie prefix is for the current pending list here, so +// we do a substring match. Not the most elegant thing in the world. +static inline bool is_possibly_a_cookie(w_string_t *path) { + return w_string_contains_cstr_len(path, WATCHMAN_COOKIE_PREFIX, + sizeof(WATCHMAN_COOKIE_PREFIX) - 1); +} + +// This is the iterator callback we use to prune out obsoleted leaves. +// We need to compare the prefix to make sure that we don't delete +// a sibling node by mistake (see commentary on the is_path_prefix +// function for more on that). +static int delete_kids(void *data, const unsigned char *key, uint32_t key_len, + void *value) { + struct kid_context *ctx = data; + struct watchman_pending_fs *p = value; + unused_parameter(value); + + if ((p->flags & W_PENDING_CRAWL_ONLY) == 0 && key_len > ctx->root->len && + is_path_prefix((const char *)key, key_len, ctx->root->len) && + !is_possibly_a_cookie(p->path)) { + // Unlink the child from the pending index. + unlink_item(ctx->coll, p); + // and completely free it. + + w_pending_fs_free(p); + + // Remove it from the art tree also. + art_delete(&ctx->coll->tree, key, key_len); + + // Stop iteration because we just invalidated the iterator state + // by modifying the tree mid-iteration. 
+ return 1; + } + + return 0; +} + +// if there are any entries that are obsoleted by a recursive insert, +// walk over them now and mark them as ignored. +static void +maybe_prune_obsoleted_children(struct watchman_pending_collection *coll, + w_string_t *path, int flags) { + if ((flags & (W_PENDING_RECURSIVE | W_PENDING_CRAWL_ONLY)) == + W_PENDING_RECURSIVE) { + struct kid_context ctx = {path, coll}; + // Since deletion invalidates the iterator, we need to repeatedly + // call this to prune out the nodes. It will return 0 once no + // matching prefixes are found and deleted. + while (art_iter_prefix(&coll->tree, (const uint8_t *)path->buf, path->len, + delete_kids, &ctx)) { + // OK; try again + ; + } + } +} + +static inline void consolidate_item(struct watchman_pending_collection *coll, + struct watchman_pending_fs *p, int flags) { // Increase the strength of the pending item if either of these // flags are set. // We upgrade crawl-only as well as recursive; it indicates that // we've recently just performed the stat and we want to avoid // infinitely trying to stat-and-crawl p->flags |= flags & (W_PENDING_CRAWL_ONLY|W_PENDING_RECURSIVE); + + maybe_prune_obsoleted_children(coll, p->path, p->flags); +} + +// Check the tree to see if there is a path that is earlier/higher in the +// filesystem than the input path; if there is, and it is recursive, +// return true to indicate that there is no need to track this new path +// due to the already scheduled higher level path. 
+static bool +is_obsoleted_by_containing_dir(struct watchman_pending_collection *coll, + w_string_t *path) { + struct watchman_pending_fs *p; + art_leaf *leaf = + art_longest_match(&coll->tree, (const uint8_t *)path->buf, path->len); + if (!leaf) { + return false; + } + p = leaf->value; + + if (p->flags & W_PENDING_RECURSIVE && + is_path_prefix(path->buf, path->len, leaf->key_len)) { + + if (is_possibly_a_cookie(path)) { + return false; + } + + // Yes: the pre-existing entry higher up in the tree obsoletes this + // one that we would add now. + return true; + } + return false; } /* add a pending entry. Will consolidate an existing entry with the @@ -116,19 +270,26 @@ bool w_pending_coll_add(struct watchman_pending_collection *coll, struct watchman_pending_fs *p; char flags_label[128]; - p = w_ht_val_ptr(w_ht_get(coll->pending_uniq, w_ht_ptr_val(path))); + p = art_search(&coll->tree, (const unsigned char*)path->buf, path->len); if (p) { /* Entry already exists: consolidate */ - consolidate_item(p, flags); + consolidate_item(coll, p, flags); /* all done */ return true; } + if (is_obsoleted_by_containing_dir(coll, path)) { + return true; + } + + // Try to allocate the new node before we prune any children. 
p = calloc(1, sizeof(*p)); if (!p) { return false; } + maybe_prune_obsoleted_children(coll, path, flags); + w_expand_flags(kflags, flags, flags_label, sizeof(flags_label)); w_log(W_LOG_DBG, "add_pending: %.*s %s\n", path->len, path->buf, flags_label); @@ -137,9 +298,8 @@ bool w_pending_coll_add(struct watchman_pending_collection *coll, p->path = path; w_string_addref(path); - p->next = coll->pending; - coll->pending = p; - w_ht_set(coll->pending_uniq, w_ht_ptr_val(path), w_ht_ptr_val(p)); + link_head(coll, p); + art_insert(&coll->tree, (const uint8_t *)path->buf, path->len, p); return true; } @@ -169,21 +329,28 @@ void w_pending_coll_append(struct watchman_pending_collection *target, struct watchman_pending_fs *p, *target_p; while ((p = w_pending_coll_pop(src)) != NULL) { - target_p = w_ht_val_ptr(w_ht_get(target->pending_uniq, - w_ht_ptr_val(p->path))); + target_p = art_search(&target->tree, + (const uint8_t *)p->path->buf, p->path->len); if (target_p) { /* Entry already exists: consolidate */ - consolidate_item(target_p, p->flags); + consolidate_item(target, target_p, p->flags); w_pending_fs_free(p); continue; } - p->next = target->pending; - target->pending = p; - w_ht_set(target->pending_uniq, w_ht_ptr_val(p->path), w_ht_ptr_val(p)); + if (is_obsoleted_by_containing_dir(target, p->path)) { + w_pending_fs_free(p); + continue; + } + maybe_prune_obsoleted_children(target, p->path, p->flags); + + link_head(target, p); + art_insert(&target->tree, (const uint8_t *)p->path->buf, p->path->len, p); } - w_ht_free_entries(src->pending_uniq); + // Empty the src tree and reset it + art_tree_destroy(&src->tree); + art_tree_init(&src->tree); src->pending = NULL; } @@ -196,8 +363,7 @@ struct watchman_pending_fs *w_pending_coll_pop( struct watchman_pending_fs *p = coll->pending; if (p) { - coll->pending = p->next; - p->next = NULL; + unlink_item(coll, p); } return p; @@ -205,7 +371,7 @@ struct watchman_pending_fs *w_pending_coll_pop( /* Returns the number of unique pending 
items in the collection */ uint32_t w_pending_coll_size(struct watchman_pending_collection *coll) { - return w_ht_size(coll->pending_uniq); + return (uint32_t)art_size(&coll->tree); } /* vim:ts=2:sw=2:et: diff --git a/root.c b/root.c index a216f11bed136..7220e1aa3dd7f 100644 --- a/root.c +++ b/root.c @@ -498,12 +498,12 @@ bool w_root_process_pending(w_root_t *root, } w_log(W_LOG_DBG, "processing %d events in %s\n", - w_ht_size(coll->pending_uniq), root->root_path->buf); + w_pending_coll_size(coll), root->root_path->buf); // Steal the contents pending = coll->pending; coll->pending = NULL; - w_ht_free_entries(coll->pending_uniq); + w_pending_coll_drain(coll); while (pending) { p = pending; diff --git a/string.c b/string.c index 9f38564f44f77..c4bb995076eba 100644 --- a/string.c +++ b/string.c @@ -348,6 +348,32 @@ bool w_string_startswith_caseless(w_string_t *str, w_string_t *prefix) return true; } +bool w_string_contains_cstr_len(w_string_t *str, const char *needle, + uint32_t nlen) { +#if HAVE_MEMMEM + return memmem(str->buf, str->len, needle, nlen) != NULL; +#else + // Most likely only for Windows. + // Inspired by http://stackoverflow.com/a/24000056/149111 + const char *haystack = str->buf; + uint32_t hlen = str->len; + const char *limit; + + if (nlen == 0 || hlen < nlen) { + return false; + } + + limit = haystack + hlen - nlen + 1; + while ((haystack = memchr(haystack, needle[0], limit - haystack)) != NULL) { + if (memcmp(haystack, needle, nlen) == 0) { + return true; + } + haystack++; + } + return false; +#endif +} + w_string_t *w_string_canon_path(w_string_t *str) { int end; diff --git a/tests/pending_test.c b/tests/pending_test.c new file mode 100644 index 0000000000000..4103a6b232f26 --- /dev/null +++ b/tests/pending_test.c @@ -0,0 +1,144 @@ +/* Copyright 2016-present Facebook, Inc. + * Licensed under the Apache License, Version 2.0. 
*/ + +#include "watchman.h" +#include "thirdparty/tap.h" +#include "thirdparty/libart/src/art.h" + +struct pending_list { + struct watchman_pending_fs *pending, *avail, *end; +}; + +void w_request_shutdown(void) {} + +bool w_should_log_to_clients(int level) +{ + unused_parameter(level); + return false; +} + +void w_log_to_clients(int level, const char *buf) +{ + unused_parameter(level); + unused_parameter(buf); +} + + +struct watchman_pending_fs *next_pending(struct pending_list *list) { + if (list->avail == list->end) { + fail("make list alloc size bigger (used %u entries)", + list->avail - list->pending); + abort(); + } + + return list->avail++; +} + +static void build_list(struct pending_list *list, w_string_t *parent_name, + size_t depth, size_t num_files, size_t num_dirs) { + size_t i; + for (i = 0; i < num_files; i++) { + struct watchman_pending_fs *item = next_pending(list); + item->path = w_string_make_printf("%.*s/file%d", parent_name->len, + parent_name->buf, i); + item->flags = W_PENDING_VIA_NOTIFY; + } + + for (i = 0; i < num_dirs; i++) { + struct watchman_pending_fs *item = next_pending(list); + item->path = w_string_make_printf("%.*s/dir%d", parent_name->len, + parent_name->buf, i); + item->flags = W_PENDING_RECURSIVE; + + if (depth > 0) { + build_list(list, item->path, depth - 1, num_files, num_dirs); + } + } +} + +size_t process_items(struct watchman_pending_collection *coll) { + struct watchman_pending_fs *item; + size_t drained = 0; + struct stat st; + + while ((item = w_pending_coll_pop(coll)) != NULL) { + // To simulate looking at the file, we're just going to stat + // ourselves over and over, as the path we put in the list + // doesn't exist on the filesystem. We're measuring hot cache + // (best case) stat performance here. 
+ w_lstat(__FILE__, &st, true); + //diag("lstat(%.*s)", item->path->len, item->path->buf); + w_pending_fs_free(item); + + drained++; + } + return drained; +} + +// Simulate +static void bench_pending(void) { + // These parameters give us 262140 items to track + const size_t tree_depth = 7; + const size_t num_files_per_dir = 8; + const size_t num_dirs_per_dir = 4; + w_string_t *root_name = w_string_new("/some/path"); + struct pending_list list; + const size_t alloc_size = 280000; + + list.pending = calloc(alloc_size, sizeof(struct watchman_pending_fs)); + list.avail = list.pending; + list.end = list.pending + alloc_size; + + build_list(&list, root_name, tree_depth, num_files_per_dir, num_dirs_per_dir); + diag("built list with %u items", list.avail - list.pending); + + struct timeval start, end; + { + struct watchman_pending_collection coll; + struct watchman_pending_fs *item; + size_t drained = 0; + + w_pending_coll_init(&coll); + + gettimeofday(&start, NULL); + for (item = list.pending; item < list.avail; item++) { + w_pending_coll_add(&coll, item->path, item->now, item->flags); + } + drained = process_items(&coll); + + gettimeofday(&end, NULL); + diag("took %.3fs to insert %u items into pending coll", + w_timeval_diff(start, end), drained); + } + + // and now in reverse + { + struct watchman_pending_collection coll; + struct watchman_pending_fs *item; + size_t drained = 0; + + w_pending_coll_init(&coll); + + gettimeofday(&start, NULL); + for (item = list.avail - 1; item >= list.pending; item--) { + w_pending_coll_add(&coll, item->path, item->now, item->flags); + } + + drained = process_items(&coll); + + gettimeofday(&end, NULL); + diag("took %.3fs to reverse insert %u items into pending coll", + w_timeval_diff(start, end), drained); + } +} + +int main(int argc, char **argv) { + (void)argc; + (void)argv; + + plan_tests(1); + bench_pending(); + pass("got here"); + + return exit_status(); +} diff --git a/thirdparty/libart/src/art.c b/thirdparty/libart/src/art.c index 
76febb8655ba6..35c92de24f190 100644 --- a/thirdparty/libart/src/art.c +++ b/thirdparty/libart/src/art.c @@ -257,6 +257,11 @@ void* art_search(const art_tree *t, const unsigned char *key, int key_len) { depth = depth + n->partial_len; } + // Don't overflow the key buffer if we go too deep + if (depth >= key_len) { + return NULL; + } + // Recursively search child = find_child(n, key[depth]); n = (child) ? *child : NULL; @@ -290,6 +295,10 @@ art_leaf* art_longest_match(const art_tree *t, const unsigned char *key, int key depth = depth + n->partial_len; } + if (depth >= key_len) { + // Node depth is greater than input key; can't match + return NULL; + } // Recursively search child = find_child(n, key[depth]); n = (child) ? *child : NULL; diff --git a/watchman.h b/watchman.h index 34c237a767964..b7690822e862a 100644 --- a/watchman.h +++ b/watchman.h @@ -252,7 +252,7 @@ typedef struct watchman_clock w_clock_t; #define W_PENDING_VIA_NOTIFY 2 #define W_PENDING_CRAWL_ONLY 4 struct watchman_pending_fs { - struct watchman_pending_fs *next; + struct watchman_pending_fs *next, *prev; w_string_t *path; struct timeval now; int flags; @@ -260,10 +260,10 @@ struct watchman_pending_fs { struct watchman_pending_collection { struct watchman_pending_fs *pending; - w_ht_t *pending_uniq; pthread_mutex_t lock; pthread_cond_t cond; bool pinged; + art_tree tree; }; bool w_pending_coll_init(struct watchman_pending_collection *coll); @@ -676,6 +676,8 @@ w_string_t *w_string_path_cat_cstr_len(w_string_t *parent, const char *rhs, uint32_t rhs_len); bool w_string_startswith(w_string_t *str, w_string_t *prefix); bool w_string_startswith_caseless(w_string_t *str, w_string_t *prefix); +bool w_string_contains_cstr_len(w_string_t *str, const char *needle, + uint32_t nlen); w_string_t *w_string_shell_escape(const w_string_t *str); w_string_t *w_string_implode(json_t *arr, const char *delim); diff --git a/winbuild/Makefile b/winbuild/Makefile index 61a79135983f6..e357e4c60e3d1 100644 --- 
a/winbuild/Makefile +++ b/winbuild/Makefile @@ -203,7 +203,7 @@ tests\argv.exe: tests\argv.obj $(TEST_OBJS) @echo LINK $@ $(LINKER) $** $(LIBS) -tests\art.exe: tests\art_test.obj $(TEST_OBJS) +tests\art.exe: tests\art_test.obj $(TEST_OBJS) log.obj @echo LINK $@ $(LINKER) $** $(LIBS)