/*
 * Patch summary (leading commentary, placed before the first diff header).
 *
 * Top-level Makefile:
 *   - registers tests/re_strings as an additional SUBDIR.
 *
 * src/libre/ac.c:
 *   - drops the ENDID_NONE sentinel and the single `fsm_end_id_t endid`
 *     field of struct trie_state; a `struct state_set *endids` member is
 *     used instead, so one trie state can carry several end ids.
 *   - newstate() initialises `endids` to NULL.
 *   - trie_add_word() adds `*endid` (cast to fsm_state_t) to the set
 *     when the caller passes a non-NULL endid pointer.
 *   - trie_to_fsm_state() takes the shared `single_end` shortcut only when
 *     the state's endid set is empty; otherwise it iterates the set and
 *     calls fsm_setendidstate() per id, returning 0 if any call fails.
 *
 * tests/re_strings/:
 *   - Makefile builds one runner per re_strings*.c and records PASS/FAIL.
 *   - re_strings1.c: the nine two-letter strings over {a, b, c}.
 *   - re_strings2.c: a list in which "duplicate" appears three times.
 *   - testutil.c: run_test() gives each input a sequential id starting
 *     at 0, builds the fsm with flags = 0, executes every input and asserts
 *     that its id is among the ids reported by fsm_getendids().
 *
 * NOTE(review): the result of state_set_add() in trie_add_word() is
 *   discarded. If that call can fail (presumably on allocation), the end id
 *   would be dropped silently -- confirm and propagate the failure.
 * NOTE(review): no hunk shown here releases `endids`; verify that the trie
 *   teardown path (not visible in this patch) frees the set.
 * NOTE(review): run_test() frees the re_strings object but never releases
 *   the fsm returned by re_strings_build(); presumably the caller owns it --
 *   confirm whether a matching free call is needed.
 * NOTE(review): -UNDEBUG is set for re_strings${n}.c only, while every
 *   assert lives in testutil.c, which is compiled with plain ${CFLAGS}. If
 *   CFLAGS can carry -DNDEBUG, the checks compile away -- verify.
 * NOTE(review): several `#include` directives below have no header name;
 *   this looks like an extraction artifact of this copy of the patch.
 */
diff --git a/Makefile b/Makefile index f1f4f1396..499239fd9 100644 --- a/Makefile +++ b/Makefile @@ -131,6 +131,7 @@ SUBDIR += tests/pcre-flags SUBDIR += tests/pcre-repeat SUBDIR += tests/pred SUBDIR += tests/re_literal +SUBDIR += tests/re_strings SUBDIR += tests/reverse SUBDIR += tests/trim SUBDIR += tests/union diff --git a/src/libre/ac.c b/src/libre/ac.c index d5f8e6f4b..121eed487 100644 --- a/src/libre/ac.c +++ b/src/libre/ac.c @@ -13,10 +13,10 @@ #include #include +#include #include "ac.h" -#define ENDID_NONE ((fsm_end_id_t)-1) enum { POOL_BLOCK_SIZE = 256 }; struct trie_state { @@ -26,7 +26,9 @@ struct trie_state { unsigned int index; unsigned int output:1; unsigned int have_st:1; - fsm_end_id_t endid; /* or ENDID_NONE */ + + /* use a state set as an endid set */ + struct state_set *endids; }; struct trie_pool { @@ -75,6 +77,7 @@ newstate(struct trie_graph *g) st->index = ++g->nstates; st->output = 0; + st->endids = NULL; return st; } @@ -161,7 +164,9 @@ trie_add_word(struct trie_graph *g, const char *w, size_t n, const fsm_end_id_t g->depth = n; } - st->endid = (endid == NULL ? 
ENDID_NONE : *endid); + if (endid != NULL) { + state_set_add(&st->endids, NULL, (fsm_state_t)*endid); + } return g; } @@ -281,7 +286,7 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm, assert(fsm != NULL); assert(q != NULL); - if (ts->output && have_end && ts->endid == ENDID_NONE) { + if (ts->output && have_end && state_set_empty(ts->endids)) { *q = single_end; return 1; } @@ -318,8 +323,13 @@ trie_to_fsm_state(struct trie_state *ts, struct fsm *fsm, if (ts->output) { fsm_setend(fsm, st, 1); - if (ts->endid != ENDID_NONE) { - if (!fsm_setendidstate(fsm, st, ts->endid)) { + + struct state_iter si; + fsm_state_t state; + state_set_reset(ts->endids, &si); + while (state_set_next(&si, &state)) { + fsm_end_id_t endid = (fsm_end_id_t)state; + if (!fsm_setendidstate(fsm, st, endid)) { return 0; } } diff --git a/tests/re_strings/Makefile b/tests/re_strings/Makefile new file mode 100644 index 000000000..7fc7f2548 --- /dev/null +++ b/tests/re_strings/Makefile @@ -0,0 +1,26 @@ +.include "../../share/mk/top.mk" + +TEST.tests/re_strings != ls -1 tests/re_strings/re_strings*.c +TEST_SRCDIR.tests/re_strings = tests/re_strings +TEST_OUTDIR.tests/re_strings = ${BUILD}/tests/re_strings + +.for n in ${TEST.tests/re_strings:T:R:C/^re_strings//} +test:: ${TEST_OUTDIR.tests/re_strings}/res${n} +SRC += ${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c +CFLAGS.${TEST_SRCDIR.tests/re_strings}/re_strings${n}.c = -UNDEBUG + +${TEST_OUTDIR.tests/re_strings}/run${n}: ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o + ${CC} ${CFLAGS} -o ${TEST_OUTDIR.tests/re_strings}/run${n} ${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o ${TEST_OUTDIR.tests/re_strings}/testutil.o ${BUILD}/lib/libfsm.a ${BUILD}/lib/libre.a + +${TEST_OUTDIR.tests/re_strings}/re_strings${n}.o: tests/re_strings/testutil.h + +${TEST_OUTDIR.tests/re_strings}/res${n}: ${TEST_OUTDIR.tests/re_strings}/run${n} + ( ${TEST_OUTDIR.tests/re_strings}/run${n} 1>&2 && echo PASS || echo 
FAIL ) > ${TEST_OUTDIR.tests/re_strings}/res${n} + +.for lib in ${LIB:Mlibfsm} ${LIB:Mlibre} +${TEST_OUTDIR.tests/re_strings}/run${n}: ${BUILD}/lib/${lib:R}.a +.endfor +.endfor + +${TEST_OUTDIR.tests/re_strings}/testutil.o: tests/re_strings/testutil.c + ${CC} ${CFLAGS} -c -o ${TEST_OUTDIR.tests/re_strings}/testutil.o tests/re_strings/testutil.c diff --git a/tests/re_strings/re_strings1.c b/tests/re_strings/re_strings1.c new file mode 100644 index 000000000..44f10d41d --- /dev/null +++ b/tests/re_strings/re_strings1.c @@ -0,0 +1,21 @@ +#include "testutil.h" + +const char *strings[] = { + "aa", + "ab", + "ac", + "ba", + "bb", + "bc", + "ca", + "cb", + "cc", + NULL, +}; + +int main(int argc, char **argv) +{ + (void)argc; + (void)argv; + return run_test(strings); +} diff --git a/tests/re_strings/re_strings2.c b/tests/re_strings/re_strings2.c new file mode 100644 index 000000000..6e4c80459 --- /dev/null +++ b/tests/re_strings/re_strings2.c @@ -0,0 +1,17 @@ +#include "testutil.h" + +const char *strings[] = { + "first", + "duplicate", + "duplicate", + "duplicate", + "last", + NULL, +}; + +int main(int argc, char **argv) +{ + (void)argc; + (void)argv; + return run_test(strings); +} diff --git a/tests/re_strings/testutil.c b/tests/re_strings/testutil.c new file mode 100644 index 000000000..078ca4f4c --- /dev/null +++ b/tests/re_strings/testutil.c @@ -0,0 +1,69 @@ +#include "testutil.h" + +#include +#include + +#include "fsm/fsm.h" +#include "fsm/options.h" + +#include "re/re.h" +#include "re/strings.h" + +static struct fsm_options opt; + +#define MAX_INPUTS 100 +static fsm_end_id_t id_buf[MAX_INPUTS]; + +int +run_test(const char **strings) +{ + struct re_strings *s = re_strings_new(); + assert(s != NULL); + + fsm_end_id_t id = 0; + const char **input = strings; + while (*input != NULL) { + if (!re_strings_add_str(s, *input, &id)) { + assert(!"re_strings_add_str"); + } + + input++; + id++; + assert(id < MAX_INPUTS); + } + + const int flags = 0; /* not anchored */ + + struct 
fsm *fsm = re_strings_build(s, &opt, flags); + assert(fsm != NULL); + + /* Each literal string input should match, and the set of + * matching endids should include the expected one. */ + id = 0; + input = strings; + while (*input != NULL) { + fsm_state_t end; + const char **string = input; + const int res = fsm_exec(fsm, fsm_sgetc, string, &end, NULL); + assert(res > 0); /* match */ + + size_t written; + enum fsm_getendids_res eres = fsm_getendids(fsm, end, + MAX_INPUTS, id_buf, &written); + assert(eres == FSM_GETENDIDS_FOUND); + bool found = false; + for (size_t i = 0; i < written; i++) { + if (id_buf[i] == id) { + found = true; + break; + } + } + assert(found); + + input++; + id++; + } + + re_strings_free(s); + return EXIT_SUCCESS; +} diff --git a/tests/re_strings/testutil.h b/tests/re_strings/testutil.h new file mode 100644 index 000000000..6898200b7 --- /dev/null +++ b/tests/re_strings/testutil.h @@ -0,0 +1,11 @@ +#ifndef TESTUTIL_H +#define TESTUTIL_H + +#include +#include +#include + +int +run_test(const char **strings); + +#endif