Skip to content

Commit

Permalink
Merge pull request #106 from dag-erling/des/r-fixes
Browse files Browse the repository at this point in the history
Low-hanging fruit from the R repository
  • Loading branch information
dag-erling authored Aug 16, 2024
2 parents 1cff35b + b6d3254 commit c504ac3
Show file tree
Hide file tree
Showing 13 changed files with 74 additions and 55 deletions.
4 changes: 4 additions & 0 deletions lib/regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
if (source->rewind == NULL || source->compare == NULL)
  {
    /* The backtracking matcher requires rewind and compare
       capabilities from the input stream. */
#ifndef TRE_USE_ALLOCA
    /* Only the non-alloca build releases `tags' explicitly
       (presumably it was heap-allocated earlier in this function --
       confirm against the allocation site).  The braces are required:
       without them this `if (tags)' would become the sole body of the
       capability check and the return below would run
       unconditionally. */
    if (tags)
      xfree(tags);
#endif /* !TRE_USE_ALLOCA */
    return REG_BADPAT;
  }
}
status = tre_tnfa_run_backtrack(tnfa, string, (int)len, type,
Expand Down
4 changes: 2 additions & 2 deletions lib/tre-ast.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ typedef struct {
void *obj; /* Pointer to actual node. */
int nullable;
int submatch_id;
int num_submatches;
int num_tags;
unsigned int num_submatches;
unsigned int num_tags;
tre_pos_and_tags_t *firstpos;
tre_pos_and_tags_t *lastpos;
} tre_ast_node_t;
Expand Down
40 changes: 24 additions & 16 deletions lib/tre-compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
/* True for first pass (counting number of needed tags) */
int first_pass = (mem == NULL || tnfa == NULL);
int *regset, *orig_regset;
int num_tags = 0; /* Total number of tags. */
int num_minimals = 0; /* Number of special minimal tags. */
int tag = 0; /* The tag that is to be added next. */
int next_tag = 1; /* Next tag to use after this one. */
unsigned int num_tags = 0; /* Total number of tags. */
unsigned int num_minimals = 0; /* Number of special minimal tags. */
unsigned int tag = 0; /* The tag that is to be added next. */
unsigned int next_tag = 1; /* Next tag to use after this one. */
int *parents; /* Stack of submatches the current submatch is
contained in. */
int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
Expand Down Expand Up @@ -688,8 +688,8 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
{
tre_literal_t *lit = node->obj;
int pos = lit->position;
int min = lit->code_min;
int max = lit->code_max;
long min = lit->code_min;
long max = lit->code_max;
if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
{
/* XXX - e.g. [ab] has only one position but two
Expand All @@ -712,8 +712,16 @@ tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
first_tag = 0;
}
*result = tre_ast_new_literal(mem, min, max);
if (*result == NULL)
if (*result == NULL) {
status = REG_ESPACE;
break;
}
if (!IS_SPECIAL(lit)) {
((tre_literal_t *)(*result)->obj)->u.class = lit->u.class;
((tre_literal_t *)(*result)->obj)->neg_classes = lit->neg_classes;
} else if (IS_PARAMETER(lit)) {
((tre_literal_t *)(*result)->obj)->u.params = lit->u.params;
}

if (pos > *max_pos)
*max_pos = pos;
Expand Down Expand Up @@ -1049,7 +1057,7 @@ tre_set_empty(tre_mem_t mem)
}

static tre_pos_and_tags_t *
tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
tre_set_one(tre_mem_t mem, int position, long code_min, long code_max,
tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
{
tre_pos_and_tags_t *new_set;
Expand Down Expand Up @@ -1338,7 +1346,7 @@ tre_compute_npfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position, 0,
TRE_CHAR_MAX, 0, NULL,
(int)lit->code_max);
lit->code_max);
if (!node->lastpos)
return REG_ESPACE;
}
Expand All @@ -1361,13 +1369,13 @@ tre_compute_npfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
node->nullable = 0;
lit->position = (*nextpos)++;
node->firstpos =
tre_set_one(mem, lit->position, (int)lit->code_min,
(int)lit->code_max, 0, NULL, -1);
tre_set_one(mem, lit->position, lit->code_min,
lit->code_max, 0, NULL, -1);
if (!node->firstpos)
return REG_ESPACE;
node->lastpos = tre_set_one(mem, lit->position,
(int)lit->code_min,
(int)lit->code_max,
lit->code_min,
lit->code_max,
lit->u.class, lit->neg_classes,
-1);
if (!node->lastpos)
Expand Down Expand Up @@ -1880,8 +1888,8 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
parse_ctx.len = n;
parse_ctx.cflags = cflags;
parse_ctx.max_backref = -1;
/* workaround for PR#14408: use 8-bit optimizations in 8-bit mode */
parse_ctx.cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX;
/* Use 8-bit optimizations in 8-bit mode */
parse_ctx.mb_cur_max = (cflags & REG_USEBYTES) ? 1 : TRE_MB_CUR_MAX;
DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
errcode = tre_parse(&parse_ctx);
if (errcode != REG_OK)
Expand Down Expand Up @@ -2021,7 +2029,7 @@ tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags)
/* If in eight bit mode, compute a table of characters that can be the
first character of a match. */
tnfa->first_char = -1;
if (TRE_MB_CUR_MAX == 1 && !tmp_ast_l->nullable)
if (parse_ctx.mb_cur_max == 1 && !tmp_ast_l->nullable)
{
int count = 0;
tre_cint_t k;
Expand Down
3 changes: 0 additions & 3 deletions lib/tre-match-approx.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ char *alloca ();
#endif
#endif /* TRE_USE_ALLOCA */

#define __USE_STRING_INLINES
#undef __NO_INLINE__

#include <assert.h>
#include <stdlib.h>
#include <string.h>
Expand Down
2 changes: 1 addition & 1 deletion lib/tre-match-backtrack.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ typedef struct tre_backtrack_struct {
#define tre_bt_mem_new tre_mem_newa
#define tre_bt_mem_alloc tre_mem_alloca
#define tre_bt_mem_destroy(obj) do { } while (0)
#define xafree(obj) /* do nothing, obj was obtained with alloca() */
#define xafree(obj) do { } while (0) /* do nothing, obj was obtained with alloca() */
#else /* !TRE_USE_ALLOCA */
#define tre_bt_mem_new tre_mem_new
#define tre_bt_mem_alloc tre_mem_alloc
Expand Down
6 changes: 3 additions & 3 deletions lib/tre-match-parallel.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ typedef struct {

#ifdef TRE_DEBUG
static void
tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
tre_print_reach(const tre_tnfa_reach_t *reach, int num_tags)
{
int i;

Expand Down Expand Up @@ -344,9 +344,9 @@ tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,

#ifdef TRE_DEBUG
DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
tre_print_reach(tnfa, reach_next, num_tags);
tre_print_reach(reach_next, num_tags);
DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c));
tre_print_reach(tnfa, reach_next, num_tags);
tre_print_reach(reach_next, num_tags);
#endif /* TRE_DEBUG */

/* Swap `reach' and `reach_next'. */
Expand Down
4 changes: 2 additions & 2 deletions lib/tre-mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,12 @@ tre_mem_alloc_impl(tre_mem_t mem, int provided, void *provided_block,
}
else
{
int block_size;
size_t block_size;
if (size * 8 > TRE_MEM_BLOCK_SIZE)
block_size = size * 8;
else
block_size = TRE_MEM_BLOCK_SIZE;
DPRINT(("tre_mem_alloc: allocating new %d byte block\n",
DPRINT(("tre_mem_alloc: allocating new %zu byte block\n",
block_size));
l = xmalloc(sizeof(*l));
if (l == NULL)
Expand Down
41 changes: 25 additions & 16 deletions lib/tre-parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
unsigned int j;
DPRINT(("Expanding macro '%c' => '%s'\n",
tre_macros[i].c, tre_macros[i].expansion));
for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
for (j = 0; tre_macros[i].expansion[j] && j < buf_len - 1; j++)
buf[j] = tre_macros[i].expansion[j];
buf[j] = 0;
break;
Expand Down Expand Up @@ -128,7 +128,6 @@ tre_expand_ctype(tre_mem_t mem, tre_ctype_t class, tre_ast_node_t ***items,
reg_errcode_t status = REG_OK;
tre_cint_t c;
int j, min = -1, max = 0;
assert(TRE_MB_CUR_MAX == 1);

DPRINT((" expanding class to character ranges\n"));
for (j = 0; (j < 256) && (status == REG_OK); j++)
Expand Down Expand Up @@ -162,7 +161,7 @@ tre_compare_items(const void *a, const void *b)
const tre_ast_node_t *node_a = *(tre_ast_node_t * const *)a;
const tre_ast_node_t *node_b = *(tre_ast_node_t * const *)b;
tre_literal_t *l_a = node_a->obj, *l_b = node_b->obj;
int a_min = l_a->code_min, b_min = l_b->code_min;
long a_min = l_a->code_min, b_min = l_b->code_min;

if (a_min < b_min)
return -1;
Expand Down Expand Up @@ -194,7 +193,15 @@ int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
int tre_isprint_func(tre_cint_t c)
{
return
#if defined(WIN32) && TRE_WCHAR
/* On Windows, iswprint(L'\t') incorrectly returns true. */
c != L'\t' &&
#endif
tre_isprint(c);
}
int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
Expand Down Expand Up @@ -300,7 +307,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
{
char tmp_str[64];
const tre_char_t *endptr = re + 2;
int len;
size_t len;
DPRINT(("tre_parse_bracket: class: '%.*" STRF "'\n", REST(re)));
while (endptr < ctx->re_end && *endptr != CHAR_COLON)
endptr++;
Expand All @@ -310,7 +317,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
#ifdef TRE_WCHAR
{
tre_char_t tmp_wcs[64];
wcsncpy(tmp_wcs, re + 2, (size_t)len);
wcsncpy(tmp_wcs, re + 2, len);
tmp_wcs[len] = L'\0';
#if defined HAVE_WCSRTOMBS
{
Expand All @@ -332,7 +339,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate,
if (!class)
status = REG_ECTYPE;
/* Optimize character classes for 8 bit character sets. */
if (status == REG_OK && ctx->cur_max == 1)
if (status == REG_OK && ctx->mb_cur_max == 1)
{
status = tre_expand_ctype(ctx->mem, class, items,
&i, &max_i, ctx->cflags);
Expand Down Expand Up @@ -421,7 +428,8 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
int negate = 0;
reg_errcode_t status = REG_OK;
tre_ast_node_t **items, *u, *n;
int i = 0, j, max_i = 32, curr_max, curr_min;
int i = 0, j, max_i = 32;
long curr_max, curr_min;
tre_ctype_t neg_classes[MAX_NEG_CLASSES];
int num_neg_classes = 0;

Expand Down Expand Up @@ -451,21 +459,21 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
/* Build a union of the items in the array, negated if necessary. */
for (j = 0; j < i && status == REG_OK; j++)
{
int min, max;
long min, max;
tre_literal_t *l = items[j]->obj;
min = l->code_min;
max = l->code_max;

DPRINT(("item: %d - %d, class %ld, curr_max = %d\n",
(int)l->code_min, (int)l->code_max, (long)l->u.class, curr_max));
DPRINT(("item: %ld - %ld, class %ld, curr_max = %ld\n",
l->code_min, l->code_max, (long)l->u.class, curr_max));

if (negate)
{
if (min < curr_max)
{
/* Overlap. */
curr_max = MAX(max + 1, curr_max);
DPRINT(("overlap, curr_max = %d\n", curr_max));
DPRINT(("overlap, curr_max = %ld\n", curr_max));
l = NULL;
}
else
Expand All @@ -490,7 +498,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
if (l != NULL)
{
int k;
DPRINT(("creating %d - %d\n", (int)l->code_min, (int)l->code_max));
DPRINT(("creating %ld - %ld\n", l->code_min, l->code_max));
if (num_neg_classes > 0)
{
l->neg_classes = tre_mem_alloc(ctx->mem,
Expand Down Expand Up @@ -525,7 +533,7 @@ tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
if (negate)
{
int k;
DPRINT(("final: creating %d - %d\n", curr_min, (int)TRE_CHAR_MAX));
DPRINT(("final: creating %ld - %ld\n", curr_min, (long)TRE_CHAR_MAX));
n = tre_ast_new_literal(ctx->mem, curr_min, TRE_CHAR_MAX);
if (n == NULL)
status = REG_ESPACE;
Expand Down Expand Up @@ -1353,7 +1361,7 @@ tre_parse(tre_parse_ctx_t *ctx)
break;

case CHAR_RPAREN: /* end of current subexpression */
if (((ctx->cflags & REG_EXTENDED) && depth > 0)
if ((ctx->cflags & REG_EXTENDED && depth > 0)
|| (!(ctx->cflags & REG_EXTENDED) && ctx->re > ctx->re_start
&& *(ctx->re - 1) == CHAR_BACKSLASH))
{
Expand Down Expand Up @@ -1621,7 +1629,7 @@ tre_parse(tre_parse_ctx_t *ctx)


/* We are expecting an atom. If the subexpression (or the whole
regexp ends here, we interpret it as an empty expression
regexp) ends here, we interpret it as an empty expression
(which matches an empty string). */
if (
#ifdef REG_LITERAL
Expand Down Expand Up @@ -1694,6 +1702,7 @@ tre_parse(tre_parse_ctx_t *ctx)
{
int submatch_id = tre_stack_pop_int(stack);

assert(result);
if (result->submatch_id >= 0)
{
tre_ast_node_t *n, *tmp_node;
Expand Down
4 changes: 2 additions & 2 deletions lib/tre-parse.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ typedef struct {
int nofirstsub;
/* The currently set approximate matching parameters. */
int params[TRE_PARAM_LAST];
/* the CUR_MAX in use */
int cur_max;
/* the MB_CUR_MAX in use */
int mb_cur_max;
} tre_parse_ctx_t;

/* Parses a wide character regexp pattern into a syntax tree. This parser
Expand Down
13 changes: 7 additions & 6 deletions lib/xmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <config.h>
#endif /* HAVE_CONFIG_H */

#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
Expand Down Expand Up @@ -71,18 +72,18 @@ hash_table_new(void)
return tbl;
}

static int
static unsigned int
hash_void_ptr(void *ptr)
{
int hash;
int i;
unsigned int hash;
unsigned int i;

/* I took this hash function just off the top of my head, I have
no idea whether it is bad or very bad. */
hash = 0;
for (i = 0; i < (int)sizeof(ptr)*8 / TABLE_BITS; i++)
for (i = 0; i < sizeof(ptr) * 8 / TABLE_BITS; i++)
{
hash ^= (unsigned long)ptr >> i*8;
hash ^= (uintptr_t)ptr >> i * 8;
hash += i * 17;
hash &= TABLE_MASK;
}
Expand All @@ -93,7 +94,7 @@ static void
hash_table_add(hashTable *tbl, void *ptr, size_t bytes,
const char *file, int line, const char *func)
{
int i;
unsigned int i;
hashTableItem *item, *new;

i = hash_void_ptr(ptr);
Expand Down
4 changes: 2 additions & 2 deletions m4/vl_prog_cc_warnings.m4
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ EOF
dnl GCC
if test "$GCC" = "yes"; then
if test -z "$ansi"; then
vl_cv_prog_cc_warnings="-Wall"
vl_cv_prog_cc_warnings="-Wall -Wextra"
else
vl_cv_prog_cc_warnings="-Wall -ansi -pedantic"
vl_cv_prog_cc_warnings="-Wall -Wextra -ansi -pedantic"
fi
dnl Most compilers print some kind of a version string with some command
Expand Down
2 changes: 1 addition & 1 deletion tests/randtest.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#define REGEXP_MAX_LEN 16

int
main(int argc, char **argv)
main(void)
{
int len, i, flags, n;
char regex[50];
Expand Down
Loading

0 comments on commit c504ac3

Please sign in to comment.