Skip to content

Commit

Permalink
Add TODO comments. Further small review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
NWilson committed Oct 28, 2024
1 parent 3edb8c0 commit f28d637
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 15 deletions.
8 changes: 8 additions & 0 deletions src/pcre2_auto_possess.c
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,8 @@ switch(c)
case OP_CLASS:
case OP_XCLASS:
case OP_ECLASS:
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
if (c == OP_XCLASS || c == OP_ECLASS)
end = code + GET(code, 0) - 1;
else
Expand Down Expand Up @@ -1117,6 +1119,8 @@ for(;;)
break;
#endif

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Enclose in "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
case OP_ECLASS:
if (PRIV(eclass)(chr,
(list_ptr == list ? code : base_end) - list_ptr[2] + LINK_SIZE,
Expand Down Expand Up @@ -1226,6 +1230,8 @@ for (;;)
}
else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS || c == OP_ECLASS)
{
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
if (c == OP_XCLASS || c == OP_ECLASS)
repeat_opcode = code + GET(code, 1);
else
Expand Down Expand Up @@ -1301,6 +1307,8 @@ for (;;)
code += GET(code, 1 + 2*LINK_SIZE);
break;

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
case OP_ECLASS:
case OP_XCLASS:
code += GET(code, 1);
Expand Down
20 changes: 17 additions & 3 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -3572,6 +3572,13 @@ while (ptr < ptrend)
case CHAR_LEFT_SQUARE_BRACKET:
okquantifier = TRUE;

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/536
We shall support Perl's (?[...]) syntax. This needs a flag (for example,
class_perlext = TRUE) and a goto that jumps here when "(?[" is seen. In that
mode we must check for the closing "])" and also implement Perl's completely
idiosyncratic nesting and operator rules. Hopefully we can emit exactly the same
META codes as for the UTS#18 syntax, so that only parser changes are required
for the Perl syntax. */

/* In another (POSIX) regex library, the ugly syntax [[:<:]] and [[:>:]] is
used for "start of word" and "end of word". As these are otherwise illegal
sequences, we don't break anything by recognizing them. They are replaced
Expand Down Expand Up @@ -3721,7 +3728,8 @@ while (ptr < ptrend)
ptr[1] == CHAR_MINUS) &&
(ptr + 1 == ptrend || ptr[1] != CHAR_RIGHT_SQUARE_BRACKET))
{
// TODO: We need a better lookahead here to match Perl; should skip \Q\E, and whitespace in /xx mode.
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/538
We need a better lookahead here to match Perl; should skip \Q\E, and whitespace in /xx mode. */
errorcode = ERR50;
goto FAILED;
}
Expand Down Expand Up @@ -4094,7 +4102,8 @@ while (ptr < ptrend)
ptr[1] == CHAR_MINUS) &&
(ptr + 1 == ptrend || ptr[1] != CHAR_RIGHT_SQUARE_BRACKET))
{
// TODO: We need a better lookahead here to match Perl; should skip \Q\E, and whitespace in /xx mode.
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/538
We need a better lookahead here to match Perl; should skip \Q\E, and whitespace in /xx mode. */
errorcode = ERR50;
goto FAILED;
}
Expand Down Expand Up @@ -6781,6 +6790,8 @@ for (;; pptr++)
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
#endif
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Enclose in the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
case OP_ECLASS:
case OP_CLASS:
case OP_NCLASS:
Expand Down Expand Up @@ -6815,7 +6826,8 @@ for (;; pptr++)
(?!), disallow a quantifier at parse time. We ought to be able to ignore this. */

case OP_FAIL:
/* TODO: Should this be removed, now that '[]' has been changed so it doesn't produce OP_FAIL? */
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/541
Should this be removed, now that '[]' has been changed so it doesn't produce OP_FAIL? */
PCRE2_UNREACHABLE();
goto END_REPEAT;

Expand Down Expand Up @@ -7430,6 +7442,8 @@ for (;; pptr++)
tempcode += 1 + 32/sizeof(PCRE2_UCHAR);
break;

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
case OP_XCLASS:
case OP_ECLASS:
tempcode += GET(tempcode, 1);
Expand Down
7 changes: 4 additions & 3 deletions src/pcre2_compile.h
Original file line number Diff line number Diff line change
Expand Up @@ -231,8 +231,9 @@ extern const int PRIV(posix_class_maps)[];

/* Merge intersecting ranges of classes. */

class_ranges *PRIV(optimize_class)(uint32_t *start_ptr, uint32_t *end_ptr,
uint32_t options, uint32_t xoptions, compile_block* cb);
class_ranges *PRIV(optimize_class)(uint32_t *start_ptr,
const uint32_t *end_ptr, uint32_t options, uint32_t xoptions,
compile_block *cb);

/* Set bits in classbits according to the property type */

Expand All @@ -258,7 +259,7 @@ BOOL PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
OP_CLASS, OP_NCLASS, OP_XCLASS, or OP_ALLANY into pcode. */

BOOL PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
uint32_t *start_ptr, uint32_t *end_ptr, PCRE2_UCHAR **pcode,
uint32_t *start_ptr, const uint32_t *end_ptr, PCRE2_UCHAR **pcode,
BOOL negate_class, int *errorcodeptr, compile_block *cb,
PCRE2_SIZE *lengthptr);

Expand Down
23 changes: 15 additions & 8 deletions src/pcre2_compile_class.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,8 @@ append_non_ascii_range(uint32_t options, uint32_t *buffer)
}

static size_t
parse_class(uint32_t *ptr, uint32_t *end_ptr, uint32_t options, uint32_t *buffer)
parse_class(uint32_t *ptr, const uint32_t *end_ptr, uint32_t options,
uint32_t *buffer)
{
size_t total_size = 0;
size_t size;
Expand Down Expand Up @@ -457,8 +458,9 @@ static const uint32_t char_list_starts[] = {
XCL_CHAR_LIST_LOW_16_START,
};

class_ranges *PRIV(optimize_class)(uint32_t *start_ptr, uint32_t *end_ptr,
uint32_t options, uint32_t xoptions, compile_block* cb)
class_ranges *PRIV(optimize_class)(uint32_t *start_ptr,
const uint32_t *end_ptr, uint32_t options, uint32_t xoptions,
compile_block *cb)
{
class_ranges* cranges;
uint32_t *ptr;
Expand Down Expand Up @@ -933,11 +935,9 @@ while (p[0] < 256)
p += n + 1;
}
}
#endif



#if PCRE2_CODE_UNIT_WIDTH == 8
/*************************************************
* Add characters not in a list to a class *
*************************************************/
Expand Down Expand Up @@ -966,7 +966,7 @@ while (p[0] < 256)
p++;
}
}
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */



Expand Down Expand Up @@ -1042,8 +1042,9 @@ become a single OP_CLASS (or OP_NCLASS, OP_XCLASS, or OP_ALLANY). */

BOOL
PRIV(compile_class_not_nested)(uint32_t options, uint32_t xoptions,
uint32_t *start_ptr, uint32_t *end_ptr, PCRE2_UCHAR **pcode, BOOL negate_class,
int *errorcodeptr, compile_block *cb, PCRE2_SIZE *lengthptr)
uint32_t *start_ptr, const uint32_t *end_ptr, PCRE2_UCHAR **pcode,
BOOL negate_class, int *errorcodeptr, compile_block *cb,
PCRE2_SIZE *lengthptr)
{
uint32_t *pptr = start_ptr;
PCRE2_UCHAR *code = *pcode;
Expand Down Expand Up @@ -1942,6 +1943,12 @@ PRIV(compile_class_nested)(uint32_t options, uint32_t xoptions,
uint32_t **pptr, PCRE2_UCHAR **pcode, int *errorcodeptr,
compile_block *cb, PCRE2_SIZE *lengthptr)
{
/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
We shall convert this recursive descent into a stack-based precedence parser.
We shall optimise it, so that OP_CLASS/NCLASS are constant-folded.
We shall potentially fold all the bitsets, so that there's only one bitset
held by the OP_ECLASS. */

uint32_t *ptr = *pptr;
PCRE2_UCHAR *code = *pcode;
BOOL negated;
Expand Down
2 changes: 2 additions & 0 deletions src/pcre2_jit_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,8 @@ switch(*cc)
case OP_CALLOUT_STR:
return cc + GET(cc, 1 + 2*LINK_SIZE);

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8" once we stop emitting ECLASS for this case. */
case OP_ECLASS:
case OP_XCLASS:
return cc + GET(cc, 1);
Expand Down
2 changes: 2 additions & 0 deletions src/pcre2_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,8 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
#define Lmin F->temp_32[0]
#define Lmax F->temp_32[1]

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Enclose in "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */
case OP_ECLASS:
{
Leclass_data = Fecode + 1 + LINK_SIZE; /* Save for matching */
Expand Down
6 changes: 6 additions & 0 deletions src/pcre2_study.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ for (;;)

/* Check a class for variable quantification */

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Add back the "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */

case OP_CLASS:
case OP_NCLASS:
case OP_XCLASS:
Expand Down Expand Up @@ -1714,6 +1717,9 @@ do

/* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */

/* TODO: [EC] https://github.com/PCRE2Project/pcre2/issues/537
Enclose in "ifdef SUPPORT_WIDE_CHARS" once we stop emitting ECLASS for this case. */

case OP_ECLASS:
return SSB_FAIL;

Expand Down
2 changes: 1 addition & 1 deletion src/pcre2_xclass.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,7 @@ while (ptr < data_end)
}

/* The final bit left on the stack now holds the match result. */
return (stack & 1u);
return (stack & 1u) != 0;
}

/* End of pcre2_xclass.c */

0 comments on commit f28d637

Please sign in to comment.