Skip to content

Commit

Permalink
add support for control characters (\cX)
Browse files Browse the repository at this point in the history
  • Loading branch information
CogentRedTester committed Jun 11, 2023
1 parent d5023dc commit 4120c3a
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 0 deletions.
5 changes: 5 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,11 @@ declare class SuperExpressive {
*/
char(c: string): SuperExpressive;

/**
* Matches the control character written as `\cX` in a regular expression, where `X` is the latin letter `c`.
* `c` must be a single letter from a-z (upper or lower case); any other value is rejected with an error.
*/
controlChar(c: string): SuperExpressive;

/**
* Matches any character that falls between `a` and `b`. Ordering is defined by a character's ASCII or unicode value.
*/
Expand Down
14 changes: 14 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const replaceAll = (s, find, replace) => s.replace(new RegExp(`\\${find}`, 'g'),
// Returns `s` with every character listed in `specialChars` prefixed by a backslash.
const escapeSpecial = s => specialChars.reduce((acc, char) => replaceAll(acc, char, `\\${char}`), s);

// One or more latin letters followed by any number of word characters (case-insensitive).
// NOTE(review): presumably used to validate capture-group names; confirm against callers.
const namedGroupRegex = /^[a-z]+\w*$/i;
// Exactly one latin letter, upper or lower case: the only input controlChar() accepts.
const controlCharRegex = /^[a-z]$/i;

const quantifierTable = {
oneOrMore: '+',
Expand Down Expand Up @@ -91,6 +92,7 @@ const t = {
char: asType('char', { classCompatible: true }),
range: asType('range', { classCompatible: true }),
string: asType('string', { quantifierRequiresGroup: true }),
controlChar: asType('controlChar', { classCompatible: true }),
namedBackreference: name => deferredType('namedBackreference', { name }),
backreference: index => deferredType('backreference', { index }),
capture: deferredType('capture', { containsChildren: true }),
Expand Down Expand Up @@ -454,6 +456,17 @@ class SuperExpressive {
return next;
}

controlChar(c) {
assert(typeof c === 'string', `c must be a string (got ${c})`);
assert(controlCharRegex.test(c), `controlChar() can only be called with a single character from a-z (got ${c})`);

const next = this[clone]();
const currentFrame = next[getCurrentFrame]();
currentFrame.elements.push(next[applyQuantifier](t.controlChar(c.toUpperCase())));

return next;
}

range(a, b) {
const strA = a.toString();
const strB = b.toString();
Expand Down Expand Up @@ -664,6 +677,7 @@ class SuperExpressive {
case 'nullByte': return '\\0';
case 'string': return el.value;
case 'char': return el.value;
case 'controlChar': return `\\c${el.value}`;
case 'range': return `[${el.value[0]}-${el.value[1]}]`;
case 'anythingButRange': return `[^${el.value[0]}-${el.value[1]}]`;
case 'anyOfChars': return `[${el.value}]`;
Expand Down
12 changes: 12 additions & 0 deletions index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,18 @@ describe('SuperExpressive', () => {
() => SuperExpressive().char('hello')
);

// A lower-case letter is accepted and appears upper-cased in the expected \cX escape.
testRegexEquality('controlChar', /\cM/, SuperExpressive().controlChar('m'));
// More than one character must be rejected with the validation message.
testErrorConditition(
'controlChar: more than one',
'controlChar() can only be called with a single character from a-z (got aa)',
() => SuperExpressive().controlChar('aa')
);
// A single character that is not a latin letter must be rejected as well.
testErrorConditition(
'controlChar: invalid character',
'controlChar() can only be called with a single character from a-z (got ~)',
() => SuperExpressive().controlChar('~')
);

testRegexEquality('range', /[a-z]/, SuperExpressive().range('a', 'z'));
});

Expand Down
14 changes: 14 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
- [.anythingButRange(a, b)](#anythingButRangea-b)
- [.string(s)](#strings)
- [.char(c)](#charc)
- [.controlChar(c)](#controlCharc)
- [.range(a, b)](#rangea-b)
- [.subexpression(expr, opts)](#subexpressionexpr-opts)
- [.toRegexString()](#toRegexString)
Expand Down Expand Up @@ -860,6 +861,19 @@ SuperExpressive()
/x/
```

### .controlChar(c)

Matches a control character using caret notation (`Ctrl^c`), where `c` is a single latin letter from A-Z (upper or lower case).

**Example**
```JavaScript
SuperExpressive()
.controlChar('J')
.toRegex();
// ->
/\cJ/
```

### .range(a, b)

Matches any character that falls between `a` and `b`. Ordering is defined by a character's ASCII or unicode value.
Expand Down

0 comments on commit 4120c3a

Please sign in to comment.