Skip to content

Commit

Permalink
Changed regex operators to only work with STRINGs
Browse files Browse the repository at this point in the history
refs: TryGhost#22

The main use case for regex operators is doing type-ahead type matches on values.
These kinds of values will start with a single char, contain spaces, and contain characters
that should be interpretted literally.

To support this, the right value type in NQL is STRING, not LITERAL.

LITERALs are designed for matching tag slugs and similar computed values.

This commit changes the behaviour completely to only support STRING.

We also need to be super careful about how we handle regex characters and strings so
I've added some more tests to demonstrate that the right thing is happening
  • Loading branch information
ErisDS committed Mar 8, 2022
1 parent 6d0f96b commit 5a41d38
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 24 deletions.
13 changes: 13 additions & 0 deletions packages/mongo-knex/test/unit/convertor.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,16 @@ describe('Relations', function () {
.should.eql('select * from `posts` where `posts`.`id` not in (select `posts`.`id` from `posts` left join `posts_meta` on `posts_meta`.`post_id` = `posts`.`id` where `posts_meta`.`meta_title` in (\'Meta of A Whole New World\'))');
});
});

describe('RegExp/Like queries', function () {
it('are well behaved', function () {
runQuery({title: {$regex: /'/i}})
.should.eql('select * from `posts` where lower(`posts`.`title`) like \'%\\\'%\'');

runQuery({title: {$regex: /;/i}})
.should.eql('select * from `posts` where lower(`posts`.`title`) like \'%;%\'');

runQuery({title: {$regex: /';select * from `settings` where `value` like '/i}})
.should.eql('select * from `posts` where lower(`posts`.`title`) like \'%\\\';select * from `settings` where `value` like \\\'%\'');
});
});
8 changes: 4 additions & 4 deletions packages/nql-lang/dist/parser.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions packages/nql-lang/lib/scope.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ module.exports = {
return value.replace(re, '$1');
},

literalToRegExp(value, modifier) {
let escapedValue = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
stringToRegExp(value, modifier) {
let escapedValue = value.replace(/[.*+?^$(){}|[\]\\]/g, '\\$&');

if (modifier === '^') {
escapedValue = '^' + escapedValue;
Expand Down
6 changes: 3 additions & 3 deletions packages/nql-lang/src/nql.y
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ DATEOP
;

REGEXPOP
: CONTAINS LITERAL { $$ = yy.literalToRegExp($2); }
| STARTSWITH LITERAL { $$ = yy.literalToRegExp($2, '^'); }
| ENDSWITH LITERAL { $$ = yy.literalToRegExp($2, '$'); }
: CONTAINS STRING { $2 = $2.replace(/^'|'$/g, ''); $2 = yy.unescape($2); $$ = yy.stringToRegExp($2); }
| STARTSWITH STRING { $2 = $2.replace(/^'|'$/g, ''); $2 = yy.unescape($2); $$ = yy.stringToRegExp($2, '^'); }
| ENDSWITH STRING { $2 = $2.replace(/^'|'$/g, ''); $2 = yy.unescape($2); $$ = yy.stringToRegExp($2, '$'); }
;

OP
Expand Down
17 changes: 17 additions & 0 deletions packages/nql-lang/test/lexer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,23 @@ describe('Lexer', function () {
{token: 'GT', matched: '>'},
{token: 'NUMBER', matched: '5'}
]);

lex('tag:photo+image:-null,tag.count:>5+foo:~\'bar\'').should.eql([
{token: 'PROP', matched: 'tag:'},
{token: 'LITERAL', matched: 'photo'},
{token: 'AND', matched: '+'},
{token: 'PROP', matched: 'image:'},
{token: 'NOT', matched: '-'},
{token: 'NULL', matched: 'null'},
{token: 'OR', matched: ','},
{token: 'PROP', matched: 'tag.count:'},
{token: 'GT', matched: '>'},
{token: 'NUMBER', matched: '5'},
{token: 'AND', matched: '+'},
{token: 'PROP', matched: 'foo:'},
{token: 'CONTAINS', matched: '~'},
{token: 'STRING', matched: '\'bar\''}
]);
});

it('grouped expressions', function () {
Expand Down
32 changes: 26 additions & 6 deletions packages/nql-lang/test/parser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,37 @@ describe('Parser', function () {
});

it('can parse CONTAINS, STARTSWITH and ENDSWITH with and without NOT', function () {
parse('email:~gmail.com').should.eql({email: {$regex: /gmail\.com/i}});
parse(`email:~'gmail.com'`).should.eql({email: {$regex: /gmail\.com/i}});

parse('email:-~gmail.com').should.eql({email: {$not: /gmail\.com/i}});
parse(`email:-~'gmail.com'`).should.eql({email: {$not: /gmail\.com/i}});

parse('email:~^gmail.com').should.eql({email: {$regex: /^gmail\.com/i}});
parse(`email:~^'gmail.com'`).should.eql({email: {$regex: /^gmail\.com/i}});

parse('email:-~^gmail.com').should.eql({email: {$not: /^gmail\.com/i}});
parse(`email:-~^'gmail.com'`).should.eql({email: {$not: /^gmail\.com/i}});

parse('email:~$gmail.com').should.eql({email: {$regex: /gmail\.com$/i}});
parse(`email:~$'gmail.com'`).should.eql({email: {$regex: /gmail\.com$/i}});

parse('email:-~$gmail.com').should.eql({email: {$not: /gmail\.com$/i}});
parse(`email:-~$'gmail.com'`).should.eql({email: {$not: /gmail\.com$/i}});
});

it('can parse CONTAINS, STARTSWITH and ENDSWITH and handle regexchars', function () {
parse(`email:~'.*+?^$(){}|[]\\'`).should.eql({email: {$regex: /\.\*\+\?\^\$\(\)\{\}\|\[\]\\/i}});
});

it('can parse CONTAINS, STARTSWITH and ENDSWITH and handle quotes', function () {
parse(`name:~'john o\\'nolan'`).should.eql({name: {$regex: /john o'nolan/i}});

parse(`name:~'john o\\"nolan'`).should.eql({name: {$regex: /john o"nolan/i}});

// TODO: Fix this BUG
(function () {
parse(`name:~'john o"nolan'`);
}).should.throw();

// This will probably never be possible
(function () {
parse(`name:~'john o'nolan'`);
}).should.throw();
});
});

Expand Down
6 changes: 3 additions & 3 deletions packages/nql/test/integration/nql_knex.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ describe('Integration with Knex', function () {

describe('Regex / Like queries', function () {
it('can do a contains query', function () {
const query = nql('title:~ne');
const query = nql(`title:~'ne'`);

return query
.querySQL(knex('posts'))
Expand All @@ -40,7 +40,7 @@ describe('Integration with Knex', function () {
});

it('can do a startswith query', function () {
const query = nql('title:~^wh');
const query = nql(`title:~^'wh'`);

return query
.querySQL(knex('posts'))
Expand All @@ -52,7 +52,7 @@ describe('Integration with Knex', function () {
});

it('can do an endswith query', function () {
const query = nql('title:~$es');
const query = nql(`title:~$'es'`);

return query
.querySQL(knex('posts'))
Expand Down
12 changes: 6 additions & 6 deletions packages/nql/test/integration/nql_mingo.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ describe('Integration with Mingo', function () {
});

it('$regex contains', function () {
const query = nql('title:~th');
const query = nql(`title:~'th'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(false);
query.queryJSON(simpleJSON.posts[1]).should.eql(false);
Expand All @@ -111,7 +111,7 @@ describe('Integration with Mingo', function () {
});

it('$not contains', function () {
const query = nql('title:-~th');
const query = nql(`title:-~'th'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(true);
query.queryJSON(simpleJSON.posts[1]).should.eql(true);
Expand All @@ -122,7 +122,7 @@ describe('Integration with Mingo', function () {
});

it('$regex startswith', function () {
const query = nql('title:~^Th');
const query = nql(`title:~^'Th'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(false);
query.queryJSON(simpleJSON.posts[1]).should.eql(false);
Expand All @@ -133,7 +133,7 @@ describe('Integration with Mingo', function () {
});

it('$not startswith', function () {
const query = nql('title:-~^Th');
const query = nql(`title:-~^'Th'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(true);
query.queryJSON(simpleJSON.posts[1]).should.eql(true);
Expand All @@ -144,7 +144,7 @@ describe('Integration with Mingo', function () {
});

it('$regex endswith', function () {
const query = nql('title:~$st');
const query = nql(`title:~$'st'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(true);
query.queryJSON(simpleJSON.posts[1]).should.eql(true);
Expand All @@ -155,7 +155,7 @@ describe('Integration with Mingo', function () {
});

it('$not endswith', function () {
const query = nql('title:-~$st');
const query = nql(`title:-~$'st'`);

query.queryJSON(simpleJSON.posts[0]).should.eql(false);
query.queryJSON(simpleJSON.posts[1]).should.eql(false);
Expand Down
68 changes: 68 additions & 0 deletions packages/nql/test/unit/query.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
require('../utils');

const nql = require('../../');
const knex = require('knex')({client: 'mysql'});

describe('NQL -> SQL', function () {
describe('Can handle regexes safely', function () {
it('can handle regex STRING with escaped quotes', function () {
let query;

query = nql(`name:~'John O\\'Nolan'`);

query.lex().should.eql([
{token: 'PROP', matched: 'name:'},
{token: 'CONTAINS', matched: '~'},
{token: 'STRING', matched: '\'John O\\\'Nolan\''}
]);

query.toJSON().should.eql({name: {$regex: /John O'Nolan/i}});
query.querySQL(knex('users')).toQuery().should.eql('select * from `users` where lower(`users`.`name`) like \'%john o\\\'nolan%\'');

query = nql(`name:~'John O\\"Nolan'`);
query.toJSON().should.eql({name: {$regex: /John O"Nolan/i}});

query.lex().should.eql([
{token: 'PROP', matched: 'name:'},
{token: 'CONTAINS', matched: '~'},
{token: 'STRING', matched: '\'John O\\"Nolan\''}
]);

query.querySQL(knex('users')).toQuery().should.eql('select * from `users` where lower(`users`.`name`) like \'%john o\\"nolan%\'');

query = nql(`name:~'A\\'B\\"C\\"D\\''`);
query.toJSON().should.eql({name: {$regex: /A'B"C"D'/i}});

query.lex().should.eql([
{token: 'PROP', matched: 'name:'},
{token: 'CONTAINS', matched: '~'},
{token: 'STRING', matched: '\'A\\\'B\\"C\\"D\\\'\''}
]);

query.querySQL(knex('users')).toQuery().should.eql('select * from `users` where lower(`users`.`name`) like \'%a\\\'b\\"c\\"d\\\'%\'');
});

it('errors for unescaped quotes / injection patterns', function () {
(function () {
nql(`name:~'bad';'`).lex();
}).should.throw();

(function () {
nql(`name:~'';'`).lex();
}).should.throw();

let query;
// we can't force bad quotes, it errors as above
// query = nql("name:~'';select * from `settings` where `value` like ''");

// Can we trick SQL?
query = nql("name:~'\\';select * from `settings` where `value` like \\''"); // eslint-disable-line quotes

// The regex only has the regex chars escaped, not quotes
query.toJSON().should.eql({name: {$regex: /';select \* from `settings` where `value` like '/i}});

//SQL still ends up correctly escaped. This is all handled by knex... but having a test feels right
query.querySQL(knex('users')).toQuery().should.eql('select * from `users` where lower(`users`.`name`) like \'%\\\';select * from `settings` where `value` like \\\'%\'');
});
});
});

0 comments on commit 5a41d38

Please sign in to comment.