Skip to content

Commit

Permalink
Made detection for Non-TSQL files less aggressive (#2442)
Browse files Browse the repository at this point in the history
* cut down word list

* only flag files with 2+ non-TSQL keywords

* further trimmed words

* added constant and more comments

* more trimming

* added aw and wwi test

* shortened test files
  • Loading branch information
laurennat authored Feb 4, 2025
1 parent ee150f1 commit 87f3c0e
Show file tree
Hide file tree
Showing 6 changed files with 1,012 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,13 @@ public static class TSqlDetectionConstants
///
public const int SqlFileErrorLimit = 50;

/// <summary>
/// The number of non-T-SQL keywords that must be found in a script
/// before a non-T-SQL notification is triggered.
/// </summary>
/// <remarks>
/// A limit above 1 makes detection less aggressive: a single stray match
/// is not enough to flag a file as non-T-SQL.
/// NOTE(review): the detection loop appears to count every matching token,
/// so the same keyword occurring twice may reach the limit - confirm this is intended.
/// </remarks>
public const int NonTSqlKeywordLimit = 2;

/// <summary>
/// A list of keywords that exist in MySQL and PostgreSQL documentation
/// but are not present in T-SQL documentation.
Expand All @@ -52,30 +59,27 @@ public static class TSqlDetectionConstants
/// - PostgreSQL: https://www.postgresql.org/docs/current/sql-keywords-appendix.html
/// - T-SQL: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/reserved-keywords-transact-sql
/// Some of the MySql keywords have an overlap with PGSQL keywords
/// Additionally, this was tested on files from https://github.com/microsoft/sql-server-samples/tree/master
/// Keywords flagged by TSql files in this repo were removed
/// </summary>
public static readonly HashSet<string> Keywords = new HashSet<string>(new[]
{
// MYSQL words
"ACCESSIBLE",
"AGAINST",
"ALGORITHM",
"ANALYZE",
"AUTHENTICATION",
"AUTOEXTEND_SIZE",
"AUTO_INCREMENT",
"AVG_ROW_LENGTH",
"BERNOULLI",
"BINLOG",
"BLOCK",
"BOOL",
"BTREE",
"BUCKETS",
"BYTE",
"CHAIN",
"CHALLENGE_RESPONSE",
"CHANNEL",
"CHARSET",
"CHECKSUM",
"CIPHER",
"CLASS_ORIGIN",
"CLONE",
Expand All @@ -100,7 +104,6 @@ public static class TSqlDetectionConstants
"DEFINER",
"DELAYED",
"DELAY_KEY_WRITE",
"DENSE_RANK",
"DIRECTORY",
"DISCARD",
"DISTINCTROW",
Expand Down Expand Up @@ -172,7 +175,6 @@ public static class TSqlDetectionConstants
"IO_THREAD",
"IPC",
"JSON_TABLE",
"JSON_VALUE",
"KEYRING",
"KEY_BLOCK_SIZE",
"LAST_VALUE",
Expand Down Expand Up @@ -249,8 +251,6 @@ public static class TSqlDetectionConstants
"QUARTER",
"QUICK",
"RANDOM",
"READ_WRITE",
"REBUILD",
"RECOVER",
"REDO_BUFFER_SIZE",
"REDUNDANT",
Expand All @@ -264,7 +264,6 @@ public static class TSqlDetectionConstants
"RELAY_THREAD",
"RELOAD",
"REMOVE",
"REORGANIZE",
"REPAIR",
"REPEAT",
"REPEATABLE",
Expand Down Expand Up @@ -300,14 +299,12 @@ public static class TSqlDetectionConstants
"SECONDARY_LOAD",
"SECONDARY_UNLOAD",
"SECOND_MICROSECOND",
"SECURITY",
"SEPARATOR",
"SERIAL",
"SERIALIZABLE",
"SHARE",
"SIGNAL",
"SIGNED",
"SKIP",
"SLAVE",
"SLOW",
"SONAME",
Expand Down Expand Up @@ -364,7 +361,6 @@ public static class TSqlDetectionConstants
"STATS_AUTO_RECALC",
"STATS_PERSISTENT",
"STATS_SAMPLE_PAGES",
"STOP",
"STRAIGHT_JOIN",
"STREAM",
"SUBCLASS_ORIGIN",
Expand Down Expand Up @@ -399,11 +395,9 @@ public static class TSqlDetectionConstants
"USE_FRM",
"UTC_TIME",
"UTC_TIMESTAMP",
"VALIDATION",
"VARCHARACTER",
"VCPU",
"VIRTUAL",
"WAIT",
"WEIGHT_STRING",
"WRAPPER",
"X509",
Expand All @@ -415,16 +409,12 @@ public static class TSqlDetectionConstants
// PGSQL words
"ABSENT",
"ACCORDING",
"ACOS",
"ALSO",
"ANALYSE",
"ANY_VALUE",
"ARRAY_AGG",
"ARRAY_​MAX_​CARDINALITY",
"ASIN",
"ASSIGNMENT",
"ATAN",
"ATTACH",
"BACKWARD",
"BASE64",
"BEGIN_FRAME",
Expand All @@ -448,7 +438,6 @@ public static class TSqlDetectionConstants
"CONVERSION",
"COPARTITION",
"COPY",
"COS",
"COSH",
"CURRENT_ROW",
"CURRENT_​DEFAULT_​TRANSFORM_​GROUP",
Expand Down Expand Up @@ -480,11 +469,9 @@ public static class TSqlDetectionConstants
"DYNAMIC_FUNCTION",
"DYNAMIC_​FUNCTION_​CODE",
"ENCODING",
"ENCRYPTED",
"END_FRAME",
"END_PARTITION",
"EXCLUSIVE",
"EXP",
"EXPRESSION",
"EXTENSION",
"FINAL",
Expand All @@ -496,14 +483,12 @@ public static class TSqlDetectionConstants
"FS",
"FULFILL",
"GRANTED",
"GREATEST",
"HEADER",
"ILIKE",
"IMMEDIATELY",
"IMMUTABLE",
"IMPLEMENTATION",
"IMPLICIT",
"INCREMENT",
"INDENT",
"INHERIT",
"INHERITS",
Expand All @@ -515,29 +500,24 @@ public static class TSqlDetectionConstants
"JSON_EXISTS",
"JSON_OBJECT",
"JSON_OBJECTAGG",
"JSON_QUERY",
"JSON_SCALAR",
"JSON_SERIALIZE",
"JSON_TABLE_PRIMITIVE",
"KEY_MEMBER",
"LEAKPROOF",
"LEAST",
"LIBRARY",
"LISTAGG",
"LISTEN",
"LOG10",
"LOGGED",
"LPAD",
"MAPPING",
"MATCHED",
"MATCHES",
"MATCH_NUMBER",
"MATCH_RECOGNIZE",
"MATERIALIZED",
"MERGE_ACTION",
"MESSAGE_LENGTH",
"MESSAGE_OCTET_LENGTH",
"MOVE",
"MUMPS",
"NAMESPACE",
"NESTING",
Expand Down Expand Up @@ -566,7 +546,6 @@ public static class TSqlDetectionConstants
"PARAMETER_​SPECIFIC_​CATALOG",
"PARAMETER_​SPECIFIC_​NAME",
"PARAMETER_​SPECIFIC_​SCHEMA",
"PASS",
"PASSING",
"PASSTHROUGH",
"PAST",
Expand All @@ -575,9 +554,7 @@ public static class TSqlDetectionConstants
"PLACING",
"PLANS",
"PLI",
"POLICY",
"PORTION",
"POWER",
"PREPARED",
"PRIVATE",
"PROCEDURAL",
Expand All @@ -602,14 +579,11 @@ public static class TSqlDetectionConstants
"SCOPE_SCHEMA",
"SEEK",
"SELECTIVE",
"SELF",
"SEMANTICS",
"SEQUENCES",
"SETOF",
"SIN",
"SINH",
"SORT_DIRECTION",
"SQRT",
"STABLE",
"STANDALONE",
"STDIN",
Expand All @@ -622,9 +596,7 @@ public static class TSqlDetectionConstants
"SUCCEEDS",
"SUPPORT",
"SYSID",
"TAN",
"TANH",
"TARGET",
"TEMP",
"TEMPLATE",
"THROUGH",
Expand Down Expand Up @@ -653,7 +625,6 @@ public static class TSqlDetectionConstants
"UTF32",
"UTF8",
"VACUUM",
"VALID",
"VALIDATE",
"VALIDATOR",
"VALUE_OF",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1754,18 +1754,25 @@ await ServiceHostInstance.SendEvent(
identifiers.Add(identifier.ToString());
}

int count = 0;
string[] nonTSqlKeywords = new string[TSqlDetectionConstants.NonTSqlKeywordLimit];
foreach (Token token in parseResult.Script.Tokens)
{
if (token.IsSignificant && TSqlDetectionConstants.Keywords.Contains(token.Text) && !identifiers.Contains(token.Text))
{
await ServiceHostInstance.SendEvent(
NonTSqlNotification.Type,
new NonTSqlParams
nonTSqlKeywords[count] = token.Text;
count++;
if (count == TSqlDetectionConstants.NonTSqlKeywordLimit)
{
OwnerUri = uri,
NonTSqlKeyword = token.Text,
});
return true;
await ServiceHostInstance.SendEvent(
NonTSqlNotification.Type,
new NonTSqlParams
{
OwnerUri = uri,
NonTSqlKeyword = string.Join(", ", nonTSqlKeywords)
});
return true;
}
}
}
return false;
Expand Down
Loading

0 comments on commit 87f3c0e

Please sign in to comment.