Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NEW Macros #15

Merged
merged 7 commits into from
Nov 7, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 58 additions & 21 deletions chsql/src/chsql_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,30 +65,30 @@ static DefaultMacro chsql_macros[] = {
{DEFAULT_SCHEMA, "toFloatOrNull", {"x", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST(x AS DOUBLE))"},
{DEFAULT_SCHEMA, "toFloatOrZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN TRY_CAST(x AS DOUBLE) IS NOT NULL THEN CAST(x AS DOUBLE) ELSE 0 END)"},
// -- Arithmetic macros
{DEFAULT_SCHEMA, "intDiv", {"a", "b"}, {{nullptr, nullptr}}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"},
{DEFAULT_SCHEMA, "intDivOrNull", {"a", "b"}, {{nullptr, nullptr}}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"},
{DEFAULT_SCHEMA, "intDiv", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((CAST(a AS BIGINT) // CAST(b AS BIGINT)))"},
{DEFAULT_SCHEMA, "intDivOrNull", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT))"},
{DEFAULT_SCHEMA, "intDivOZero", {"x", nullptr}, {{nullptr, nullptr}}, R"(COALESCE((TRY_CAST((TRY_CAST(a AS BIGINT) // TRY_CAST(b AS BIGINT)) AS BIGINT)),0))"},
{DEFAULT_SCHEMA, "plus", {"a", "b"}, {{nullptr, nullptr}}, R"(add(a, b))"},
{DEFAULT_SCHEMA, "minus", {"a", "b"}, {{nullptr, nullptr}}, R"(subtract(a, b))"},
{DEFAULT_SCHEMA, "modulo", {"a", "b"}, {{nullptr, nullptr}}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"},
{DEFAULT_SCHEMA, "moduloOrZero", {"a", "b"}, {{nullptr, nullptr}}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"},
{DEFAULT_SCHEMA, "plus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(add(a, b))"},
{DEFAULT_SCHEMA, "minus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(subtract(a, b))"},
{DEFAULT_SCHEMA, "modulo", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(CAST(a AS BIGINT) % CAST(b AS BIGINT))"},
{DEFAULT_SCHEMA, "moduloOrZero", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(COALESCE(((TRY_CAST(a AS BIGINT) % TRY_CAST(b AS BIGINT))),0))"},
// -- Tuple macros
{DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivide", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiply", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMinus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tuplePlus", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModulo", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b"}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleConcat", {"a", "b"}, {{nullptr, nullptr}}, R"(list_concat(a, b))"},
{DEFAULT_SCHEMA, "tupleIntDiv", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleIntDivByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) // CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivide", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiply", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> CAST(apply(b, x -> CAST(x AS BIGINT))[i] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMinus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] - CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tuplePlus", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x,i) -> apply(b, x -> CAST(x AS BIGINT))[i] + CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleMultiplyByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) * CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleDivideByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) / CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModulo", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleModuloByNumber", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(apply(a, (x) -> CAST(apply(b, x -> CAST(x AS BIGINT))[1] as BIGINT) % CAST(x AS BIGINT)))"},
{DEFAULT_SCHEMA, "tupleConcat", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"(list_concat(a, b))"},
// -- String matching macros
{DEFAULT_SCHEMA, "match", {"string", "token"}, {{nullptr, nullptr}}, R"(string LIKE token)"},
// -- Array macros
{DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack"}, {{nullptr, nullptr}}, R"(haystack @> ARRAY[needle])"},
{DEFAULT_SCHEMA, "arrayMap", {"e", "arr"}, {{nullptr, nullptr}}, R"(array_transform(arr, e -> (e * e)))"},
{DEFAULT_SCHEMA, "arrayExists", {"needle", "haystack", nullptr}, {{nullptr, nullptr}}, R"(haystack @> ARRAY[needle])"},
{DEFAULT_SCHEMA, "arrayMap", {"e", "arr", nullptr}, {{nullptr, nullptr}}, R"(array_transform(arr, e -> (e * e)))"},
// Date and Time Functions
{DEFAULT_SCHEMA, "toYear", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(YEAR FROM date_expression))"},
{DEFAULT_SCHEMA, "toMonth", {"date_expression", nullptr}, {{nullptr, nullptr}}, R"(EXTRACT(MONTH FROM date_expression))"},
Expand Down Expand Up @@ -119,10 +119,32 @@ static DefaultMacro chsql_macros[] = {
// IP Address Functions
{DEFAULT_SCHEMA, "IPv4NumToString", {"num", nullptr}, {{nullptr, nullptr}}, R"(CONCAT(CAST((num >> 24) & 255 AS VARCHAR), '.', CAST((num >> 16) & 255 AS VARCHAR), '.', CAST((num >> 8) & 255 AS VARCHAR), '.', CAST(num & 255 AS VARCHAR)))"},
{DEFAULT_SCHEMA, "IPv4StringToNum", {"ip", nullptr}, {{nullptr, nullptr}}, R"(CAST(SPLIT_PART(ip, '.', 1) AS INTEGER) * 256 * 256 * 256 + CAST(SPLIT_PART(ip, '.', 2) AS INTEGER) * 256 * 256 + CAST(SPLIT_PART(ip, '.', 3) AS INTEGER) * 256 + CAST(SPLIT_PART(ip, '.', 4) AS INTEGER))"},
// -- JSON Macros
{DEFAULT_SCHEMA, "JSONExtract", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key))"},
{DEFAULT_SCHEMA, "JSONExtractString", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS VARCHAR))"},
{DEFAULT_SCHEMA, "JSONExtractUInt", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS UINTEGER))"},
{DEFAULT_SCHEMA, "JSONExtractInt", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS INT32))"},
{DEFAULT_SCHEMA, "JSONExtractFloat", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(CAST(json_extract(json, key) AS DOUBLE))"},
{DEFAULT_SCHEMA, "JSONExtractRaw", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key))"},
{DEFAULT_SCHEMA, "JSONHas", {"json", "key", nullptr}, {{nullptr, nullptr}}, R"(json_extract(json, key) IS NOT NULL)"},
{DEFAULT_SCHEMA, "JSONLength", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_array_length(json))"},
{DEFAULT_SCHEMA, "JSONType", {"json", "path", nullptr}, {{nullptr, nullptr}}, R"(CASE WHEN path IS NULL THEN json_each(json) ELSE json_each(json,path) END)"},
{DEFAULT_SCHEMA, "JSONExtractKeys", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_object_keys(json))"},
{DEFAULT_SCHEMA, "JSONExtractValues", {"json", nullptr}, {{nullptr, nullptr}}, R"(json_each_text(json))"},
// -- Compare Macros
{DEFAULT_SCHEMA, "equals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a = b))"},
{DEFAULT_SCHEMA, "notEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a <> b))"},
{DEFAULT_SCHEMA, "less", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a < b))"},
{DEFAULT_SCHEMA, "greater", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a > b))"},
{DEFAULT_SCHEMA, "lessOrEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a <= b))"},
{DEFAULT_SCHEMA, "greaterOrEquals", {"a", "b", nullptr}, {{nullptr, nullptr}}, R"((a >= b))"},
// -- Misc macros
{DEFAULT_SCHEMA, "generateUUIDv4", {nullptr}, {{nullptr, nullptr}}, R"(toString(uuid()))"},
{DEFAULT_SCHEMA, "parseURL", {"url", "part", nullptr}, {{nullptr, nullptr}}, R"(CASE part WHEN 'protocol' THEN REGEXP_EXTRACT(url, '^(\w+)://') WHEN 'domain' THEN REGEXP_EXTRACT(url, '://([^/:]+)') WHEN 'port' THEN REGEXP_EXTRACT(url, ':(\d+)') WHEN 'path' THEN REGEXP_EXTRACT(url, '://[^/]+(/.+?)(\?|#|$)') WHEN 'query' THEN REGEXP_EXTRACT(url, '\?([^#]+)') WHEN 'fragment' THEN REGEXP_EXTRACT(url, '#(.+)$') END)"},
{DEFAULT_SCHEMA, "bitCount", {"num", nullptr}, {{nullptr, nullptr}}, R"(BIT_COUNT(num))"},
// Dictionary Emulation using MAP VARIABLES
{DEFAULT_SCHEMA, "dictGet", {"dict","attr", nullptr}, {{nullptr, nullptr}}, R"(getvariable(dict)[attr][1])"},
// -- End Macro
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}};

// To add a new table SQL macro, add a new macro to this array!
Expand All @@ -142,8 +164,23 @@ static DefaultMacro chsql_macros[] = {
// clang-format off
static const DefaultTableMacro chsql_table_macros[] = {
{DEFAULT_SCHEMA, "numbers", {"x", nullptr}, {{"z", "0"}, {nullptr, nullptr}}, R"(SELECT * as number FROM generate_series(z,x-1);)"},
{DEFAULT_SCHEMA, "ch_scan", {"query", "server"}, {{"format", "JSONEachRow"}, {"user", "play"}, {nullptr, nullptr}}, R"(SELECT * FROM read_json_auto(concat(server, '/?default_format=', format, '&user=', user, '&query=', query)))"},
{DEFAULT_SCHEMA, "url", {"url", "format"}, {{nullptr, nullptr}}, R"(WITH "JSON" as (SELECT * FROM read_json_auto(url)), "PARQUET" as (SELECT * FROM read_parquet(url)), "CSV" as (SELECT * FROM read_csv_auto(url)), "BLOB" as (SELECT * FROM read_blob(url)), "TEXT" as (SELECT * FROM read_text(url)) FROM query_table(format))"},
{DEFAULT_SCHEMA, "url", {"url", "format", nullptr}, {{nullptr, nullptr}}, R"(WITH "JSON" as (SELECT * FROM read_json_auto(url)), "PARQUET" as (SELECT * FROM read_parquet(url)), "CSV" as (SELECT * FROM read_csv_auto(url)), "BLOB" as (SELECT * FROM read_blob(url)), "TEXT" as (SELECT * FROM read_text(url)) FROM query_table(format))"},
{DEFAULT_SCHEMA, "ch_scan", {"query", "server", nullptr}, {{"format", "JSONEachRow"}, {"user", "play"}, {nullptr, nullptr}}, R"(
WITH
"json" AS (SELECT * FROM read_json_auto(concat(server, '/?default_format=jsoneachrow&user=', user, '&query=', query))),
"parquet" AS (SELECT * FROM read_parquet(concat(server, '/?default_format=parquet&user=', user, '&query=', query))),
"csv" AS (SELECT * FROM read_csv_auto(concat(server, '/?default_format=csv&user=', user, '&query=', query))),
"blob" AS (SELECT * FROM read_blob(concat(server, '/?default_format=blob&user=', user, '&query=', query))),
"text" AS (SELECT * FROM read_text(concat(server, '/?default_format=text&user=', user, '&query=', query)))
FROM query_table(CASE
WHEN LOWER(format) LIKE 'json%' THEN 'json'
WHEN LOWER(format) LIKE 'csv%' THEN 'csv'
WHEN LOWER(format) = 'parquet' THEN 'parquet'
WHEN LOWER(format) = 'blob' THEN 'blob'
WHEN LOWER(format) = 'text' THEN 'text'
ELSE 'json'
END)
)"},
{nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr}
};
// clang-format on
Expand Down
Loading