diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 3db6cf5..f8ea79e 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -18,6 +18,7 @@ jobs: with: duckdb_version: v0.10.1 extension_name: url_parser + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' duckdb-stable-deploy: name: Deploy extension binaries @@ -27,4 +28,5 @@ jobs: with: duckdb_version: v0.10.1 extension_name: url_parser + exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads;windows_amd64_rtools' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} diff --git a/CMakeLists.txt b/CMakeLists.txt index 2681f6b..045bb2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,11 +3,6 @@ cmake_minimum_required(VERSION 2.8.12) # Set extension name here set(TARGET_NAME url_parser) -# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then -# used in cmake with find_package. Feel free to remove or replace with other dependencies. -# Note that it should also be removed from vcpkg.json to prevent needlessly installing it.. 
-find_package(OpenSSL REQUIRED) - set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) @@ -19,12 +14,12 @@ set(EXTENSION_SOURCES src/url_parser_extension.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -# Link OpenSSL in both the static library as the loadable extension -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) +find_package(Boost REQUIRED COMPONENTS url) +target_link_libraries(${EXTENSION_NAME} Boost::url) +target_link_libraries(${LOADABLE_EXTENSION_NAME} Boost::url) install( - TARGETS ${EXTENSION_NAME} - EXPORT "${DUCKDB_EXPORT_SET}" - LIBRARY DESTINATION "${INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") + TARGETS ${EXTENSION_NAME} + EXPORT "${DUCKDB_EXPORT_SET}" + LIBRARY DESTINATION "${INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") diff --git a/README.md b/README.md index 53fdd84..6be718b 100644 --- a/README.md +++ b/README.md @@ -1,86 +1,5 @@ -# Url_parser - +# Demo url_parser extension This repository is based on https://github.com/duckdb/extension-template, check it out if you want to build and ship your own DuckDB extension. ---- - -This extension, Url_parser, allow you to ... . - - -## Building -### Managing dependencies -DuckDB extensions uses VCPKG for dependency management. Enabling VCPKG is very simple: follow the [installation instructions](https://vcpkg.io/en/getting-started) or just run the following: -```shell -git clone https://github.com/Microsoft/vcpkg.git -./vcpkg/bootstrap-vcpkg.sh -export VCPKG_TOOLCHAIN_PATH=`pwd`/vcpkg/scripts/buildsystems/vcpkg.cmake -``` -Note: VCPKG is only required for extensions that want to rely on it for dependency management. 
If you want to develop an extension without dependencies, or want to do your own dependency management, just skip this step. Note that the example extension uses VCPKG to build with a dependency for instructive purposes, so when skipping this step the build may not work without removing the dependency. - -### Build steps -Now to build the extension, run: -```sh -make -``` -The main binaries that will be built are: -```sh -./build/release/duckdb -./build/release/test/unittest -./build/release/extension/url_parser/url_parser.duckdb_extension -``` -- `duckdb` is the binary for the duckdb shell with the extension code automatically loaded. -- `unittest` is the test runner of duckdb. Again, the extension is already linked into the binary. -- `url_parser.duckdb_extension` is the loadable binary as it would be distributed. - -## Running the extension -To run the extension code, simply start the shell with `./build/release/duckdb`. - -Now we can use the features from the extension directly in DuckDB. The template contains a single scalar function `url_parser()` that takes a string arguments and returns a string: -``` -D select url_parser('Jane') as result; -┌───────────────┐ -│ result │ -│ varchar │ -├───────────────┤ -│ Url_parser Jane 🐥 │ -└───────────────┘ -``` - -## Running the tests -Different tests can be created for DuckDB extensions. The primary way of testing DuckDB extensions should be the SQL tests in `./test/sql`. These SQL tests can be run using: -```sh -make test -``` - -### Installing the deployed binaries -To install your extension binaries from S3, you will need to do two things. Firstly, DuckDB should be launched with the -`allow_unsigned_extensions` option set to true. How to set this will depend on the client you're using. 
Some examples: - -CLI: -```shell -duckdb -unsigned -``` - -Python: -```python -con = duckdb.connect(':memory:', config={'allow_unsigned_extensions' : 'true'}) -``` - -NodeJS: -```js -db = new duckdb.Database(':memory:', {"allow_unsigned_extensions": "true"}); -``` - -Secondly, you will need to set the repository endpoint in DuckDB to the HTTP url of your bucket + version of the extension -you want to install. To do this run the following SQL query in DuckDB: -```sql -SET custom_extension_repository='bucket.s3.eu-west-1.amazonaws.com//latest'; -``` -Note that the `/latest` path will allow you to install the latest extension version available for your current version of -DuckDB. To specify a specific version, you can pass the version instead. - -After running these steps, you can install and load your extension using the regular INSTALL/LOAD commands in DuckDB: -```sql -INSTALL url_parser -LOAD url_parser -``` +This extension was created to illustrate how to build a duckdb extension for [this blogpost](https://duckdb.org/2024/03/22/dependency-management.html). Note that +this extension is built for v0.10.1 and will not be maintained. 
\ No newline at end of file diff --git a/src/url_parser_extension.cpp b/src/url_parser_extension.cpp index 905ef5a..81f0f56 100644 --- a/src/url_parser_extension.cpp +++ b/src/url_parser_extension.cpp @@ -8,40 +8,29 @@ #include "duckdb/main/extension_util.hpp" #include -// OpenSSL linked through vcpkg -#include +#include "boost/url/url_view.hpp" +#include "boost/url/parse.hpp" namespace duckdb { inline void UrlParserScalarFun(DataChunk &args, ExpressionState &state, Vector &result) { auto &name_vector = args.data[0]; UnaryExecutor::Execute( - name_vector, result, args.size(), - [&](string_t name) { - return StringVector::AddString(result, "UrlParser "+name.GetString()+" 🐥");; - }); -} - -inline void UrlParserOpenSSLVersionScalarFun(DataChunk &args, ExpressionState &state, Vector &result) { - auto &name_vector = args.data[0]; - UnaryExecutor::Execute( - name_vector, result, args.size(), - [&](string_t name) { - return StringVector::AddString(result, "UrlParser " + name.GetString() + - ", my linked OpenSSL version is " + - OPENSSL_VERSION_TEXT );; + name_vector, result, args.size(), + [&](string_t url) { + string url_string = url.GetString(); + boost::system::result parse_result = boost::urls::parse_uri( url_string ); + if (parse_result.has_error() || !parse_result.value().has_scheme()) { + return string_t(); + } + string scheme = parse_result.value().scheme(); + return StringVector::AddString(result, scheme); }); } static void LoadInternal(DatabaseInstance &instance) { - // Register a scalar function - auto url_parser_scalar_function = ScalarFunction("url_parser", {LogicalType::VARCHAR}, LogicalType::VARCHAR, UrlParserScalarFun); + auto url_parser_scalar_function = ScalarFunction("url_scheme", {LogicalType::VARCHAR}, LogicalType::VARCHAR, UrlParserScalarFun); ExtensionUtil::RegisterFunction(instance, url_parser_scalar_function); - - // Register another scalar function - auto url_parser_openssl_version_scalar_function = ScalarFunction("url_parser_openssl_version", 
{LogicalType::VARCHAR}, - LogicalType::VARCHAR, UrlParserOpenSSLVersionScalarFun); - ExtensionUtil::RegisterFunction(instance, url_parser_openssl_version_scalar_function); } void UrlParserExtension::Load(DuckDB &db) { @@ -56,12 +45,12 @@ std::string UrlParserExtension::Name() { extern "C" { DUCKDB_EXTENSION_API void url_parser_init(duckdb::DatabaseInstance &db) { - duckdb::DuckDB db_wrapper(db); - db_wrapper.LoadExtension(); + duckdb::DuckDB db_wrapper(db); + db_wrapper.LoadExtension(); } DUCKDB_EXTENSION_API const char *url_parser_version() { - return duckdb::DuckDB::LibraryVersion(); + return duckdb::DuckDB::LibraryVersion(); } } diff --git a/test/sql/url_parser.test b/test/sql/url_parser.test index fed87a1..c4cd7e5 100644 --- a/test/sql/url_parser.test +++ b/test/sql/url_parser.test @@ -2,22 +2,16 @@ # description: test url_parser extension # group: [url_parser] -# Before we load the extension, this will fail -statement error -SELECT url_parser('Sam'); ----- -Catalog Error: Scalar Function with name url_parser does not exist! - -# Require statement will ensure this test is run with this extension loaded require url_parser # Confirm the extension works query I -SELECT url_parser('Sam'); +SELECT url_scheme('https://github.com/duckdb/duckdb') ---- -UrlParser Sam 🐥 +https +# On parser errors or not finding a scheme, the result is also an empty string query I -SELECT url_parser_openssl_version('Michael'); +SELECT url_scheme('not:\a/valid_url') ---- -:UrlParser Michael, my linked OpenSSL version is OpenSSL.* \ No newline at end of file +(empty) diff --git a/vcpkg.json b/vcpkg.json index 85936bf..1c5d6a4 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,5 +1,5 @@ { "dependencies": [ - "openssl" + "boost-url" ] -} \ No newline at end of file +}