From ad3d5b651b820abb6f9c2af13d36fb110f34fa84 Mon Sep 17 00:00:00 2001 From: Siyuan Huang <73871299+kysshsy@users.noreply.github.com> Date: Sun, 8 Dec 2024 22:00:09 +0800 Subject: [PATCH] feat: Enhance explain (#146) * feat: enhance explain * test: add duckdb style explain test --- Cargo.lock | 10 +-- Cargo.toml | 2 +- src/duckdb/connection.rs | 25 ++++++- src/duckdb/json.rs | 4 +- src/hooks/utility.rs | 8 +-- src/hooks/utility/explain.rs | 116 +++++++++++++++++++++++++++--- tests/tests/explain.rs | 133 +++++++++++++++++++++++++++++++++++ 7 files changed, 271 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0296dc9..8f48bd65 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3506,7 +3506,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -4014,7 +4014,7 @@ dependencies = [ "pgrx", "serde_json", "signal-hook", - "sqlparser 0.50.0", + "sqlparser 0.52.0", "strum 0.26.3", "supabase-wrappers", "thiserror", @@ -5219,9 +5219,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.50.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2e5b515a2bd5168426033e9efbfd05500114833916f1d5c268f938b4ee130ac" +checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" dependencies = [ "log", ] @@ -6222,7 +6222,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.48.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index d7713d34..d8575158 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,7 @@ duckdb = { git = "https://github.com/paradedb/duckdb-rs.git", features = [ pgrx = "0.12.7" serde_json = "1.0.128" signal-hook = "0.3.17" -sqlparser = "0.50.0" +sqlparser = "0.52.0" strum = { version = "0.26.3", features = ["derive"] } supabase-wrappers = { git = "https://github.com/paradedb/wrappers.git", default-features = false, rev = "8aef4a6" } thiserror = "1.0.63" diff --git a/src/duckdb/connection.rs b/src/duckdb/connection.rs index b420916e..530b2412 100644 --- a/src/duckdb/connection.rs +++ b/src/duckdb/connection.rs @@ -66,6 +66,7 @@ pub fn get_global_connection() -> &'static UnsafeCell { INIT.call_once(|| { init_globals(); }); + #[allow(static_mut_refs)] unsafe { GLOBAL_CONNECTION .as_ref() @@ -77,6 +78,7 @@ fn get_global_statement() -> &'static UnsafeCell>> { INIT.call_once(|| { init_globals(); }); + #[allow(static_mut_refs)] unsafe { GLOBAL_STATEMENT .as_ref() @@ -88,7 +90,10 @@ fn get_global_arrow() -> &'static UnsafeCell>> { INIT.call_once(|| { init_globals(); }); - unsafe { GLOBAL_ARROW.as_ref().expect("Arrow not initialized") } + #[allow(static_mut_refs)] + unsafe { + GLOBAL_ARROW.as_ref().expect("Arrow not initialized") + } } pub fn create_csv_view( @@ -249,3 +254,21 @@ pub fn set_search_path(search_path: Vec) -> Result<()> { Ok(()) } + +pub fn execute_explain(query: &str) -> Result { + let conn = unsafe { &*get_global_connection().get() }; + let mut stmt = conn.prepare(query)?; + let rows = stmt.query_row([], |row| { + let mut r = vec![]; + + let mut col_index = 1; + while let Ok(value) = row.get::<_, String>(col_index) { + r.push(value); + col_index += 1; + } + + Ok(r) + })?; + + Ok(rows.join("")) +} diff --git a/src/duckdb/json.rs b/src/duckdb/json.rs index 0772cdd6..efb1ac39 100644 --- a/src/duckdb/json.rs +++ b/src/duckdb/json.rs @@ -98,10 +98,10 @@ fn extract_option( table_options: &HashMap, quote: bool, ) -> Option { - return table_options.get(option.as_ref()).map(|res| match quote { + table_options.get(option.as_ref()).map(|res| match quote { true => format!("{option} = '{res}'"), false => format!("{option} = {res}"), - }); + }) } #[cfg(test)] diff --git a/src/hooks/utility.rs b/src/hooks/utility.rs index b42ef984..4fa5f3f0 100644 --- a/src/hooks/utility.rs +++ b/src/hooks/utility.rs @@ -163,13 +163,7 @@ fn parse_query_from_utility_stmt(query_string: &core::ffi::CStr) -> Result Ok(statement.to_string()), + Statement::Explain { statement, .. } => Ok(statement.to_string()), _ => bail!("unexpected utility statement: {}", query_string), } } diff --git a/src/hooks/utility/explain.rs b/src/hooks/utility/explain.rs index bdfca82b..96e64634 100644 --- a/src/hooks/utility/explain.rs +++ b/src/hooks/utility/explain.rs @@ -15,13 +15,26 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use std::ffi::CString; +use std::ffi::{CStr, CString}; +use std::time::Instant; use anyhow::Result; use pgrx::{error, pg_sys}; use super::parse_query_from_utility_stmt; -use crate::hooks::query::{get_query_relations, is_duckdb_query}; +use crate::{ + duckdb::connection, + hooks::query::{get_query_relations, is_duckdb_query, set_search_path_by_pg}, +}; + +enum Style { + Postgres, + Duckdb, +} +struct ExplainState { + analyze: bool, + style: Style, +} pub fn explain_query( query_string: &core::ffi::CStr, @@ -37,9 +50,34 @@ pub fn explain_query( return Ok(true); } - if unsafe { !(*stmt).options.is_null() } { - error!("the EXPLAIN options provided are not supported for DuckDB pushdown queries."); - } + let state = parse_explain_options(unsafe { (*stmt).options }); + let query = parse_query_from_utility_stmt(query_string)?; + + let output = match state.style { + Style::Postgres => { + let mut output = format!("DuckDB Scan: {}\n", query); + if state.analyze { + let start_time = Instant::now(); + set_search_path_by_pg()?; + connection::execute(&query, [])?; + let duration = start_time.elapsed(); + output += &format!( + "Execution Time: {:.3} ms\n", + duration.as_micros() as f64 / 1_000.0 + ); + } + output + } + Style::Duckdb => { + set_search_path_by_pg()?; + let explain_query = if state.analyze { + format!("EXPLAIN ANALYZE {query}") + } else { + format!("EXPLAIN {query}") + }; + connection::execute_explain(&explain_query)? + } + }; unsafe { let tstate = pg_sys::begin_tup_output_tupdesc( @@ -47,15 +85,71 @@ pub fn explain_query( pg_sys::ExplainResultDesc(stmt), &pg_sys::TTSOpsVirtual, ); - let query = format!( - "DuckDB Scan: {}", - parse_query_from_utility_stmt(query_string)? - ); - let query_c_str = CString::new(query)?; - pg_sys::do_text_output_multiline(tstate, query_c_str.as_ptr()); + let output_cstr = CString::new(output)?; + + pg_sys::do_text_output_multiline(tstate, output_cstr.as_ptr()); pg_sys::end_tup_output(tstate); } Ok(false) } + +fn parse_explain_options(options: *const pg_sys::List) -> ExplainState { + let mut explain_state = ExplainState { + analyze: false, + style: Style::Postgres, + }; + + if options.is_null() { + return explain_state; + } + + unsafe { + let elements = (*options).elements; + + for i in 0..(*options).length as isize { + let opt = (*elements.offset(i)).ptr_value as *mut pg_sys::DefElem; + + let opt_name = match CStr::from_ptr((*opt).defname).to_str() { + Ok(opt) => opt, + Err(e) => { + error!("failed to parse EXPLAIN option name: {e}"); + } + }; + match opt_name { + "analyze" => { + explain_state.analyze = pg_sys::defGetBoolean(opt); + } + "style" => { + let style = match CStr::from_ptr(pg_sys::defGetString(opt)).to_str() { + Ok(style) => style, + + Err(e) => { + error!("failed to parse STYLE option: {e}"); + } + }; + + explain_state.style = match parse_explain_style(style) { + Some(s) => s, + None => { + error!("unrecognized STYLE option: {style}") + } + }; + } + _ => error!("unrecognized EXPLAIN option \"{opt_name}\""), + } + } + } + + explain_state +} + +fn parse_explain_style(style: &str) -> Option