Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: bump to version 1.1.1 #12

Closed
wants to merge 32 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
bbc85d7
feat: support "large" arrow data types (#307)
Mause Jun 3, 2024
d7438c7
Fixes the issue ignoring nulls when copy from arrow array to flat vec…
y-f-u Jun 3, 2024
6640ff2
change SQLite to DuckDB (#308)
era127 Jun 3, 2024
4f6535c
Organise workspace crates and consolidate Cargo.toml manifest propert…
Swoorup Jun 4, 2024
74fce0f
Update README.md (#242)
yoonghm Jun 4, 2024
f628e5a
Add support for DuckDB arrays when using Arrow's FixedSizeList (#323)
Jeadie Jun 4, 2024
4f772b3
Add support for BinaryArray in arrow-vtab (#324)
phillipleblanc Jun 5, 2024
f5da417
use openssl from runner (#327)
Maxxen Jun 6, 2024
3bd503c
Add ability to pass vendored feature to openssl in libduckdb-sys (#321)
muralisoundararajan Jun 6, 2024
f48a4e3
Implement appender for date/time types (#313)
rijkvp Jun 6, 2024
2fea269
Support decimal128 without casting to double (#328)
Maxxen Jun 6, 2024
fe9a3ab
feat: experimental support for further nested types (#318)
Mause Jun 8, 2024
c175a8a
upgrade arrow-rs to 52 (#332)
Mause Jun 8, 2024
c56e458
expose underlying schema type of statement (#333)
abhiaagarwal Jun 9, 2024
1c5e7cd
Remove mut from raw_query (#339)
rijkvp Jul 5, 2024
88dd455
Support Arrow type `LargeUtf8`. (#341)
Jeadie Jul 10, 2024
15ead11
Update duckdb-rs to DuckDB v1.0.0 (#336)
Mytherin Jul 10, 2024
983e02a
fix: don't write files outside of `OUT_DIR` (#347)
frectonz Jul 10, 2024
4d227f7
Remove built-in HTTPFS extension and fix Windows CI (#353)
Mytherin Jul 11, 2024
5996c3e
Upgrade duckdb-loadable-macros (#354)
Mytherin Jul 11, 2024
1c73aef
Move files out of vtab to core module (#359)
Swoorup Jul 20, 2024
f40058b
feat: set duckdb api to rust and add custom_user_agent config (#360)
hrl20 Aug 1, 2024
201a3d0
Rename `LogicalType` and `DataChunk` to have `Handle` suffix (#361)
Swoorup Aug 1, 2024
44e0ff1
Fix CI (#375)
samansmink Aug 27, 2024
02a0f3e
Set nulls correctly for all type of arrays/vectors (#344)
y-f-u Aug 30, 2024
36b83bc
add support of fixedsizebinary, duration, interval support in arrow (…
y-f-u Aug 30, 2024
e12fdb6
streaming arrow data support (#373)
y-f-u Sep 26, 2024
f887844
Upgrade arrow-rs to 53 (#382)
phillipleblanc Sep 26, 2024
71b01f7
C Extension API (#381)
samansmink Oct 8, 2024
2bd811e
bump loadable macros version (#389)
samansmink Oct 8, 2024
6d68c90
Merge remote-tracking branch 'upstream/main'
mharrisb1 Jan 9, 2025
33c800e
fix(ci): indexmap
mharrisb1 Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
feat: support "large" arrow data types (duckdb#307)
* feat: add large arrow type support

* remove old match entry
Mause authored Jun 3, 2024
commit bbc85d701ab879a41bc4ed7dd50b49a79a1d2897
27 changes: 8 additions & 19 deletions src/row.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::{convert, sync::Arc};

use super::{Error, Result, Statement};
use crate::types::{self, EnumType, FromSql, FromSqlError, ValueRef};
use crate::types::{self, EnumType, FromSql, FromSqlError, ListType, ValueRef};

use arrow::array::DictionaryArray;
use arrow::{
@@ -570,22 +570,6 @@ impl<'stmt> Row<'stmt> {
_ => unimplemented!("{:?}", unit),
},
// TODO: support more data types
// DataType::List(_) => make_string_from_list!(column, row),
// DataType::Dictionary(index_type, _value_type) => match **index_type {
// DataType::Int8 => dict_array_value_to_string::<Int8Type>(column, row),
// DataType::Int16 => dict_array_value_to_string::<Int16Type>(column, row),
// DataType::Int32 => dict_array_value_to_string::<Int32Type>(column, row),
// DataType::Int64 => dict_array_value_to_string::<Int64Type>(column, row),
// DataType::UInt8 => dict_array_value_to_string::<UInt8Type>(column, row),
// DataType::UInt16 => dict_array_value_to_string::<UInt16Type>(column, row),
// DataType::UInt32 => dict_array_value_to_string::<UInt32Type>(column, row),
// DataType::UInt64 => dict_array_value_to_string::<UInt64Type>(column, row),
// _ => Err(ArrowError::InvalidArgumentError(format!(
// "Pretty printing not supported for {:?} due to index type",
// column.data_type()
// ))),
// },

// NOTE: DataTypes not supported by duckdb
// DataType::Date64 => make_string_date!(array::Date64Array, column, row),
// DataType::Time32(unit) if *unit == TimeUnit::Second => {
@@ -597,10 +581,15 @@ impl<'stmt> Row<'stmt> {
// DataType::Time64(unit) if *unit == TimeUnit::Nanosecond => {
// make_string_time!(array::Time64NanosecondArray, column, row)
// }
DataType::List(_data) => {
DataType::LargeList(..) => {
let arr = column.as_any().downcast_ref::<array::LargeListArray>().unwrap();

ValueRef::List(ListType::Large(arr), row)
}
DataType::List(..) => {
let arr = column.as_any().downcast_ref::<ListArray>().unwrap();

ValueRef::List(arr, row)
ValueRef::List(ListType::Regular(arr), row)
}
DataType::Dictionary(key_type, ..) => {
let column = column.as_any();
12 changes: 11 additions & 1 deletion src/test_all_types.rs
Original file line number Diff line number Diff line change
@@ -8,8 +8,18 @@ use crate::{

#[test]
fn test_all_types() -> crate::Result<()> {
let database = Connection::open_in_memory()?;
test_with_database(&Connection::open_in_memory()?)
}

/// Runs the shared all-types checks against an in-memory database that was
/// opened with the `arrow_large_buffer_size` option set to `"true"`.
#[test]
fn test_large_arrow_types() -> crate::Result<()> {
    let large_buffer_config = crate::Config::default().with("arrow_large_buffer_size", "true")?;
    let connection = Connection::open_in_memory_with_flags(large_buffer_config)?;
    test_with_database(&connection)
}

fn test_with_database(database: &Connection) -> crate::Result<()> {
let excluded = vec![
// uhugeint, time_tz, and dec38_10 aren't supported in the duckdb arrow layer
"uhugeint",
8 changes: 3 additions & 5 deletions src/types/mod.rs
Original file line number Diff line number Diff line change
@@ -71,7 +71,7 @@ pub use self::{
from_sql::{FromSql, FromSqlError, FromSqlResult},
to_sql::{ToSql, ToSqlOutput},
value::Value,
value_ref::{EnumType, TimeUnit, ValueRef},
value_ref::{EnumType, ListType, TimeUnit, ValueRef},
};

use arrow::datatypes::DataType;
@@ -181,14 +181,12 @@ impl From<&DataType> for Type {
DataType::Binary => Self::Blob,
// DataType::FixedSizeBinary(_) => Self::FixedSizeBinary,
// DataType::LargeBinary => Self::LargeBinary,
DataType::Utf8 => Self::Text,
// DataType::LargeUtf8 => Self::LargeUtf8,
DataType::LargeUtf8 | DataType::Utf8 => Self::Text,
DataType::List(inner) => Self::List(Box::new(Type::from(inner.data_type()))),
// DataType::FixedSizeList(field, size) => Self::Array,
// DataType::LargeList(_) => Self::LargeList,
DataType::LargeList(inner) => Self::List(Box::new(Type::from(inner.data_type()))),
// DataType::Struct(inner) => Self::Struct,
// DataType::Union(_, _) => Self::Union,
// DataType::Dictionary(_, _) => Self::Enum,
DataType::Decimal128(..) => Self::Decimal,
DataType::Decimal256(..) => Self::Decimal,
// DataType::Map(field, ..) => Self::Map,
54 changes: 43 additions & 11 deletions src/types/value_ref.rs
Original file line number Diff line number Diff line change
@@ -4,7 +4,7 @@ use crate::types::{FromSqlError, FromSqlResult};
use crate::Row;
use rust_decimal::prelude::*;

use arrow::array::{Array, DictionaryArray, ListArray};
use arrow::array::{Array, ArrayRef, DictionaryArray, LargeListArray, ListArray};
use arrow::datatypes::{UInt16Type, UInt32Type, UInt8Type};

/// An absolute length of time in seconds, milliseconds, microseconds or nanoseconds.
@@ -75,11 +75,20 @@ pub enum ValueRef<'a> {
nanos: i64,
},
/// The value is a list
List(&'a ListArray, usize),
List(ListType<'a>, usize),
/// The value is an enum
Enum(EnumType<'a>, usize),
}

/// Borrowed handle to the Arrow array that backs a [`ValueRef::List`] value.
///
/// List columns can arrive as either of two Arrow array types; this enum lets
/// `ValueRef::List` carry a reference to whichever one is present behind a
/// single variant.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum ListType<'a> {
/// The underlying list is a `ListArray` (built for `DataType::List` columns).
Regular(&'a ListArray),
/// The underlying list is a `LargeListArray` (built for `DataType::LargeList` columns).
Large(&'a LargeListArray),
}

/// Wrapper type for different enum sizes
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum EnumType<'a> {
@@ -116,7 +125,10 @@ impl ValueRef<'_> {
ValueRef::Date32(_) => Type::Date32,
ValueRef::Time64(..) => Type::Time64,
ValueRef::Interval { .. } => Type::Interval,
ValueRef::List(arr, _) => arr.data_type().into(),
ValueRef::List(arr, _) => match arr {
ListType::Large(arr) => arr.data_type().into(),
ListType::Regular(arr) => arr.data_type().into(),
},
ValueRef::Enum(..) => Type::Enum,
}
}
@@ -177,14 +189,26 @@ impl From<ValueRef<'_>> for Value {
ValueRef::Date32(d) => Value::Date32(d),
ValueRef::Time64(t, d) => Value::Time64(t, d),
ValueRef::Interval { months, days, nanos } => Value::Interval { months, days, nanos },
ValueRef::List(items, idx) => {
let offsets = items.offsets();
let range = offsets[idx]..offsets[idx + 1];
let map: Vec<Value> = range
.map(|row| Row::value_ref_internal(row.try_into().unwrap(), idx, items.values()).to_owned())
.collect();
Value::List(map)
}
ValueRef::List(items, idx) => match items {
ListType::Regular(items) => {
let offsets = items.offsets();
from_list(
offsets[idx].try_into().unwrap(),
offsets[idx + 1].try_into().unwrap(),
idx,
items.values(),
)
}
ListType::Large(items) => {
let offsets = items.offsets();
from_list(
offsets[idx].try_into().unwrap(),
offsets[idx + 1].try_into().unwrap(),
idx,
items.values(),
)
}
},
ValueRef::Enum(items, idx) => {
let value = Row::value_ref_internal(
idx,
@@ -207,6 +231,14 @@ impl From<ValueRef<'_>> for Value {
}
}

/// Builds an owned `Value::List` from the child rows `start..end` of `values`.
///
/// Each child row is read through `Row::value_ref_internal` and converted to
/// an owned value; `idx` is forwarded unchanged as that call's second argument.
/// An empty or inverted range yields an empty list.
fn from_list(start: usize, end: usize, idx: usize, values: &ArrayRef) -> Value {
    let mut elements: Vec<Value> = Vec::with_capacity(end.saturating_sub(start));
    for child_row in start..end {
        let element = Row::value_ref_internal(child_row, idx, values);
        elements.push(element.to_owned());
    }
    Value::List(elements)
}

impl<'a> From<&'a str> for ValueRef<'a> {
#[inline]
fn from(s: &str) -> ValueRef<'_> {