Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More conversions to and from arrow-rs #15

Merged
merged 5 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 138 additions & 1 deletion src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
offset::{Offset, Offsets, OffsetsBuffer},
};

use super::{new_empty_array, specification::try_check_offsets_bounds, Array};
use super::{new_empty_array, specification::try_check_offsets_bounds, Array, PrimitiveArray};

#[cfg(feature = "arrow")]
mod data;
Expand Down Expand Up @@ -242,3 +242,140 @@ impl<O: Offset> Array for ListArray<O> {
Box::new(self.clone().with_validity(validity))
}
}

/// arrow2 -> arrow1 conversion
///
/// Builds an `arrow-rs` `GenericListArray` from a [`ListArray`], converting the
/// offsets, the child values and the optional validity bitmap.
///
/// The child field of the result is always named `"item"`; its data type and
/// nullability are taken from the child field of the source array's data type.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_array::OffsetSizeTrait> From<ListArray<O>>
    for arrow_array::GenericListArray<O>
{
    fn from(value: ListArray<O>) -> Self {
        let field = ListArray::<O>::get_child_field(value.data_type());
        let field = Arc::new(arrow_schema::Field::new(
            "item",
            // Clone only the data type: cloning the whole field just to read
            // one member of it is wasted work (raised during review).
            field.data_type.clone().into(),
            field.is_nullable,
        ));
        let offsets = value.offsets().clone().into();
        let values = value.values().clone().into();
        let nulls = value.validity().map(|x| x.clone().into());
        Self::new(field, offsets, values, nulls)
    }
}

/// arrow1 -> arrow2 conversion
///
/// Takes an `arrow-rs` `GenericListArray` apart and reassembles it as a
/// [`ListArray`]. Only the data type of the `arrow-rs` child field is read;
/// it is handed to `default_datatype` to build the list data type.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_array::OffsetSizeTrait> From<arrow_array::GenericListArray<O>>
    for ListArray<O>
{
    fn from(array1: arrow_array::GenericListArray<O>) -> Self {
        let (child_field, offsets, child_values, validity) = array1.into_parts();
        let list_type = Self::default_datatype(child_field.data_type().clone().into());
        Self::new(
            list_type,
            offsets.into(),
            child_values.into(),
            validity.map(Bitmap::from_arrow),
        )
    }
}

/// Round-trips a sliced list array without a validity bitmap through `arrow-rs`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_list_array_conversion_non_null() {
    #![allow(clippy::zero_prefixed_literal)]
    // The unsliced array holds five lists; the thousands digit of each value
    // is the index of the list it belongs to:
    //
    //   0: [0_001, 0_002]
    //   1: [1_001, 1_002, 1_003]
    //   2: []
    //   3: [3_001, 3_002]
    //   4: [4_001]
    let offsets = OffsetsBuffer::<i32>::from(Offsets::try_from(vec![0, 2, 5, 5, 7, 8]).unwrap());
    let values = PrimitiveArray::<i16>::from_vec(vec![
        0_001_i16, 0_002, // list 0
        1_001, 1_002, 1_003, // list 1
        // list 2 is empty
        3_001, 3_002, // list 3
        4_001, // list 4
    ]);
    let item_field = Arc::new(Field::new("item", DataType::Int16, true));

    // No validity bitmap: every list is valid.
    let unsliced = ListArray::new(DataType::List(item_field), offsets, values.boxed(), None);

    // Drop the first and the last list, keeping lists 1..=3.
    let list_array = unsliced.sliced(1, 3);
    assert_eq!(list_array.len(), 3);
    for (index, expected_len) in [3_usize, 0, 2].iter().enumerate() {
        assert_eq!(list_array.value(index).len(), *expected_len);
    }

    // arrow2 -> arrow1 must keep the per-list lengths of the slice.
    let list_array_1 = arrow_array::ListArray::from(list_array.clone());
    for (index, expected_len) in [3_i32, 0, 2].iter().enumerate() {
        assert_eq!(list_array_1.value_length(index), *expected_len);
    }

    // arrow1 -> arrow2 must give back an array equal to the slice.
    let roundtripped = ListArray::from(list_array_1);
    assert_eq!(list_array, roundtripped);
}

/// Round-trips a sliced list array that contains a null entry through `arrow-rs`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_list_array_conversion_nullable() {
    #![allow(clippy::zero_prefixed_literal)]
    // The unsliced array holds six entries, one of which is null:
    //
    //   0: [0_001, 0_002]
    //   1: [1_001, 1_002, 1_003]
    //   2: []
    //   3: [3_001, 3_002]
    //   4: null
    //   5: [4_001]
    let offsets =
        OffsetsBuffer::<i32>::from(Offsets::try_from(vec![0, 2, 5, 5, 7, 7, 8]).unwrap());
    let values = PrimitiveArray::<i16>::from_vec(vec![
        0_001_i16, 0_002, // entry 0
        1_001, 1_002, 1_003, // entry 1
        // entry 2 is an empty list
        3_001, 3_002, // entry 3
        // entry 4 is null
        4_001, // entry 5
    ]);
    let validity = Bitmap::from([true, true, true, true, false, true]);
    let item_field = Arc::new(Field::new("item", DataType::Int16, true));

    let unsliced = ListArray::new(
        DataType::List(item_field),
        offsets,
        values.boxed(),
        Some(validity),
    );

    // Drop the first and the last entry, keeping entries 1..=4.
    let list_array = unsliced.sliced(1, 4);
    assert_eq!(list_array.len(), 4);
    // The final entry is the null one; it spans zero values.
    for (index, expected_len) in [3_usize, 0, 2, 0].iter().enumerate() {
        assert_eq!(list_array.value(index).len(), *expected_len);
    }

    // arrow2 -> arrow1 must keep the per-entry lengths of the slice.
    let list_array_1 = arrow_array::ListArray::from(list_array.clone());
    for (index, expected_len) in [3_i32, 0, 2, 0].iter().enumerate() {
        assert_eq!(list_array_1.value_length(index), *expected_len);
    }

    // arrow1 -> arrow2 must give back an array equal to the slice.
    let roundtripped = ListArray::from(list_array_1);
    assert_eq!(list_array, roundtripped);
}
40 changes: 40 additions & 0 deletions src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@ impl Bitmap {
})
}

/// Convert from `arrow-rs` `NullBuffer`
///
/// The offset, length and null count reported by `nulls` are carried over
/// unchanged to the resulting [`Bitmap`].
#[cfg(feature = "arrow")]
pub fn from_arrow(nulls: arrow_buffer::buffer::NullBuffer) -> Self {
    // Read the metadata first: `into_inner` below consumes `nulls`.
    let (offset, length, unset_bits) = (nulls.offset(), nulls.len(), nulls.null_count());
    let boolean_buffer = nulls.into_inner();
    let storage = crate::buffer::to_bytes(boolean_buffer.into_inner());
    // SAFETY: the invariants are held by the input
    unsafe { Self::from_inner_unchecked(storage.into(), offset, length, unset_bits) }
}

/// Returns the length of the [`Bitmap`].
#[inline]
pub fn len(&self) -> usize {
Expand Down Expand Up @@ -491,3 +502,32 @@ impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(buffer, null_count) }
}
}

// // Can't implement this because of `impl<P: AsRef<[bool]>> From<P> for Bitmap`
// #[cfg(feature = "arrow")]
// impl From<arrow_buffer::buffer::NullBuffer> for Bitmap {
// fn from(value: arrow_buffer::buffer::NullBuffer) -> Self {
// let buffer = value.buffer.into();
// let null_count = value.null_count;
// // Safety: null count is accurate
// unsafe { Self::from_unchecked(buffer, null_count) }
// }
// }

/// Round-trips a sliced [`Bitmap`] through an `arrow-rs` `NullBuffer`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_nullbuffer_conversion() {
    // Slice the bitmap so that the conversion is exercised with a non-zero offset.
    let mut bitmap2 = Bitmap::from([false, true, false, false, true, false, false, false, true]);
    bitmap2.slice(1, 6);

    let expected_slice = Bitmap::from([true, false, false, true, false, false]);
    assert_eq!(bitmap2, expected_slice);

    // arrow2 -> arrow1: the null count must survive the conversion.
    let nulls1 = arrow_buffer::buffer::NullBuffer::from(bitmap2.clone());
    assert_eq!(nulls1.null_count(), bitmap2.null_count());

    // arrow1 -> arrow2: we must get the sliced bitmap back.
    let back_again = Bitmap::from_arrow(nulls1);
    assert_eq!(back_again, bitmap2);
}
44 changes: 44 additions & 0 deletions src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,3 +541,47 @@ impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
self.0.as_slice()
}
}

/// arrow1 -> arrow2
///
/// Unwraps the `arrow-rs` `OffsetBuffer` down to its raw buffer and wraps that
/// buffer as an [`OffsetsBuffer`] without re-validating the offsets.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_buffer::ArrowNativeType> From<arrow_buffer::OffsetBuffer<O>>
    for OffsetsBuffer<O>
{
    fn from(offset_buffer2: arrow_buffer::OffsetBuffer<O>) -> Self {
        let scalars = offset_buffer2.into_inner();
        let raw: arrow_buffer::Buffer = scalars.into_inner();
        // SAFETY: the input buffer is guaranteed to be valid
        unsafe { Self::new_unchecked(raw.into()) }
    }
}

/// arrow2 -> arrow1
///
/// Converts the underlying buffer and wraps it in an `arrow-rs` `OffsetBuffer`
/// that covers every offset of the input.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_buffer::ArrowNativeType> From<OffsetsBuffer<O>>
    for arrow_buffer::OffsetBuffer<O>
{
    fn from(offsets_buffer: OffsetsBuffer<O>) -> Self {
        // Capture the count before the buffer is consumed below.
        let offset_count = offsets_buffer.len();
        let raw = offsets_buffer.into_inner().into();
        let scalars = arrow_buffer::ScalarBuffer::new(raw, 0, offset_count);
        Self::new(scalars)
    }
}

/// Round-trips a sliced [`OffsetsBuffer`] through an `arrow-rs` `OffsetBuffer`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_offsets_buffer_conversion() {
    let all_offsets = Offsets::try_from(vec![0, 1, 3, 3, 12, 42]).unwrap();
    let mut arrow2_offsets = OffsetsBuffer::<i32>::from(all_offsets);

    // Slice so that the conversion is exercised on a buffer with a non-zero start.
    arrow2_offsets.slice(1, 4);
    assert_eq!(arrow2_offsets.as_slice(), [1, 3, 3, 12]);

    // arrow2 -> arrow1: only the sliced offsets must be visible.
    let arrow1_offsets: arrow_buffer::OffsetBuffer<i32> = arrow2_offsets.clone().into();
    assert_eq!(arrow1_offsets.as_ref(), [1, 3, 3, 12]);

    // arrow1 -> arrow2: we must get the sliced buffer back.
    let back_again = OffsetsBuffer::from(arrow1_offsets);
    assert_eq!(back_again, arrow2_offsets);
    assert_eq!(back_again.as_slice(), [1, 3, 3, 12]);
}
Loading