From d875f5ef879891627aabf6937145008679f35dbc Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 12:07:37 -0500 Subject: [PATCH 01/59] add buffer stubs --- .../src/arrays/buffer/buffer_manager.rs | 21 ++ .../src/arrays/buffer/mod.rs | 63 ++++++ .../src/arrays/buffer/physical_type.rs | 60 ++++++ .../src/arrays/buffer/raw.rs | 87 +++++++++ .../src/arrays/buffer/string_view.rs | 182 ++++++++++++++++++ .../src/arrays/executor/physical_type.rs | 5 + crates/rayexec_execution/src/arrays/mod.rs | 1 + 7 files changed, 419 insertions(+) create mode 100644 crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs create mode 100644 crates/rayexec_execution/src/arrays/buffer/mod.rs create mode 100644 crates/rayexec_execution/src/arrays/buffer/physical_type.rs create mode 100644 crates/rayexec_execution/src/arrays/buffer/raw.rs create mode 100644 crates/rayexec_execution/src/arrays/buffer/string_view.rs diff --git a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs new file mode 100644 index 000000000..5770927e7 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs @@ -0,0 +1,21 @@ +use std::fmt::Debug; + +use rayexec_error::Result; + +pub trait BufferManager: Debug + Sync + Send + Clone { + type Reservation: Debug; + + fn reserve_external(&self, additional_bytes: usize) -> Result; +} + +/// Placeholder buffer manager. 
+#[derive(Debug, Clone)] +pub struct NopBufferManager; + +impl BufferManager for NopBufferManager { + type Reservation = (); + + fn reserve_external(&self, _additional_bytes: usize) -> Result { + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs new file mode 100644 index 000000000..f315e3a70 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -0,0 +1,63 @@ +pub mod buffer_manager; +pub mod physical_type; +pub mod string_view; + +mod raw; + +use buffer_manager::{BufferManager, NopBufferManager}; +use raw::RawBufferParts; +use rayexec_error::Result; +use string_view::StringViewHeap; + +use super::executor::physical_type::{PhysicalStorage, PhysicalType}; + +#[derive(Debug)] +pub struct ArrayBuffer { + /// Physical type of the buffer. + physical_type: PhysicalType, + /// The primary data buffer. + /// + /// For primitive buffers, this will just contain the primitives themselves. + /// Other buffers like string buffers will store the metadata here. + primary: RawBufferParts, + /// Secondary buffer if needed for the buffer type. + secondary: Box>, +} + +impl ArrayBuffer +where + B: BufferManager, +{ + /// Create an array buffer with the given capacity for the primary data + /// buffer. + /// + /// The secondary buffer will be initialized to None. 
+ pub(crate) fn with_primary_capacity( + manager: &B, + capacity: usize, + ) -> Result { + unimplemented!() + } +} + +#[derive(Debug)] +pub enum SecondaryBuffer { + StringViewHeap(StringViewHeap), + Temp(B), + None, +} + +impl Drop for ArrayBuffer { + fn drop(&mut self) { + let ptr = self.primary.ptr; + + unimplemented!() + // let len = self.primary.len * self.physical_type.primary_buffer_mem_size(); + // let cap = self.primary.cap * self.physical_type.primary_buffer_mem_size(); + + // let vec = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + // std::mem::drop(vec); + + // self.primary.reservation.free() + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs new file mode 100644 index 000000000..8b3ac355a --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -0,0 +1,60 @@ +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PhysicalType { + UntypedNull, + Boolean, + Int8, + Int16, + Int32, + Int64, + Int128, + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + Float16, + Float32, + Float64, + Interval, + Binary, + Utf8, + List, + Struct, + Dictionary, +} + +impl PhysicalType { + pub const fn as_str(&self) -> &'static str { + match self { + Self::UntypedNull => "UntypedNull", + Self::Boolean => "Boolean", + Self::Int8 => "Int8", + Self::Int16 => "Int16", + Self::Int32 => "Int32", + Self::Int64 => "Int64", + Self::Int128 => "Int128", + Self::UInt8 => "UInt8", + Self::UInt16 => "UInt16", + Self::UInt32 => "UInt32", + Self::UInt64 => "UInt64", + Self::UInt128 => "UInt128", + Self::Float16 => "Float16", + Self::Float32 => "Float32", + Self::Float64 => "Float64", + Self::Interval => "Interval", + Self::Binary => "Binary", + Self::Utf8 => "Utf8", + Self::List => "List", + Self::Struct => "Struct", + Self::Dictionary => "Dictionary", + } + } +} + +impl fmt::Display for PhysicalType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> 
fmt::Result { + write!(f, "{}", self.as_str()) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/raw.rs b/crates/rayexec_execution/src/arrays/buffer/raw.rs new file mode 100644 index 000000000..782f6dfa2 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/raw.rs @@ -0,0 +1,87 @@ +use super::buffer_manager::BufferManager; + +#[derive(Debug)] +pub struct RawBufferParts { + /// Memory reservation for this buffer. + pub(crate) reservation: B::Reservation, + /// Raw pointer to start of vec. + pub(crate) ptr: *mut u8, + /// Number of elements `T` in the vec, not bytes. + pub(crate) len: usize, + /// Capacity of vec (`T` not bytes). + pub(crate) cap: usize, +} + +impl RawBufferParts { + pub fn try_new(manager: &B, len: usize) -> Result { + // Note that `vec!` may over-allocate, so we track that too. + // + // See + // > vec![x; n], vec![a, b, c, d], and Vec::with_capacity(n), will all + // > produce a Vec with at least the requested capacity. + let alloc_size = len * std::mem::size_of::(); + let reservation = manager.reserve_external(alloc_size)?; + + let mut data: Vec = vec![T::default(); len]; + + let ptr = data.as_mut_ptr(); + let len = data.len(); + let cap = data.capacity(); + + let additional = (cap * std::mem::size_of::()) - alloc_size; + if additional > 0 { + // TODO: Combine + // let additional = manager.reserve_external(additional)?; + // reservation = reservation.combine(additional); + } + + std::mem::forget(data); + + Ok(RawBufferParts { + reservation, + ptr: ptr.cast(), + len, + cap, + }) + } + + pub unsafe fn as_slice(&self) -> &[T] { + std::slice::from_raw_parts(self.ptr.cast::().cast_const(), self.len) + } + + pub unsafe fn as_slice_mut(&mut self) -> &mut [T] { + std::slice::from_raw_parts_mut(self.ptr.cast::(), self.len) + } + + pub unsafe fn resize(&mut self, manager: &B, len: usize) -> Result<()> { + if self.len == 0 { + // Special case when length is zero. 
+ // + // We want to enable the use case where we initialize the buffer to + // nothing (null) and later append to it. However, the `T` that we + // pass in here might have a different alignment which wouldn't be + // safe. + // + // By just creating a new buffer, we can avoid that issue. + let new_self = Self::try_new::(manager, len)?; + *self = new_self; + return Ok(()); + } + + debug_assert_eq!(self.ptr as usize % std::mem::size_of::(), 0); + + let mut data: Vec = Vec::from_raw_parts(self.ptr.cast(), self.len, self.cap); + + // TODO: Reservation stuff. + + data.resize(len, T::default()); + + self.ptr = data.as_mut_ptr().cast(); + self.len = data.len(); + self.cap = data.capacity(); + + std::mem::forget(data); + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs new file mode 100644 index 000000000..b4a412b92 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -0,0 +1,182 @@ +use std::fmt; + +/// Metadata for small (<= 12 bytes) varlen data. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct StringViewSmallMetadata { + pub len: i32, + pub inline: [u8; 12], +} + +/// Metadata for large (> 12 bytes) varlen data. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(C)] +pub struct StringViewLargeMetadata { + pub len: i32, + pub prefix: [u8; 4], + pub buffer_idx: i32, + pub offset: i32, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum StringViewMetadata<'a> { + Small(&'a StringViewSmallMetadata), + Large(&'a StringViewLargeMetadata), +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub union StringViewMetadataUnion { + small: StringViewSmallMetadata, + large: StringViewLargeMetadata, +} + +impl Default for StringViewMetadataUnion { + #[inline] + fn default() -> Self { + Self::zero() + } +} + +impl StringViewMetadataUnion { + #[inline] + pub fn as_metadata(&self) -> StringViewMetadata { + unsafe { + // i32 len is first field in both, safe to access from either + // variant. + if self.is_small() { + StringViewMetadata::Small(&self.small) + } else { + StringViewMetadata::Large(&self.large) + } + } + } + + pub const fn is_small(&self) -> bool { + // i32 len is first field in both, safe to access from either + // variant. + unsafe { self.small.len <= 12 } + } + + pub fn data_len(&self) -> i32 { + // SAFETY: `len` field is in the same place in both variants. 
+ unsafe { self.small.len } + } + + pub(crate) const fn zero() -> Self { + Self { + small: StringViewSmallMetadata { + len: 0, + inline: [0; 12], + }, + } + } + + fn as_small(&self) -> StringViewSmallMetadata { + debug_assert!(self.is_small()); + unsafe { self.small } + } + + fn as_large(&self) -> StringViewLargeMetadata { + debug_assert!(!self.is_small()); + unsafe { self.large } + } +} + +impl From for StringViewMetadataUnion { + fn from(value: StringViewSmallMetadata) -> Self { + StringViewMetadataUnion { small: value } + } +} + +impl From for StringViewMetadataUnion { + fn from(value: StringViewLargeMetadata) -> Self { + StringViewMetadataUnion { large: value } + } +} + +impl fmt::Debug for StringViewMetadataUnion { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.is_small() { + let small = self.as_small(); + small.fmt(f) + } else { + let large = self.as_large(); + large.fmt(f) + } + } +} + +#[derive(Debug)] +pub struct StringViewHeap { + /// Buffer containing all blob data. + buffer: Vec, +} + +impl StringViewHeap { + // TODO: Tracker + pub const fn new() -> Self { + StringViewHeap { buffer: Vec::new() } + } + + pub fn push_bytes(&mut self, value: &[u8]) -> StringViewMetadataUnion { + if value.len() as i32 <= 12 { + // Store completely inline. + let mut inline = [0; 12]; + inline[0..value.len()].copy_from_slice(value); + + StringViewSmallMetadata { + len: value.len() as i32, + inline, + } + .into() + } else { + // Store prefix, buf index, and offset in line. Store complete copy + // in buffer. 
+ + let offset = self.buffer.len(); + let mut prefix = [0; 4]; + let prefix_len = std::cmp::min(value.len(), 4); + prefix[0..prefix_len].copy_from_slice(&value[0..prefix_len]); + + self.buffer.extend_from_slice(value); + + StringViewLargeMetadata { + len: value.len() as i32, + prefix, + buffer_idx: 0, + offset: offset as i32, + } + .into() + } + } + + pub fn get<'a, 'b: 'a>(&'b self, metadata: &'a StringViewMetadataUnion) -> Option<&'a [u8]> { + if metadata.is_small() { + unsafe { Some(&metadata.small.inline[..(metadata.small.len as usize)]) } + } else { + unsafe { + let offset = metadata.large.offset as usize; + let len = metadata.large.len as usize; + + self.buffer.get(offset..(offset + len)) + } + } + } + + pub fn get_mut<'a, 'b: 'a>( + &'b mut self, + metadata: &'a mut StringViewMetadataUnion, + ) -> Option<&'a mut [u8]> { + if metadata.is_small() { + unsafe { Some(&mut metadata.small.inline[..(metadata.small.len as usize)]) } + } else { + unsafe { + let offset = metadata.large.offset as usize; + let len = metadata.large.len as usize; + + self.buffer.get_mut(offset..(offset + len)) + } + } + } +} diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index 516b9831f..cd247c18a 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -178,6 +178,11 @@ impl VarlenType for [u8] { /// Contains a lifetime to enable tying the returned storage to the provided /// array data. pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { + // /// The type that's stored in the primary buffer. + // /// + // /// This should be small and fixed sized. + // type PrimaryBufferType: Sized + Debug + Default + Sync + Send + Clone + Copy; + /// The type that gets returned from the underlying array storage. type Type<'a>: Sync + Send; /// The type of the underlying array storage. 
diff --git a/crates/rayexec_execution/src/arrays/mod.rs b/crates/rayexec_execution/src/arrays/mod.rs index d35785fe0..970a90e32 100644 --- a/crates/rayexec_execution/src/arrays/mod.rs +++ b/crates/rayexec_execution/src/arrays/mod.rs @@ -1,6 +1,7 @@ pub mod array; pub mod batch; pub mod bitmap; +pub mod buffer; pub mod compute; pub mod datatype; pub mod executor; From e7521c5148409a6a0b645917570422100c17d6b2 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 12:17:41 -0500 Subject: [PATCH 02/59] more stub --- .../src/arrays/buffer/physical_type.rs | 63 ++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 8b3ac355a..28ae73f8f 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -1,4 +1,4 @@ -use std::fmt; +use std::fmt::{self, Debug}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PhysicalType { @@ -58,3 +58,64 @@ impl fmt::Display for PhysicalType { write!(f, "{}", self.as_str()) } } + +/// Represents an in-memory array that can be indexed into to retrieve values. +pub trait Addressable: Debug { + /// The type that get's returned. + type T: Send + Debug + ?Sized; + + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get a value at the given index. + fn get(&self, idx: usize) -> Option<&Self::T>; +} + +impl Addressable for &[T] +where + T: Debug + Send, +{ + type T = T; + + fn len(&self) -> usize { + (**self).len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + (**self).get(idx) + } +} + +/// Represents in-memory storage that we can get mutable references to. +pub trait AddressableMut: Addressable { + /// Get a mutable reference to a value at the given index. + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T>; + + /// Put a value at the given index. 
+ /// + /// Should panic if index is out of bounds. + fn put(&mut self, idx: usize, val: &Self::T); +} + +/// Trait for determining how we access the underlying storage for arrays. +pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { + const PHYSICAL_TYPE: PhysicalType; + + /// The type that's stored in the primary buffer. + /// + /// This should be small and fixed sized. + type PrimaryBufferType: Sized + Debug + Default + Sync + Send + Clone + Copy; + + /// The logical type being stored that can be accessed. + /// + /// For primitive buffers, this will be the same as the primary buffer type. + type StorageType: ?Sized; + + /// Size in bytes of the type being stored in the primary buffer. + fn primary_buffer_type_size() -> usize { + std::mem::size_of::() + } +} From 3571ce11469a7efffe9c5df5c65c04b90c2ec337 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 14:53:14 -0500 Subject: [PATCH 03/59] addressable --- .../src/arrays/buffer/mod.rs | 11 +- .../src/arrays/buffer/physical_type.rs | 101 ++++++++++++++++++ 2 files changed, 110 insertions(+), 2 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index f315e3a70..8ce53327e 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -5,12 +5,11 @@ pub mod string_view; mod raw; use buffer_manager::{BufferManager, NopBufferManager}; +use physical_type::{PhysicalStorage, PhysicalType}; use raw::RawBufferParts; use rayexec_error::Result; use string_view::StringViewHeap; -use super::executor::physical_type::{PhysicalStorage, PhysicalType}; - #[derive(Debug)] pub struct ArrayBuffer { /// Physical type of the buffer. 
@@ -38,6 +37,14 @@ where ) -> Result { unimplemented!() } + + pub fn try_as_slice(&self) -> Result<&[S::PrimaryBufferType]> { + unimplemented!() + } + + pub fn try_as_slice_mut(&mut self) -> Result<&mut [S::PrimaryBufferType]> { + unimplemented!() + } } #[derive(Debug)] diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 28ae73f8f..362d95755 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -1,5 +1,12 @@ use std::fmt::{self, Debug}; +use half::f16; +use rayexec_error::Result; + +use super::buffer_manager::BufferManager; +use super::ArrayBuffer; +use crate::arrays::scalar::interval::Interval; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PhysicalType { UntypedNull, @@ -89,6 +96,21 @@ where } } +impl Addressable for &mut [T] +where + T: Debug + Send + Copy, +{ + type T = T; + + fn len(&self) -> usize { + (**self).len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + (**self).get(idx) + } +} + /// Represents in-memory storage that we can get mutable references to. pub trait AddressableMut: Addressable { /// Get a mutable reference to a value at the given index. @@ -100,6 +122,19 @@ pub trait AddressableMut: Addressable { fn put(&mut self, idx: usize, val: &Self::T); } +impl AddressableMut for &mut [T] +where + T: Debug + Send + Copy, +{ + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + (**self).get_mut(idx) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + self[idx] = *val; + } +} + /// Trait for determining how we access the underlying storage for arrays. pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { const PHYSICAL_TYPE: PhysicalType; @@ -114,8 +149,74 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { /// For primitive buffers, this will be the same as the primary buffer type. 
type StorageType: ?Sized; + /// The type of the addressable storage. + type Addressable<'a>: Addressable; + /// Size in bytes of the type being stored in the primary buffer. fn primary_buffer_type_size() -> usize { std::mem::size_of::() } + + /// Get addressable storage for indexing into the array. + fn get_addressable(buffer: &ArrayBuffer) -> Result>; +} + +pub trait MutablePhysicalStorage: PhysicalStorage { + type AddressableMut<'a>: AddressableMut; + + /// Get mutable addressable storage for the array. + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result>; +} + +macro_rules! generate_primitive { + ($prim:ty, $name:ident, $phys_typ:ident) => { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + pub struct $name; + + impl PhysicalStorage for $name { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::$phys_typ; + + type PrimaryBufferType = $prim; + type StorageType = Self::PrimaryBufferType; + type Addressable<'a> = &'a [Self::StorageType]; + + fn get_addressable( + buffer: &ArrayBuffer, + ) -> Result> { + buffer.try_as_slice::() + } + } + + impl MutablePhysicalStorage for $name { + type AddressableMut<'a> = &'a mut [Self::StorageType]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } + } + }; } + +generate_primitive!(bool, PhysicalBool, Boolean); + +generate_primitive!(i8, PhysicalI8, Int8); +generate_primitive!(i16, PhysicalI16, Int16); +generate_primitive!(i32, PhysicalI32, Int32); +generate_primitive!(i64, PhysicalI64, Int64); +generate_primitive!(i128, PhysicalI128, Int128); + +generate_primitive!(u8, PhysicalU8, UInt8); +generate_primitive!(u16, PhysicalU16, UInt16); +generate_primitive!(u32, PhysicalU32, UInt32); +generate_primitive!(u64, PhysicalU64, UInt64); +generate_primitive!(u128, PhysicalU128, UInt128); + +generate_primitive!(f16, PhysicalF16, Float16); +generate_primitive!(f32, PhysicalF32, Float32); +generate_primitive!(f64, PhysicalF64, Float64); + +generate_primitive!(Interval, 
PhysicalInterval, Interval); From 24e0cfe6fe8fb82003c1c9f1587f5879a47f5e99 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 16:32:04 -0500 Subject: [PATCH 04/59] string view --- .../src/arrays/buffer/mod.rs | 82 ++++++++++++++++--- .../src/arrays/buffer/physical_type.rs | 82 ++++++++++++++----- .../src/arrays/buffer/string_view.rs | 44 ++++++++++ 3 files changed, 177 insertions(+), 31 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index 8ce53327e..7864609c6 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -7,8 +7,13 @@ mod raw; use buffer_manager::{BufferManager, NopBufferManager}; use physical_type::{PhysicalStorage, PhysicalType}; use raw::RawBufferParts; -use rayexec_error::Result; -use string_view::StringViewHeap; +use rayexec_error::{RayexecError, Result}; +use string_view::{ + StringViewAddressable, + StringViewAddressableMut, + StringViewHeap, + StringViewMetadataUnion, +}; #[derive(Debug)] pub struct ArrayBuffer { @@ -35,15 +40,73 @@ where manager: &B, capacity: usize, ) -> Result { - unimplemented!() + let primary = RawBufferParts::try_new(manager, capacity)?; + + Ok(ArrayBuffer { + physical_type: S::PHYSICAL_TYPE, + primary, + secondary: Box::new(SecondaryBuffer::None), + }) + } + + pub(crate) fn put_secondary_buffer(&mut self, secondary: SecondaryBuffer) { + self.secondary = Box::new(secondary) } pub fn try_as_slice(&self) -> Result<&[S::PrimaryBufferType]> { - unimplemented!() + self.check_type(S::PHYSICAL_TYPE)?; + let slice = unsafe { self.primary.as_slice::() }; + + Ok(slice) } pub fn try_as_slice_mut(&mut self) -> Result<&mut [S::PrimaryBufferType]> { - unimplemented!() + self.check_type(S::PHYSICAL_TYPE)?; + let slice = unsafe { self.primary.as_slice_mut::() }; + + Ok(slice) + } + + pub fn get_secondary(&self) -> &SecondaryBuffer { + &self.secondary + } + + pub fn 
get_secondary_mut(&mut self) -> &mut SecondaryBuffer { + &mut self.secondary + } + + pub fn try_as_string_view_addressable(&self) -> Result { + self.check_type(PhysicalType::Utf8)?; + + let metadata = unsafe { self.primary.as_slice::() }; + let heap = match self.secondary.as_ref() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(StringViewAddressable { metadata, heap }) + } + + pub fn try_as_string_view_addressable_mut(&mut self) -> Result { + self.check_type(PhysicalType::Utf8)?; + + let metadata = unsafe { self.primary.as_slice_mut::() }; + let heap = match self.secondary.as_mut() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(StringViewAddressableMut { metadata, heap }) + } + + fn check_type(&self, want: PhysicalType) -> Result<()> { + if want != self.physical_type { + return Err(RayexecError::new("Physical types don't match") + .with_field("have", self.physical_type) + .with_field("want", want)); + } + + Ok(()) } } @@ -58,12 +121,11 @@ impl Drop for ArrayBuffer { fn drop(&mut self) { let ptr = self.primary.ptr; - unimplemented!() - // let len = self.primary.len * self.physical_type.primary_buffer_mem_size(); - // let cap = self.primary.cap * self.physical_type.primary_buffer_mem_size(); + let len = self.primary.len * self.physical_type.primary_buffer_mem_size(); + let cap = self.primary.cap * self.physical_type.primary_buffer_mem_size(); - // let vec = unsafe { Vec::from_raw_parts(ptr, len, cap) }; - // std::mem::drop(vec); + let vec = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + std::mem::drop(vec); // self.primary.reservation.free() } diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 362d95755..ec1ce408f 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ 
b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -4,6 +4,11 @@ use half::f16; use rayexec_error::Result; use super::buffer_manager::BufferManager; +use super::string_view::{ + StringViewAddressable, + StringViewAddressableMut, + StringViewMetadataUnion, +}; use super::ArrayBuffer; use crate::arrays::scalar::interval::Interval; @@ -33,6 +38,28 @@ pub enum PhysicalType { } impl PhysicalType { + pub const fn primary_buffer_mem_size(&self) -> usize { + match self { + Self::Boolean => PhysicalBool::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int8 => PhysicalI8::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int16 => PhysicalI16::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int32 => PhysicalI32::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int64 => PhysicalI64::PRIMARY_BUFFER_TYPE_SIZE, + Self::Int128 => PhysicalI128::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt8 => PhysicalU8::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt16 => PhysicalU16::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt32 => PhysicalU32::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt64 => PhysicalU64::PRIMARY_BUFFER_TYPE_SIZE, + Self::UInt128 => PhysicalU128::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float16 => PhysicalF16::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float32 => PhysicalF32::PRIMARY_BUFFER_TYPE_SIZE, + Self::Float64 => PhysicalF64::PRIMARY_BUFFER_TYPE_SIZE, + Self::Interval => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + + _ => unimplemented!(), + } + } + pub const fn as_str(&self) -> &'static str { match self { Self::UntypedNull => "UntypedNull", @@ -96,23 +123,10 @@ where } } -impl Addressable for &mut [T] -where - T: Debug + Send + Copy, -{ - type T = T; - - fn len(&self) -> usize { - (**self).len() - } - - fn get(&self, idx: usize) -> Option<&Self::T> { - (**self).get(idx) - } -} - /// Represents in-memory storage that we can get mutable references to. -pub trait AddressableMut: Addressable { +pub trait AddressableMut: Debug { + type T: Send + Debug + ?Sized; + /// Get a mutable reference to a value at the given index. 
fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T>; @@ -126,6 +140,8 @@ impl AddressableMut for &mut [T] where T: Debug + Send + Copy, { + type T = T; + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { (**self).get_mut(idx) } @@ -139,6 +155,9 @@ where pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { const PHYSICAL_TYPE: PhysicalType; + /// Size in bytes of the type being stored in the primary buffer. + const PRIMARY_BUFFER_TYPE_SIZE: usize = std::mem::size_of::(); + /// The type that's stored in the primary buffer. /// /// This should be small and fixed sized. @@ -152,11 +171,6 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { /// The type of the addressable storage. type Addressable<'a>: Addressable; - /// Size in bytes of the type being stored in the primary buffer. - fn primary_buffer_type_size() -> usize { - std::mem::size_of::() - } - /// Get addressable storage for indexing into the array. fn get_addressable(buffer: &ArrayBuffer) -> Result>; } @@ -220,3 +234,29 @@ generate_primitive!(f32, PhysicalF32, Float32); generate_primitive!(f64, PhysicalF64, Float64); generate_primitive!(Interval, PhysicalInterval, Interval); + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalUtf8; + +impl PhysicalStorage for PhysicalUtf8 { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Utf8; + + type PrimaryBufferType = StringViewMetadataUnion; + type StorageType = str; + + type Addressable<'a> = StringViewAddressable<'a>; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_string_view_addressable() + } +} + +impl MutablePhysicalStorage for PhysicalUtf8 { + type AddressableMut<'a> = StringViewAddressableMut<'a>; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_string_view_addressable_mut() + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index 
b4a412b92..aae74a6ec 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -1,5 +1,49 @@ use std::fmt; +use super::physical_type::{Addressable, AddressableMut}; + +#[derive(Debug)] +pub struct StringViewAddressable<'a> { + pub(crate) metadata: &'a [StringViewMetadataUnion], + pub(crate) heap: &'a StringViewHeap, +} + +impl<'a> Addressable for StringViewAddressable<'a> { + type T = str; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + let m = self.metadata.get(idx)?; + let bs = self.heap.get(m)?; + Some(unsafe { std::str::from_utf8_unchecked(bs) }) + } +} + +#[derive(Debug)] +pub struct StringViewAddressableMut<'a> { + pub(crate) metadata: &'a mut [StringViewMetadataUnion], + pub(crate) heap: &'a mut StringViewHeap, +} + +impl<'a> AddressableMut for StringViewAddressableMut<'a> { + type T = str; + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + let m = self.metadata.get_mut(idx)?; + let bs = self.heap.get_mut(m)?; + Some(unsafe { std::str::from_utf8_unchecked_mut(bs) }) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + let bs = val.as_bytes(); + let new_m = self.heap.push_bytes(bs); + self.metadata[idx] = new_m; + } +} + /// Metadata for small (<= 12 bytes) varlen data. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(C)] From 69405a93c3a932c54bb1cf285b854511158738bb Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 16:34:00 -0500 Subject: [PATCH 05/59] temp rename --- crates/docgen/src/markdown_table.rs | 6 +- crates/rayexec_csv/src/reader.rs | 14 ++-- .../rayexec_execution/src/arrays/array/mod.rs | 48 ++++++------- crates/rayexec_execution/src/arrays/batch.rs | 14 ++-- .../src/arrays/compute/cast/array.rs | 68 +++++++++--------- .../src/arrays/compute/cast/behavior.rs | 6 +- .../src/arrays/compute/date.rs | 14 ++-- .../src/arrays/executor/aggregate/binary.rs | 10 +-- .../src/arrays/executor/aggregate/mod.rs | 6 +- .../src/arrays/executor/aggregate/unary.rs | 12 ++-- .../src/arrays/executor/physical_type.rs | 4 +- .../src/arrays/executor/scalar/binary.rs | 22 +++--- .../src/arrays/executor/scalar/fill.rs | 52 +++++++------- .../src/arrays/executor/scalar/hash.rs | 12 ++-- .../src/arrays/executor/scalar/list.rs | 16 ++--- .../src/arrays/executor/scalar/mod.rs | 4 +- .../src/arrays/executor/scalar/select.rs | 12 ++-- .../src/arrays/executor/scalar/ternary.rs | 12 ++-- .../src/arrays/executor/scalar/unary.rs | 22 +++--- .../src/arrays/executor/scalar/uniform.rs | 26 +++---- .../src/arrays/format/mod.rs | 4 +- .../src/arrays/format/pretty/table.rs | 70 ++++++++++--------- .../src/arrays/row/encoding.rs | 38 +++++----- .../rayexec_execution/src/arrays/row/mod.rs | 4 +- .../src/arrays/scalar/mod.rs | 10 +-- .../src/arrays/storage/list.rs | 12 ++-- .../rayexec_execution/src/arrays/testutil.rs | 16 ++--- .../intermediate/planner/plan_describe.rs | 6 +- .../intermediate/planner/plan_explain.rs | 6 +- .../intermediate/planner/plan_scan.rs | 4 +- .../intermediate/planner/plan_show_var.rs | 4 +- .../operators/hash_aggregate/chunk.rs | 8 +-- .../operators/hash_aggregate/compare.rs | 12 ++-- .../operators/hash_aggregate/distinct.rs | 6 +- .../operators/hash_aggregate/hash_table.rs | 46 ++++++------ 
.../execution/operators/hash_aggregate/mod.rs | 6 +- .../operators/hash_join/condition.rs | 4 +- .../src/execution/operators/sink.rs | 4 +- .../operators/sort/util/sort_keys.rs | 4 +- .../src/execution/operators/table_inout.rs | 4 +- .../src/execution/operators/test_util.rs | 4 +- .../src/execution/operators/unnest.rs | 26 +++---- .../src/execution/operators/util/broadcast.rs | 4 +- .../operators/util/outer_join_tracker.rs | 10 +-- .../src/execution/operators/util/resizer.rs | 40 +++++------ .../src/expr/physical/case_expr.rs | 8 +-- .../src/expr/physical/cast_expr.rs | 4 +- .../src/expr/physical/column_expr.rs | 4 +- .../src/expr/physical/literal_expr.rs | 4 +- .../src/expr/physical/mod.rs | 12 ++-- .../src/expr/physical/scalar_function_expr.rs | 4 +- .../src/functions/aggregate/builtin/avg.rs | 4 +- .../src/functions/aggregate/builtin/sum.rs | 14 ++-- .../src/functions/aggregate/states.rs | 30 ++++---- .../src/functions/scalar/builtin/arith/add.rs | 10 +-- .../src/functions/scalar/builtin/arith/div.rs | 12 ++-- .../src/functions/scalar/builtin/arith/mul.rs | 14 ++-- .../src/functions/scalar/builtin/arith/rem.rs | 10 +-- .../src/functions/scalar/builtin/arith/sub.rs | 10 +-- .../src/functions/scalar/builtin/boolean.rs | 24 +++---- .../functions/scalar/builtin/comparison.rs | 44 ++++++------ .../scalar/builtin/datetime/date_part.rs | 4 +- .../scalar/builtin/datetime/date_trunc.rs | 4 +- .../scalar/builtin/datetime/epoch.rs | 6 +- .../src/functions/scalar/builtin/is.rs | 10 +-- .../scalar/builtin/list/list_extract.rs | 14 ++-- .../scalar/builtin/list/list_values.rs | 17 +++-- .../src/functions/scalar/builtin/negate.rs | 6 +- .../functions/scalar/builtin/numeric/abs.rs | 4 +- .../functions/scalar/builtin/numeric/acos.rs | 4 +- .../functions/scalar/builtin/numeric/asin.rs | 4 +- .../functions/scalar/builtin/numeric/atan.rs | 4 +- .../functions/scalar/builtin/numeric/cbrt.rs | 4 +- .../functions/scalar/builtin/numeric/ceil.rs | 4 +- 
.../functions/scalar/builtin/numeric/cos.rs | 4 +- .../scalar/builtin/numeric/degrees.rs | 4 +- .../functions/scalar/builtin/numeric/exp.rs | 4 +- .../functions/scalar/builtin/numeric/floor.rs | 4 +- .../functions/scalar/builtin/numeric/isnan.rs | 4 +- .../functions/scalar/builtin/numeric/ln.rs | 4 +- .../functions/scalar/builtin/numeric/log.rs | 6 +- .../functions/scalar/builtin/numeric/mod.rs | 6 +- .../scalar/builtin/numeric/radians.rs | 4 +- .../functions/scalar/builtin/numeric/sin.rs | 4 +- .../functions/scalar/builtin/numeric/sqrt.rs | 4 +- .../functions/scalar/builtin/numeric/tan.rs | 4 +- .../src/functions/scalar/builtin/random.rs | 6 +- .../scalar/builtin/similarity/l2_distance.rs | 4 +- .../functions/scalar/builtin/string/ascii.rs | 4 +- .../functions/scalar/builtin/string/case.rs | 8 +-- .../functions/scalar/builtin/string/concat.rs | 6 +- .../scalar/builtin/string/contains.rs | 6 +- .../scalar/builtin/string/ends_with.rs | 6 +- .../functions/scalar/builtin/string/length.rs | 8 +-- .../functions/scalar/builtin/string/like.rs | 6 +- .../functions/scalar/builtin/string/pad.rs | 6 +- .../scalar/builtin/string/regexp_replace.rs | 4 +- .../functions/scalar/builtin/string/repeat.rs | 4 +- .../scalar/builtin/string/starts_with.rs | 4 +- .../scalar/builtin/string/substring.rs | 6 +- .../functions/scalar/builtin/string/trim.rs | 6 +- .../src/functions/scalar/mod.rs | 4 +- .../src/functions/table/builtin/series.rs | 6 +- .../src/functions/table/builtin/system.rs | 40 +++++------ .../src/functions/table/builtin/unnest.rs | 8 +-- crates/rayexec_parquet/src/reader/mod.rs | 4 +- .../rayexec_parquet/src/reader/primitive.rs | 12 ++-- crates/rayexec_parquet/src/reader/varlen.rs | 10 +-- crates/rayexec_parquet/src/writer/mod.rs | 4 +- crates/rayexec_postgres/src/lib.rs | 20 +++--- crates/rayexec_shell/src/result_table.rs | 8 +-- crates/rayexec_unity_catalog/src/functions.rs | 25 +++---- crates/rayexec_wasm/src/session.rs | 8 +-- test_bin/integration_slt_hybrid.rs | 10 +-- 
114 files changed, 667 insertions(+), 653 deletions(-) diff --git a/crates/docgen/src/markdown_table.rs b/crates/docgen/src/markdown_table.rs index 70a6dc4e8..591d0c548 100644 --- a/crates/docgen/src/markdown_table.rs +++ b/crates/docgen/src/markdown_table.rs @@ -54,7 +54,7 @@ pub fn write_markdown_table<'a>( #[cfg(test)] mod tests { - use rayexec_execution::arrays::array::Array; + use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Field; @@ -63,8 +63,8 @@ mod tests { #[test] fn simple() { let batch = Batch::try_new([ - Array::from_iter([1, 2, 3]), - Array::from_iter(["cat", "dog", "mouse"]), + Array2::from_iter([1, 2, 3]), + Array2::from_iter(["cat", "dog", "mouse"]), ]) .unwrap(); diff --git a/crates/rayexec_csv/src/reader.rs b/crates/rayexec_csv/src/reader.rs index 448bedebf..23f57de4f 100644 --- a/crates/rayexec_csv/src/reader.rs +++ b/crates/rayexec_csv/src/reader.rs @@ -23,7 +23,7 @@ use bytes::Bytes; use futures::stream::BoxStream; use futures::StreamExt; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::parse::{ @@ -490,7 +490,7 @@ impl AsyncCsvStream { completed: &CompletedRecords, field_idx: usize, skip_records: usize, - ) -> Result { + ) -> Result { let mut values = Bitmap::with_capacity(completed.num_completed()); let mut validity = Bitmap::with_capacity(completed.num_completed()); @@ -507,7 +507,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( DataType::Boolean, validity, BooleanStorage::from(values), @@ -520,7 +520,7 @@ impl AsyncCsvStream { field_idx: usize, skip_records: usize, mut parser: P, - ) -> Result + ) -> Result where T: Default, P: Parser, 
@@ -544,7 +544,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( datatype.clone(), validity, PrimitiveStorage::from(values), @@ -555,7 +555,7 @@ impl AsyncCsvStream { completed: &CompletedRecords, field_idx: usize, skip_records: usize, - ) -> Result { + ) -> Result { let mut values = GermanVarlenBuffer::with_len(completed.num_completed() - skip_records); let mut validity = Bitmap::with_capacity(completed.num_completed()); @@ -569,7 +569,7 @@ impl AsyncCsvStream { } } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( DataType::Utf8, validity, values.into_data(), diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index 1278c58ec..c9d29d347 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -59,7 +59,7 @@ pub type PhysicalValidity = SharedOrOwned; pub type LogicalSelection = SharedOrOwned; #[derive(Debug, Clone, PartialEq)] -pub struct Array { +pub struct Array2 { /// Data type of the array. pub(crate) datatype: DataType, /// Selection of rows for the array. @@ -77,7 +77,7 @@ pub struct Array { pub(crate) data: ArrayData, } -impl Array { +impl Array2 { pub fn new_untyped_null_array(len: usize) -> Self { // Note that we're adding a bitmap here even though the data already // returns NULL. 
This allows the executors (especially for aggregates) @@ -87,7 +87,7 @@ impl Array { let selection = SelectionVector::repeated(len, 0); let data = UntypedNullStorage(1); - Array { + Array2 { datatype: DataType::Null, selection: Some(selection.into()), validity: Some(validity.into()), @@ -103,7 +103,7 @@ impl Array { let validity = Bitmap::new_with_all_false(1); let selection = SelectionVector::repeated(len, 0); - Ok(Array { + Ok(Array2 { datatype, selection: Some(selection.into()), validity: Some(validity.into()), @@ -112,7 +112,7 @@ impl Array { } pub fn new_with_array_data(datatype: DataType, data: impl Into) -> Self { - Array { + Array2 { datatype, selection: None, validity: None, @@ -125,7 +125,7 @@ impl Array { validity: impl Into, data: impl Into, ) -> Self { - Array { + Array2 { datatype, selection: None, validity: Some(validity.into()), @@ -139,7 +139,7 @@ impl Array { selection: impl Into, data: impl Into, ) -> Self { - Array { + Array2 { datatype, selection: Some(selection.into()), validity: Some(validity.into()), @@ -297,7 +297,7 @@ impl Array { } match self.array_data() { - ArrayData::UntypedNull(_) => Ok(Array { + ArrayData::UntypedNull(_) => Ok(Array2 { datatype: self.datatype.clone(), selection: None, validity: None, @@ -768,7 +768,7 @@ impl Array { None => SelectionVector::with_range(offset..(offset + count)), }; - Array { + Array2 { datatype: self.datatype.clone(), selection: Some(selection.into()), validity: self.validity.clone(), @@ -781,10 +781,10 @@ fn array_not_valid_for_type_err(datatype: &DataType) -> RayexecError { RayexecError::new(format!("Array data not valid for data type: {datatype}")) } -impl FromIterator> for Array +impl FromIterator> for Array2 where F: Default, - Array: FromIterator, + Array2: FromIterator, { fn from_iter>>(iter: T) -> Self { // TODO: Make a bit more performant, this is used for more than just @@ -803,14 +803,14 @@ where } } - let mut array = Array::from_iter(new_vals); + let mut array = 
Array2::from_iter(new_vals); array.validity = Some(validity.into()); array } } -impl FromIterator for Array { +impl FromIterator for Array2 { fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); @@ -820,7 +820,7 @@ impl FromIterator for Array { german.try_push(s.as_bytes()).unwrap(); } - Array { + Array2 { datatype: DataType::Utf8, selection: None, validity: None, @@ -829,7 +829,7 @@ impl FromIterator for Array { } } -impl<'a> FromIterator<&'a str> for Array { +impl<'a> FromIterator<&'a str> for Array2 { fn from_iter>(iter: T) -> Self { let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); @@ -839,7 +839,7 @@ impl<'a> FromIterator<&'a str> for Array { german.try_push(s.as_bytes()).unwrap(); } - Array { + Array2 { datatype: DataType::Utf8, selection: None, validity: None, @@ -878,10 +878,10 @@ impl_primitive_from_iter!(f16, Float16); impl_primitive_from_iter!(f32, Float32); impl_primitive_from_iter!(f64, Float64); -impl FromIterator for Array { +impl FromIterator for Array2 { fn from_iter>(iter: T) -> Self { let vals: Bitmap = iter.into_iter().collect(); - Array { + Array2 { datatype: DataType::Boolean, selection: None, validity: None, @@ -1103,7 +1103,7 @@ mod tests { #[test] fn select_mut_no_change() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::with_range(0..3); arr.select_mut(selection); @@ -1115,7 +1115,7 @@ mod tests { #[test] fn select_mut_prune_rows() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 2]); arr.select_mut(selection); @@ -1127,7 +1127,7 @@ mod tests { #[test] fn select_mut_expand_rows() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 1, 1, 2]); arr.select_mut(selection); @@ -1141,7 +1141,7 @@ mod 
tests { #[test] fn select_mut_existing_selection() { - let mut arr = Array::from_iter(["a", "b", "c"]); + let mut arr = Array2::from_iter(["a", "b", "c"]); let selection = SelectionVector::from_iter([0, 2]); // => ["a", "c"] @@ -1158,7 +1158,7 @@ mod tests { #[test] fn scalar_value_logical_eq_i32() { - let arr = Array::from_iter([1, 2, 3]); + let arr = Array2::from_iter([1, 2, 3]); let scalar = ScalarValue::Int32(2); assert!(!arr.scalar_value_logically_eq(&scalar, 0).unwrap()); @@ -1167,7 +1167,7 @@ mod tests { #[test] fn scalar_value_logical_eq_null() { - let arr = Array::from_iter([Some(1), None, Some(3)]); + let arr = Array2::from_iter([Some(1), None, Some(3)]); let scalar = ScalarValue::Null; assert!(!arr.scalar_value_logically_eq(&scalar, 0).unwrap()); diff --git a/crates/rayexec_execution/src/arrays/batch.rs b/crates/rayexec_execution/src/arrays/batch.rs index 61e6b2628..d61ea434b 100644 --- a/crates/rayexec_execution/src/arrays/batch.rs +++ b/crates/rayexec_execution/src/arrays/batch.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::scalar::concat_with_exact_total_len; use crate::arrays::row::ScalarRow; use crate::arrays::selection::SelectionVector; @@ -11,7 +11,7 @@ use crate::arrays::selection::SelectionVector; #[derive(Debug, Clone, PartialEq)] pub struct Batch { /// Columns that make up this batch. - cols: Vec, + cols: Vec, /// Number of rows in this batch. Needed to allow for a batch that has no /// columns but a non-zero number of rows. @@ -80,7 +80,7 @@ impl Batch { /// Create a new batch from some number of arrays. /// /// All arrays should have the same logical length. 
- pub fn try_new(cols: impl IntoIterator) -> Result { + pub fn try_new(cols: impl IntoIterator) -> Result { let cols: Vec<_> = cols.into_iter().collect(); let len = match cols.first() { Some(arr) => arr.logical_len(), @@ -158,15 +158,15 @@ impl Batch { Some(ScalarRow::from_iter(row)) } - pub fn column(&self, idx: usize) -> Option<&Array> { + pub fn column(&self, idx: usize) -> Option<&Array2> { self.cols.get(idx) } - pub fn columns(&self) -> &[Array] { + pub fn columns(&self) -> &[Array2] { &self.cols } - pub fn columns_mut(&mut self) -> &mut [Array] { + pub fn columns_mut(&mut self) -> &mut [Array2] { &mut self.cols } @@ -178,7 +178,7 @@ impl Batch { self.num_rows } - pub fn into_arrays(self) -> Vec { + pub fn into_arrays(self) -> Vec { self.cols } } diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 76845aa06..9886d4c10 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -48,7 +48,7 @@ use super::parse::{ UInt64Parser, UInt8Parser, }; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, TimeUnit}; use crate::arrays::executor::builder::{ @@ -79,7 +79,7 @@ use crate::arrays::executor::scalar::UnaryExecutor; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; -pub fn cast_array(arr: &Array, to: DataType, behavior: CastFailBehavior) -> Result { +pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Result { if arr.datatype() == &to { // TODO: Cow? return Ok(arr.clone()); @@ -90,7 +90,7 @@ pub fn cast_array(arr: &Array, to: DataType, behavior: CastFailBehavior) -> Resu // Can cast NULL to anything else. 
let data = to.physical_type()?.zeroed_array_data(arr.logical_len()); let validity = Bitmap::new_with_all_false(arr.logical_len()); - Array::new_with_validity_and_array_data(to, validity, data) + Array2::new_with_validity_and_array_data(to, validity, data) } // String to anything else. @@ -228,10 +228,10 @@ pub fn cast_array(arr: &Array, to: DataType, behavior: CastFailBehavior) -> Resu } fn decimal_rescale_helper<'a, S>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, S::Type<'a>: PrimInt, @@ -244,10 +244,10 @@ where } pub fn decimal_rescale<'a, S, D>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, D: DecimalType, @@ -297,10 +297,10 @@ where } fn cast_float_to_decimal_helper<'a, S>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, S::Type<'a>: Float, @@ -313,10 +313,10 @@ where } fn cast_float_to_decimal<'a, S, D>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, D: DecimalType, @@ -361,10 +361,10 @@ where // TODO: Weird to specify both the float generic and datatype. 
pub fn cast_decimal_to_float<'a, S, F>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, F: Float + Default + Copy, @@ -399,10 +399,10 @@ where } fn cast_int_to_decimal_helper<'a, S>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, S::Type<'a>: PrimInt, @@ -415,10 +415,10 @@ where } fn cast_int_to_decimal<'a, S, D>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, D: DecimalType, @@ -481,10 +481,10 @@ where } fn cast_primitive_numeric_helper<'a, S>( - arr: &'a Array, + arr: &'a Array2, to: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, S::Type<'a>: ToPrimitive, @@ -508,10 +508,10 @@ where } pub fn cast_primitive_numeric<'a, S, T>( - arr: &'a Array, + arr: &'a Array2, datatype: DataType, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, S::Type<'a>: ToPrimitive, @@ -535,10 +535,10 @@ where } pub fn cast_from_utf8( - arr: &Array, + arr: &Array2, datatype: DataType, behavior: CastFailBehavior, -) -> Result { +) -> Result { match datatype { DataType::Boolean => cast_parse_bool(arr, behavior), DataType::Int8 => cast_parse_primitive(arr, datatype, behavior, Int8Parser::default()), @@ -584,7 +584,7 @@ pub fn cast_from_utf8( } } -pub fn cast_to_utf8(arr: &Array, behavior: CastFailBehavior) -> Result { +pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result { match arr.datatype() { DataType::Boolean => { cast_format::(arr, BoolFormatter::default(), behavior) @@ -652,10 +652,10 @@ pub fn cast_to_utf8(arr: &Array, behavior: CastFailBehavior) -> Result { } fn cast_format<'a, S, F>( - arr: &'a Array, + arr: &'a Array2, mut formatter: F, behavior: CastFailBehavior, -) -> Result +) -> Result where S: PhysicalStorage, F: Formatter>, @@ -681,7 +681,7 
@@ where fail_state.check_and_apply(arr, output) } -fn cast_parse_bool(arr: &Array, behavior: CastFailBehavior) -> Result { +fn cast_parse_bool(arr: &Array2, behavior: CastFailBehavior) -> Result { let mut fail_state = behavior.new_state_for_array(arr); let output = UnaryExecutor::execute::( arr, @@ -699,11 +699,11 @@ fn cast_parse_bool(arr: &Array, behavior: CastFailBehavior) -> Result { } fn cast_parse_primitive( - arr: &Array, + arr: &Array2, datatype: DataType, behavior: CastFailBehavior, mut parser: P, -) -> Result +) -> Result where T: Default + Copy, P: Parser, @@ -733,7 +733,7 @@ mod tests { #[test] fn array_cast_utf8_to_i32() { - let arr = Array::from_iter(["13", "18", "123456789"]); + let arr = Array2::from_iter(["13", "18", "123456789"]); let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap(); @@ -744,13 +744,13 @@ mod tests { #[test] fn array_cast_utf8_to_i32_overflow_error() { - let arr = Array::from_iter(["13", "18", "123456789000000"]); + let arr = Array2::from_iter(["13", "18", "123456789000000"]); cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap_err(); } #[test] fn array_cast_utf8_to_i32_overflow_null() { - let arr = Array::from_iter(["13", "18", "123456789000000"]); + let arr = Array2::from_iter(["13", "18", "123456789000000"]); let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Null).unwrap(); @@ -761,7 +761,7 @@ mod tests { #[test] fn array_cast_null_to_f32() { - let arr = Array::new_untyped_null_array(3); + let arr = Array2::new_untyped_null_array(3); let got = cast_array(&arr, DataType::Float32, CastFailBehavior::Error).unwrap(); @@ -774,7 +774,7 @@ mod tests { #[test] fn array_cast_decimal64_to_f64() { - let arr = Array::new_with_array_data( + let arr = Array2::new_with_array_data( DataType::Decimal64(DecimalTypeMeta { precision: 10, scale: 3, diff --git a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs index 
f81ca0e32..8ae4ae51e 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; /// Behavior when a cast fail due to under/overflow. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -12,7 +12,7 @@ pub enum CastFailBehavior { } impl CastFailBehavior { - pub(crate) fn new_state_for_array(&self, _arr: &Array) -> CastFailState { + pub(crate) fn new_state_for_array(&self, _arr: &Array2) -> CastFailState { match self { CastFailBehavior::Error => CastFailState::TrackOneAndError(None), CastFailBehavior::Null => CastFailState::TrackManyAndInvalidate(Vec::new()), @@ -63,7 +63,7 @@ impl CastFailState { } } - pub(crate) fn check_and_apply(self, original: &Array, mut output: Array) -> Result { + pub(crate) fn check_and_apply(self, original: &Array2, mut output: Array2) -> Result { match self { Self::TrackOneAndError(None) => Ok(output), Self::TrackOneAndError(Some(error_idx)) => { diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index 7e02e1498..dee45b5bc 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -1,7 +1,7 @@ use chrono::{DateTime, Datelike, NaiveDate, Timelike, Utc}; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalI64}; @@ -69,7 +69,7 @@ pub enum DatePart { /// /// The results should be decimal representing the part extracted, and should /// use the Decimal64 default precision and scale. 
-pub fn extract_date_part(part: DatePart, arr: &Array) -> Result { +pub fn extract_date_part(part: DatePart, arr: &Array2) -> Result { let datatype = arr.datatype(); match datatype { DataType::Date32 => match part { @@ -119,7 +119,7 @@ pub fn extract_date_part(part: DatePart, arr: &Array) -> Result { } } -fn timestamp_extract_with_fn(unit: TimeUnit, arr: &Array, f: F) -> Result +fn timestamp_extract_with_fn(unit: TimeUnit, arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { @@ -140,10 +140,10 @@ where } fn timestamp_extract_with_fn_and_datetime_builder( - arr: &Array, + arr: &Array2, f: F, builder: B, -) -> Result +) -> Result where B: Fn(i64) -> DateTime, F: Fn(DateTime) -> i64, @@ -164,7 +164,7 @@ where ) } -fn date32_extract_with_fn(arr: &Array, f: F) -> Result +fn date32_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { @@ -185,7 +185,7 @@ where ) } -fn date64_extract_with_fn(arr: &Array, f: F) -> Result +fn date64_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index 26e2f30bd..0b29d1bd4 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use super::{AggregateState, RowToStateMapping}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalStorage; use crate::arrays::executor::scalar::check_validity; use crate::arrays::selection; @@ -13,8 +13,8 @@ pub struct BinaryNonNullUpdater; impl BinaryNonNullUpdater { pub fn update<'a, S1, S2, I, State, Output>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, mapping: I, states: &mut [State], ) -> Result<()> @@ -113,8 +113,8 @@ mod tests { #[test] fn 
binary_primitive_single_state() { let mut states = [TestAddSumAndProductState::default()]; - let array1 = Array::from_iter([1, 2, 3, 4, 5]); - let array2 = Array::from_iter([6, 7, 8, 9, 10]); + let array1 = Array2::from_iter([1, 2, 3, 4, 5]); + let array2 = Array2::from_iter([6, 7, 8, 9, 10]); let mapping = [ RowToStateMapping { diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs index cfacfefd9..9a38d8282 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs @@ -11,7 +11,7 @@ use rayexec_error::Result; pub use unary::*; use super::builder::{ArrayBuilder, ArrayDataBuffer}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; /// State for a single group's aggregate. @@ -72,7 +72,7 @@ impl StateFinalizer { pub fn finalize<'a, State, I, B, Input, Output>( states: I, mut builder: ArrayBuilder, - ) -> Result + ) -> Result where B: ArrayDataBuffer, I: IntoIterator, @@ -99,7 +99,7 @@ impl StateFinalizer { Some(validities.into()) }; - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: validities, diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index 512f4e1e2..0281f3403 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::{AggregateState, RowToStateMapping}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalStorage; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; @@ -12,7 +12,7 @@ pub struct UnaryNonNullUpdater; impl UnaryNonNullUpdater { pub fn update<'a, S, I, State, Output>( - 
array: &'a Array, + array: &'a Array2, mapping: I, states: &mut [State], ) -> Result<()> @@ -86,7 +86,7 @@ mod tests { #[test] fn unary_primitive_single_state() { let mut states = [TestSumState::default()]; - let array = Array::from_iter([1, 2, 3, 4, 5]); + let array = Array2::from_iter([1, 2, 3, 4, 5]); let mapping = [ RowToStateMapping { from_row: 1, @@ -110,7 +110,7 @@ mod tests { #[test] fn unary_primitive_single_state_skip_null() { let mut states = [TestSumState::default()]; - let array = Array::from_iter([Some(1), Some(2), Some(3), None, Some(5)]); + let array = Array2::from_iter([Some(1), Some(2), Some(3), None, Some(5)]); let mapping = [ RowToStateMapping { from_row: 1, @@ -134,7 +134,7 @@ mod tests { #[test] fn unary_primitive_multiple_state() { let mut states = [TestSumState::default(), TestSumState::default()]; - let array = Array::from_iter([1, 2, 3, 4, 5]); + let array = Array2::from_iter([1, 2, 3, 4, 5]); let mapping = [ RowToStateMapping { from_row: 1, @@ -185,7 +185,7 @@ mod tests { fn unary_str_single_state() { // Test just checks to ensure working with varlen is sane. 
let mut states = [TestStringAgg::default()]; - let array = Array::from_iter(["aa", "bbb", "cccc"]); + let array = Array2::from_iter(["aa", "bbb", "cccc"]); let mapping = [ RowToStateMapping { from_row: 0, diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index cd247c18a..ae0e76d04 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use super::builder::{ArrayDataBuffer, BooleanBuffer, GermanVarlenBuffer, PrimitiveBuffer}; -use crate::arrays::array::{Array, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData, BinaryData}; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{ AddressableStorage, @@ -65,7 +65,7 @@ impl PhysicalType { Self::Utf8 => GermanVarlenBuffer::::with_len(len).into_data(), Self::List => ListStorage { metadata: vec![ListItemMetadata::default(); len].into(), - array: Array::new_untyped_null_array(0), + array: Array2::new_untyped_null_array(0), } .into(), } diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs index f289147c9..eef30999f 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -14,11 +14,11 @@ pub struct BinaryExecutor; impl BinaryExecutor { pub fn execute<'a, S1, S2, B, Op>( - array1: &'a Array, - array2: &'a Array, + array1: &'a 
Array2, + array2: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, &mut OutputBuffer), S1: PhysicalStorage, @@ -81,7 +81,7 @@ impl BinaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -102,8 +102,8 @@ mod tests { #[test] fn binary_simple_add() { - let left = Array::from_iter([1, 2, 3]); - let right = Array::from_iter([4, 5, 6]); + let left = Array2::from_iter([1, 2, 3]); + let right = Array2::from_iter([4, 5, 6]); let builder = ArrayBuilder { datatype: DataType::Int32, @@ -125,8 +125,8 @@ mod tests { #[test] fn binary_string_repeat() { - let left = Array::from_iter([1, 2, 3]); - let right = Array::from_iter(["hello", "world", "goodbye!"]); + let left = Array2::from_iter([1, 2, 3]); + let right = Array2::from_iter(["hello", "world", "goodbye!"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -162,11 +162,11 @@ mod tests { #[test] fn binary_add_with_invalid() { // Make left constant null. 
- let mut left = Array::from_iter([1]); + let mut left = Array2::from_iter([1]); left.put_selection(SelectionVector::repeated(3, 0)); left.set_physical_validity(0, false); - let right = Array::from_iter([2, 3, 4]); + let right = Array2::from_iter([2, 3, 4]); let got = BinaryExecutor::execute::( &left, diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 8029004ba..1bca6818a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ @@ -81,7 +81,7 @@ where /// /// `fill_map` is an iterator of mappings that map indices from `array` to /// where they should be placed in the buffer. - pub fn fill<'a, S, I>(&mut self, array: &'a Array, fill_map: I) -> Result<()> + pub fn fill<'a, S, I>(&mut self, array: &'a Array2, fill_map: I) -> Result<()> where S: PhysicalStorage, I: IntoIterator, @@ -118,14 +118,14 @@ where Ok(()) } - pub fn finish(self) -> Array { + pub fn finish(self) -> Array2 { let validity = if self.validity.is_all_true() { None } else { Some(self.validity.into()) }; - Array { + Array2 { datatype: self.builder.datatype, selection: None, validity, @@ -135,7 +135,7 @@ where } /// Concatenate multiple arrays into a single array. 
-pub fn concat(arrays: &[&Array]) -> Result { +pub fn concat(arrays: &[&Array2]) -> Result { let total_len: usize = arrays.iter().map(|a| a.logical_len()).sum(); concat_with_exact_total_len(arrays, total_len) } @@ -146,14 +146,14 @@ pub fn concat(arrays: &[&Array]) -> Result { /// /// This function exists so that we can compute the total length once for a set /// of batches that we're concatenating instead of once per array. -pub(crate) fn concat_with_exact_total_len(arrays: &[&Array], total_len: usize) -> Result { +pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) -> Result { let datatype = match arrays.first() { Some(arr) => arr.datatype(), None => return Err(RayexecError::new("Cannot concat zero arrays")), }; match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array { + PhysicalType::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, @@ -282,7 +282,7 @@ pub(crate) fn concat_with_exact_total_len(arrays: &[&Array], total_len: usize) - } } -fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Result { +fn concat_lists(datatype: DataType, arrays: &[&Array2], total_len: usize) -> Result { let inner_arrays = arrays .iter() .map(|arr| match arr.array_data() { @@ -329,7 +329,7 @@ fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Resu array: concatenated, }; - Ok(Array { + Ok(Array2 { datatype, selection: None, validity: Some(validity.into()), @@ -338,9 +338,9 @@ fn concat_lists(datatype: DataType, arrays: &[&Array], total_len: usize) -> Resu } fn concat_with_fill_state<'a, S, B>( - arrays: &'a [&Array], + arrays: &'a [&Array2], mut fill_state: FillState, -) -> Result +) -> Result where S: PhysicalStorage, B: ArrayDataBuffer, @@ -370,14 +370,14 @@ where /// array. /// /// Indices may be specified more than once. 
-pub fn interleave(arrays: &[&Array], indices: &[(usize, usize)]) -> Result { +pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result { let datatype = match arrays.first() { Some(arr) => arr.datatype(), None => return Err(RayexecError::new("Cannot interleave zero arrays")), }; match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array { + PhysicalType::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, @@ -512,10 +512,10 @@ pub fn interleave(arrays: &[&Array], indices: &[(usize, usize)]) -> Result( - arrays: &'a [&Array], + arrays: &'a [&Array2], indices: &[(usize, usize)], mut fill_state: FillState, -) -> Result +) -> Result where S: PhysicalStorage, B: ArrayDataBuffer, @@ -560,7 +560,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 0, to: 0 }, FillMapping { from: 1, to: 1 }, @@ -583,7 +583,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 1, to: 0 }, FillMapping { from: 1, to: 1 }, @@ -606,7 +606,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }); - let arr = Array::from_iter([4, 5, 6]); + let arr = Array2::from_iter([4, 5, 6]); let mapping = [ FillMapping { from: 0, to: 1 }, FillMapping { from: 1, to: 2 }, @@ -629,7 +629,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(6), }); - let arr1 = Array::from_iter([4, 5, 6]); + let arr1 = Array2::from_iter([4, 5, 6]); let mapping1 = [ FillMapping { from: 0, to: 2 }, FillMapping { from: 1, to: 4 }, @@ -637,7 +637,7 @@ mod tests { ]; state.fill::(&arr1, mapping1).unwrap(); - let arr2 = Array::from_iter([7, 8, 9]); + let arr2 = Array2::from_iter([7, 8, 9]); let mapping2 = [ FillMapping { from: 0, to: 1 }, FillMapping { from: 1, to: 3 }, @@ -657,8 +657,8 @@ mod tests { #[test] fn 
interleave_2() { - let arr1 = Array::from_iter([4, 5, 6]); - let arr2 = Array::from_iter([7, 8, 9]); + let arr1 = Array2::from_iter([4, 5, 6]); + let arr2 = Array2::from_iter([7, 8, 9]); let indices = [(0, 1), (0, 2), (1, 0), (1, 1), (0, 0), (1, 2)]; @@ -674,8 +674,8 @@ mod tests { #[test] fn interleave_2_repeated() { - let arr1 = Array::from_iter([4, 5]); - let arr2 = Array::from_iter([7, 8]); + let arr1 = Array2::from_iter([4, 5]); + let arr2 = Array2::from_iter([7, 8]); let indices = [(0, 1), (1, 1), (0, 1), (1, 1)]; @@ -689,8 +689,8 @@ mod tests { #[test] fn concat_2() { - let arr1 = Array::from_iter([4, 5, 6]); - let arr2 = Array::from_iter([7, 8]); + let arr1 = Array2::from_iter([4, 5, 6]); + let arr2 = Array2::from_iter([7, 8]); let got = concat(&[&arr1, &arr2]).unwrap(); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 548bcb850..3b65255b0 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -2,7 +2,7 @@ use ahash::RandomState; use half::f16; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, @@ -38,7 +38,7 @@ pub struct HashExecutor; impl HashExecutor { /// Hashes the given array values, combining them with the existing hashes /// in `hashes`. - pub fn hash_combine(array: &Array, hashes: &mut [u64]) -> Result<()> { + pub fn hash_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { PhysicalType::UntypedNull => { Self::hash_one_inner::(array, hashes)? @@ -102,7 +102,7 @@ impl HashExecutor { /// Hash the given array and write the values into `hashes`, overwriting any /// existing values. 
- pub fn hash_no_combine(array: &Array, hashes: &mut [u64]) -> Result<()> { + pub fn hash_no_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { PhysicalType::UntypedNull => { Self::hash_one_inner::(array, hashes)? @@ -164,7 +164,7 @@ impl HashExecutor { Ok(()) } - pub fn hash_many<'b>(arrays: &[Array], hashes: &'b mut [u64]) -> Result<&'b mut [u64]> { + pub fn hash_many<'b>(arrays: &[Array2], hashes: &'b mut [u64]) -> Result<&'b mut [u64]> { for (idx, array) in arrays.iter().enumerate() { let combine_hash = idx > 0; @@ -178,7 +178,7 @@ impl HashExecutor { Ok(hashes) } - fn hash_one_inner<'a, 'b, S, H>(array: &'a Array, hashes: &'b mut [u64]) -> Result<()> + fn hash_one_inner<'a, 'b, S, H>(array: &'a Array2, hashes: &'b mut [u64]) -> Result<()> where S: PhysicalStorage, S::Type<'a>: HashValue, @@ -215,7 +215,7 @@ impl HashExecutor { Ok(()) } - fn hash_list(array: &Array, hashes: &mut [u64]) -> Result<()> + fn hash_list(array: &Array2, hashes: &mut [u64]) -> Result<()> where H: SetHash, { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index f1b0ff63d..734318fcc 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -1,6 +1,6 @@ use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage}; @@ -36,10 +36,10 @@ impl { /// Execute a reducer on two list arrays. 
pub fn binary_reduce<'a, S, B, R>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, mut builder: ArrayBuilder, - ) -> Result + ) -> Result where R: BinaryListReducer, B::Type>, S: PhysicalStorage, @@ -95,7 +95,7 @@ impl builder.buffer.put(idx, &out); } - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: None, @@ -139,7 +139,7 @@ impl builder.buffer.put(idx, &out); } - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: None, @@ -168,7 +168,7 @@ impl /// Gets the inner array storage. Checks to ensure the inner array does not /// contain NULLs. -fn get_inner_array_storage(array: &Array) -> Result<(S::Storage<'_>, Option<&Bitmap>)> +fn get_inner_array_storage(array: &Array2) -> Result<(S::Storage<'_>, Option<&Bitmap>)> where S: PhysicalStorage, { @@ -182,7 +182,7 @@ where } } -fn get_inner_array_selection(array: &Array) -> Result> { +fn get_inner_array_selection(array: &Array2) -> Result> { match array.array_data() { ArrayData::List(d) => Ok(d.array.selection_vector()), _ => Err(RayexecError::new("Expected list array data")), diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs index e2477f180..96c2b07f6 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/mod.rs @@ -35,7 +35,7 @@ pub use fill::*; use rayexec_error::{RayexecError, Result}; use super::builder::ArrayDataBuffer; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; #[inline] @@ -64,7 +64,7 @@ where /// array matches the logical length of some other array. /// /// Returns the logical length. 
-pub(crate) fn validate_logical_len(buffer: &B, array: &Array) -> Result +pub(crate) fn validate_logical_len(buffer: &B, array: &Array2) -> Result where B: ArrayDataBuffer, { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs index 7e7e37e2d..74ac06747 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::{PhysicalBool, PhysicalStorage}; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; @@ -12,7 +12,7 @@ impl SelectExecutor { /// Writes row selections to `output_sel`. /// /// Errors if the provided array isn't a boolean array. - pub fn select(bool_array: &Array, output_sel: &mut SelectionVector) -> Result<()> { + pub fn select(bool_array: &Array2, output_sel: &mut SelectionVector) -> Result<()> { output_sel.clear(); let selection = bool_array.selection_vector(); let len = bool_array.logical_len(); @@ -58,7 +58,7 @@ mod tests { #[test] fn select_simple() { - let arr = Array::from_iter([false, true, true, false, true]); + let arr = Array2::from_iter([false, true, true, false, true]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap(); @@ -69,7 +69,7 @@ mod tests { #[test] fn select_with_nulls() { - let arr = Array::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); + let arr = Array2::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap(); @@ -80,7 +80,7 @@ mod tests { #[test] fn select_with_selection() { - let mut arr = Array::from_iter([Some(false), Some(true), None, Some(false), 
Some(true)]); + let mut arr = Array2::from_iter([Some(false), Some(true), None, Some(false), Some(true)]); // => [NULL, false, true] arr.select_mut(SelectionVector::from_iter([2, 3, 4])); @@ -93,7 +93,7 @@ mod tests { #[test] fn select_wrong_type() { - let arr = Array::from_iter([1, 2, 3, 4, 5]); + let arr = Array2::from_iter([1, 2, 3, 4, 5]); let mut selection = SelectionVector::with_capacity(5); SelectExecutor::select(&arr, &mut selection).unwrap_err(); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs index cdde8aee2..b3b91eae9 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use rayexec_error::Result; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -16,12 +16,12 @@ pub struct TernaryExecutor; impl TernaryExecutor { pub fn execute<'a, S1, S2, S3, B, Op>( - array1: &'a Array, - array2: &'a Array, - array3: &'a Array, + array1: &'a Array2, + array2: &'a Array2, + array3: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, S3::Type<'a>, &mut OutputBuffer), S1: PhysicalStorage, @@ -97,7 +97,7 @@ impl TernaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs index 778ff7838..00c73368a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs +++ 
b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::validate_logical_len; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -14,10 +14,10 @@ pub struct UnaryExecutor; impl UnaryExecutor { /// Executes `op` on every non-null input. pub fn execute<'a, S, B, Op>( - array: &'a Array, + array: &'a Array2, builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(S::Type<'a>, &mut OutputBuffer), S: PhysicalStorage, @@ -67,7 +67,7 @@ impl UnaryExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -80,7 +80,7 @@ impl UnaryExecutor { /// /// `op` is called for each logical entry in the array with the index and /// either Some(val) if the value is valid, or None if it's not. - pub fn for_each<'a, S, Op>(array: &'a Array, mut op: Op) -> Result<()> + pub fn for_each<'a, S, Op>(array: &'a Array2, mut op: Op) -> Result<()> where Op: FnMut(usize, Option>), S: PhysicalStorage, @@ -119,7 +119,7 @@ impl UnaryExecutor { /// Gets the value some index in the array. /// /// Returns Some if the value is valid, None otherwise. - pub fn value_at(array: &Array, idx: usize) -> Result>> + pub fn value_at(array: &Array2, idx: usize) -> Result>> where S: PhysicalStorage, { @@ -159,7 +159,7 @@ mod tests { #[test] fn int32_inc_by_2() { - let array = Array::from_iter([1, 2, 3]); + let array = Array2::from_iter([1, 2, 3]); let builder = ArrayBuilder { datatype: DataType::Int32, buffer: PrimitiveBuffer::::with_len(3), @@ -179,7 +179,7 @@ mod tests { fn string_double_named_func() { // Example with defined function, and allocating a new string every time. 
- let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -210,7 +210,7 @@ mod tests { fn string_double_closure() { // Example with closure that reuses a string. - let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -243,7 +243,7 @@ mod tests { fn string_trunc_closure() { // Example with closure returning referencing to input. - let array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(4), @@ -268,7 +268,7 @@ mod tests { // Example with selection vector whose logical length is greater than // the underlying physical data len. 
- let mut array = Array::from_iter(["a", "bb", "ccc", "dddd"]); + let mut array = Array2::from_iter(["a", "bb", "ccc", "dddd"]); let mut selection = SelectionVector::with_range(0..5); selection.set_unchecked(0, 3); selection.set_unchecked(1, 3); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index 7a7b01083..9b7bb8180 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use super::check_validity; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -14,10 +14,10 @@ pub struct UniformExecutor; impl UniformExecutor { pub fn execute<'a, S, B, Op>( - arrays: &[&'a Array], + arrays: &[&'a Array2], builder: ArrayBuilder, mut op: Op, - ) -> Result + ) -> Result where Op: FnMut(&[S::Type<'a>], &mut OutputBuffer), S: PhysicalStorage, @@ -95,7 +95,7 @@ impl UniformExecutor { let data = output_buffer.buffer.into_data(); - Ok(Array { + Ok(Array2 { datatype: builder.datatype, selection: None, validity: out_validity, @@ -116,9 +116,9 @@ mod tests { #[test] fn uniform_string_concat_row_wise() { - let first = Array::from_iter(["a", "b", "c"]); - let second = Array::from_iter(["1", "2", "3"]); - let third = Array::from_iter(["dog", "cat", "horse"]); + let first = Array2::from_iter(["a", "b", "c"]); + let second = Array2::from_iter(["1", "2", "3"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -150,10 +150,10 @@ mod tests { #[test] fn uniform_string_concat_row_wise_with_invalid() { - let first = Array::from_iter(["a", "b", "c"]); - let mut second = Array::from_iter(["1", "2", 
"3"]); + let first = Array2::from_iter(["a", "b", "c"]); + let mut second = Array2::from_iter(["1", "2", "3"]); second.set_physical_validity(1, false); // "2" => NULL - let third = Array::from_iter(["dog", "cat", "horse"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -182,11 +182,11 @@ mod tests { #[test] fn uniform_string_concat_row_wise_with_invalid_and_reordered() { - let first = Array::from_iter(["a", "b", "c"]); - let mut second = Array::from_iter(["1", "2", "3"]); + let first = Array2::from_iter(["a", "b", "c"]); + let mut second = Array2::from_iter(["1", "2", "3"]); second.select_mut(SelectionVector::from_iter([1, 0, 2])); // ["1", "2", "3"] => ["2", "1", "3"] second.set_physical_validity(1, false); // "2" => NULL, referencing physical index - let third = Array::from_iter(["dog", "cat", "horse"]); + let third = Array2::from_iter(["dog", "cat", "horse"]); let builder = ArrayBuilder { datatype: DataType::Utf8, diff --git a/crates/rayexec_execution/src/arrays/format/mod.rs b/crates/rayexec_execution/src/arrays/format/mod.rs index 106625af5..0f384c254 100644 --- a/crates/rayexec_execution/src/arrays/format/mod.rs +++ b/crates/rayexec_execution/src/arrays/format/mod.rs @@ -5,7 +5,7 @@ use std::fmt; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::scalar::ScalarValue; /// Formatting options for arrays and scalars. @@ -57,7 +57,7 @@ impl<'a> Formatter<'a> { /// Returns `None` if the idx is out of bounds. 
pub fn format_array_value<'b>( &self, - array: &'b Array, + array: &'b Array2, idx: usize, ) -> Result> { let scalar = array.logical_value(idx)?; diff --git a/crates/rayexec_execution/src/arrays/format/pretty/table.rs b/crates/rayexec_execution/src/arrays/format/pretty/table.rs index 3896f95a2..90dd98a03 100644 --- a/crates/rayexec_execution/src/arrays/format/pretty/table.rs +++ b/crates/rayexec_execution/src/arrays/format/pretty/table.rs @@ -7,7 +7,7 @@ use textwrap::core::display_width; use textwrap::{fill_inplace, wrap}; use super::display::{table_width, Alignment, PrettyFooter, PrettyHeader, PrettyValues}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::datatype::DataType; use crate::arrays::field::Schema; @@ -552,7 +552,7 @@ impl ColumnValues { /// If the upper bound in the range exceeds the length of the array, it'll /// be clamped to the length of the array. pub fn try_from_array( - array: &Array, + array: &Array2, range: Option>, max_width: Option, ) -> Result { @@ -842,8 +842,8 @@ mod tests { ]); let batch = Batch::try_new(vec![ - Array::from_iter([Some("a"), Some("b"), None, Some("d")]), - Array::from_iter([Some(1), None, Some(10), Some(100)]), + Array2::from_iter([Some("a"), Some("b"), None, Some("d")]), + Array2::from_iter([Some(1), None, Some(10), Some(100)]), ]) .unwrap(); @@ -874,9 +874,9 @@ mod tests { ]); let batch = Batch::try_new(vec![ - Array::from_iter([Some("a\nb"), Some("c"), Some("d")]), - Array::from_iter([Some(1), Some(10), Some(100)]), - Array::from_iter([Some("Mario"), Some("Yoshi"), Some("Luigi\nPeach")]), + Array2::from_iter([Some("a\nb"), Some("c"), Some("d")]), + Array2::from_iter([Some(1), Some(10), Some(100)]), + Array2::from_iter([Some("Mario"), Some("Yoshi"), Some("Luigi\nPeach")]), ]) .unwrap(); @@ -907,8 +907,8 @@ mod tests { ]); let batch = Batch::try_new(vec![ - Array::from_iter([Some("a")]), - Array::from_iter([Some(1)]), + 
Array2::from_iter([Some("a")]), + Array2::from_iter([Some(1)]), ]) .unwrap(); @@ -940,7 +940,7 @@ mod tests { ]); let create_batch = |s, n| { - Batch::try_new([Array::from_iter([Some(s)]), Array::from_iter([Some(n)])]).unwrap() + Batch::try_new([Array2::from_iter([Some(s)]), Array2::from_iter([Some(n)])]).unwrap() }; let batches = vec![ @@ -984,7 +984,9 @@ mod tests { let b_vals: Vec<_> = (0..10).map(Some).collect(); let batches = - vec![Batch::try_new(vec![Array::from_iter(a_vals), Array::from_iter(b_vals)]).unwrap()]; + vec![ + Batch::try_new(vec![Array2::from_iter(a_vals), Array2::from_iter(b_vals)]).unwrap(), + ]; let table = pretty_format_batches(&schema, &batches, 80, Some(4)).unwrap(); @@ -1018,7 +1020,9 @@ mod tests { let b_vals: Vec<_> = (0..10).map(Some).collect(); let batches = - vec![Batch::try_new(vec![Array::from_iter(a_vals), Array::from_iter(b_vals)]).unwrap()]; + vec![ + Batch::try_new(vec![Array2::from_iter(a_vals), Array2::from_iter(b_vals)]).unwrap(), + ]; let table = pretty_format_batches(&schema, &batches, 80, Some(3)).unwrap(); @@ -1051,10 +1055,10 @@ mod tests { let create_batch = |a, b, c, d| { Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1101,10 +1105,10 @@ mod tests { let create_batch = |a, b, c, d| { Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1145,9 +1149,9 @@ mod tests { let create_batch = |a, b, c| { Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), + Array2::from_iter([Some(a)]), + 
Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), ]) .unwrap() }; @@ -1188,10 +1192,10 @@ mod tests { let create_batch = |a, b, c, d| { Batch::try_new(vec![ - Array::from_iter([Some(a)]), - Array::from_iter([Some(b)]), - Array::from_iter([Some(c)]), - Array::from_iter([Some(d)]), + Array2::from_iter([Some(a)]), + Array2::from_iter([Some(b)]), + Array2::from_iter([Some(c)]), + Array2::from_iter([Some(d)]), ]) .unwrap() }; @@ -1263,22 +1267,22 @@ mod tests { // First record should be printed. let first = Batch::try_new(vec![ - Array::from_iter([Some("1"), Some("2")]), - Array::from_iter([Some(1), Some(2)]), + Array2::from_iter([Some("1"), Some("2")]), + Array2::from_iter([Some(1), Some(2)]), ]) .unwrap(); // Nothing in this batch should be printed. let middle = Batch::try_new(vec![ - Array::from_iter([Some("3"), Some("4")]), - Array::from_iter([Some(3), Some(4)]), + Array2::from_iter([Some("3"), Some("4")]), + Array2::from_iter([Some(3), Some(4)]), ]) .unwrap(); // Last record should be printed. 
let last = Batch::try_new(vec![ - Array::from_iter([Some("5"), Some("6")]), - Array::from_iter([Some(5), Some(6)]), + Array2::from_iter([Some("5"), Some("6")]), + Array2::from_iter([Some(5), Some(6)]), ]) .unwrap(); diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index 67d059e0f..22f61fb05 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -1,7 +1,7 @@ use half::f16; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData, BinaryData}; use crate::arrays::executor::physical_type::{ AsBytes, PhysicalBinary, @@ -159,7 +159,7 @@ pub struct ComparableRowEncoder { } impl ComparableRowEncoder { - pub fn encode(&self, columns: &[&Array]) -> Result { + pub fn encode(&self, columns: &[&Array2]) -> Result { if columns.len() != self.columns.len() { return Err(RayexecError::new("Column mismatch")); } @@ -255,7 +255,7 @@ impl ComparableRowEncoder { /// Compute the size of the data buffer we'll need for storing all encoded /// rows. - fn compute_data_size(&self, columns: &[&Array]) -> Result { + fn compute_data_size(&self, columns: &[&Array2]) -> Result { let mut size = 0; for arr in columns { let mut arr_size = match arr.array_data() { @@ -301,7 +301,7 @@ impl ComparableRowEncoder { /// This should return the new offset to write to for the next value. fn encode_varlen<'a, S>( col: &ComparableColumn, - arr: &'a Array, + arr: &'a Array2, row: usize, buf: &mut [u8], start: usize, @@ -342,7 +342,7 @@ impl ComparableRowEncoder { /// This should return the new offset to write to for the next value. 
fn encode_primitive<'a, S>( col: &ComparableColumn, - arr: &'a Array, + arr: &'a Array2, row: usize, buf: &mut [u8], start: usize, @@ -482,8 +482,8 @@ mod tests { #[test] fn simple_primitive_cmp_between_cols_asc() { - let col1 = Array::from_iter([-1, 0, 1]); - let col2 = Array::from_iter([1, 0, -1]); + let col1 = Array2::from_iter([-1, 0, 1]); + let col2 = Array2::from_iter([1, 0, -1]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -508,8 +508,8 @@ mod tests { #[test] fn simple_primitive_cmp_between_cols_desc() { - let col1 = Array::from_iter([-1, 0, 1]); - let col2 = Array::from_iter([1, 0, -1]); + let col1 = Array2::from_iter([-1, 0, 1]); + let col2 = Array2::from_iter([1, 0, -1]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -535,8 +535,8 @@ mod tests { #[test] fn simple_varlen_cmp_between_cols_asc() { - let col1 = Array::from_iter(["a", "aa", "bb"]); - let col2 = Array::from_iter(["aa", "a", "bb"]); + let col1 = Array2::from_iter(["a", "aa", "bb"]); + let col2 = Array2::from_iter(["aa", "a", "bb"]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -561,8 +561,8 @@ mod tests { #[test] fn primitive_nulls_last_asc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -585,8 +585,8 @@ mod tests { #[test] fn primitive_nulls_last_desc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -609,8 +609,8 @@ mod tests 
{ #[test] fn primitive_nulls_first_asc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { @@ -633,8 +633,8 @@ mod tests { #[test] fn primitive_nulls_first_desc() { - let col1 = Array::from_iter([Some(-1), None, Some(1), Some(2)]); - let col2 = Array::from_iter([Some(1), Some(0), Some(-1), None]); + let col1 = Array2::from_iter([Some(-1), None, Some(1), Some(2)]); + let col2 = Array2::from_iter([Some(1), Some(0), Some(-1), None]); let encoder = ComparableRowEncoder { columns: vec![ComparableColumn { diff --git a/crates/rayexec_execution/src/arrays/row/mod.rs b/crates/rayexec_execution/src/arrays/row/mod.rs index ed1ce3d72..81563cd58 100644 --- a/crates/rayexec_execution/src/arrays/row/mod.rs +++ b/crates/rayexec_execution/src/arrays/row/mod.rs @@ -2,7 +2,7 @@ pub mod encoding; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::scalar::ScalarValue; /// Scalar representation of a single row. @@ -23,7 +23,7 @@ impl<'a> ScalarRow<'a> { } /// Create a new row representation backed by data from arrays. 
- pub fn try_new_from_arrays(arrays: &[&'a Array], row: usize) -> Result> { + pub fn try_new_from_arrays(arrays: &[&'a Array2], row: usize) -> Result> { let vals = arrays .iter() .map(|a| a.logical_value(row)) diff --git a/crates/rayexec_execution/src/arrays/scalar/mod.rs b/crates/rayexec_execution/src/arrays/scalar/mod.rs index fb0741a82..9fe531bcf 100644 --- a/crates/rayexec_execution/src/arrays/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/scalar/mod.rs @@ -14,7 +14,7 @@ use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; use timestamp::TimestampScalar; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::compute::cast::format::{ BoolFormatter, @@ -204,9 +204,9 @@ impl ScalarValue<'_> { } /// Create an array of size `n` using the scalar value. - pub fn as_array(&self, n: usize) -> Result { + pub fn as_array(&self, n: usize) -> Result { let data: ArrayData = match self { - Self::Null => return Ok(Array::new_untyped_null_array(n)), + Self::Null => return Ok(Array2::new_untyped_null_array(n)), Self::Boolean(v) => BooleanStorage(Bitmap::new_with_val(*v, 1)).into(), Self::Float16(v) => PrimitiveStorage::from(vec![*v]).into(), Self::Float32(v) => PrimitiveStorage::from(vec![*v]).into(), @@ -235,7 +235,7 @@ impl ScalarValue<'_> { ListStorage { metadata: vec![metadata].into(), - array: Array::new_untyped_null_array(0), + array: Array2::new_untyped_null_array(0), } .into() } else { @@ -261,7 +261,7 @@ impl ScalarValue<'_> { other => not_implemented!("{other:?} to array"), // Struct, List }; - let mut array = Array::new_with_array_data(self.datatype(), data); + let mut array = Array2::new_with_array_data(self.datatype(), data); array.selection = Some(SelectionVector::repeated(n, 0).into()); Ok(array) diff --git a/crates/rayexec_execution/src/arrays/storage/list.rs b/crates/rayexec_execution/src/arrays/storage/list.rs index f2107de67..bc35ac4f7 100644 
--- a/crates/rayexec_execution/src/arrays/storage/list.rs +++ b/crates/rayexec_execution/src/arrays/storage/list.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use super::PrimitiveStorage; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] pub struct ListItemMetadata { @@ -12,13 +12,13 @@ pub struct ListItemMetadata { #[derive(Debug, PartialEq)] pub struct ListStorage { pub(crate) metadata: PrimitiveStorage, - pub(crate) array: Array, + pub(crate) array: Array2, } impl ListStorage { pub fn try_new( metadata: impl Into>, - array: Array, + array: Array2, ) -> Result { let metadata = metadata.into(); @@ -41,14 +41,14 @@ impl ListStorage { Ok(ListStorage { metadata, array }) } - pub fn empty_list(array: Array) -> Self { + pub fn empty_list(array: Array2) -> Self { ListStorage { metadata: vec![ListItemMetadata { offset: 0, len: 0 }].into(), array, } } - pub fn single_list(array: Array) -> Self { + pub fn single_list(array: Array2) -> Self { let len = array.logical_len(); ListStorage { @@ -61,7 +61,7 @@ impl ListStorage { } } - pub fn inner_array(&self) -> &Array { + pub fn inner_array(&self) -> &Array2 { &self.array } diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index e2d84d947..3d4749da7 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -5,11 +5,11 @@ //! //! Should not be used outside of tests. -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; /// Asserts that two arrays are logically equal. 
-pub fn assert_arrays_eq(a: &Array, b: &Array) { +pub fn assert_arrays_eq(a: &Array2, b: &Array2) { assert_eq!(a.datatype(), b.datatype(), "data types differ"); assert_eq!(a.logical_len(), b.logical_len(), "logical lengths differ"); @@ -41,16 +41,16 @@ mod tests { #[test] fn arrays_eq() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([1, 2, 3]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([1, 2, 3]); assert_arrays_eq(&a, &b); } #[test] fn arrays_eq_with_selection() { - let a = Array::from_iter([2, 2, 2]); - let mut b = Array::from_iter([2]); + let a = Array2::from_iter([2, 2, 2]); + let mut b = Array2::from_iter([2]); b.select_mut(SelectionVector::repeated(3, 0)); assert_arrays_eq(&a, &b); @@ -59,8 +59,8 @@ mod tests { #[test] #[should_panic] fn arrays_not_eq() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter(["a", "b", "c"]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter(["a", "b", "c"]); assert_arrays_eq(&a, &b); } diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs index 3525b942d..9b5691ed3 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; @@ -23,9 +23,9 @@ impl IntermediatePipelineBuildState<'_> { return Err(RayexecError::new("Expected in progress to be None")); } - let names = Array::from_iter(describe.node.schema.iter().map(|f| 
f.name.as_str())); + let names = Array2::from_iter(describe.node.schema.iter().map(|f| f.name.as_str())); let datatypes = - Array::from_iter(describe.node.schema.iter().map(|f| f.datatype.to_string())); + Array2::from_iter(describe.node.schema.iter().map(|f| f.datatype.to_string())); let batch = Batch::try_new(vec![names, datatypes])?; let operator = IntermediateOperator { diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs index 23dbc22e6..fc0cac119 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs @@ -4,7 +4,7 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use tracing::error; use super::{InProgressPipeline, IntermediatePipelineBuildState, Materializations, PipelineIdGen}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; @@ -82,8 +82,8 @@ impl IntermediatePipelineBuildState<'_> { let physical = Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ Batch::try_new([ - Array::from_iter(type_strings), - Array::from_iter(plan_strings), + Array2::from_iter(type_strings), + Array2::from_iter(plan_strings), ])?, ]))); diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs index af92d317e..0c19b4a78 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{not_implemented, RayexecError, Result, ResultExt}; use super::{InProgressPipeline, 
IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::scan::PhysicalScan; @@ -84,7 +84,7 @@ impl IntermediatePipelineBuildState<'_> { // TODO: This could probably be simplified. - let mut row_arrs: Vec> = Vec::new(); // Row oriented. + let mut row_arrs: Vec> = Vec::new(); // Row oriented. let dummy_batch = Batch::empty_with_num_rows(1); // Convert expressions into arrays of one element each. diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs index 5eb96bc88..7caedb8c2 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; @@ -26,7 +26,7 @@ impl IntermediatePipelineBuildState<'_> { let operator = IntermediateOperator { operator: Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ - Batch::try_new([Array::from_iter([show.value.to_string().as_str()])])?, + Batch::try_new([Array2::from_iter([show.value.to_string().as_str()])])?, ]))), partitioning_requirement: Some(1), }; diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 46caf2618..6047cb241 100644 --- 
a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -2,7 +2,7 @@ use rayexec_error::Result; use super::hash_table::GroupAddress; use super::AggregateStates; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalType; use crate::arrays::executor::scalar::concat; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; @@ -18,7 +18,7 @@ pub struct GroupChunk { /// All row hashes. pub hashes: Vec, /// Arrays making up the group values. - pub arrays: Vec, + pub arrays: Vec, /// Aggregate states we're keeping track of. pub aggregate_states: Vec, } @@ -50,7 +50,7 @@ impl GroupChunk { /// states. pub fn append_group_values( &mut self, - group_vals: impl ExactSizeIterator, + group_vals: impl ExactSizeIterator, hashes: impl ExactSizeIterator, ) -> Result<()> { debug_assert_eq!(self.arrays.len(), group_vals.len()); @@ -82,7 +82,7 @@ impl GroupChunk { /// `addrs` contains a list of group addresses we'll be using to map input /// rows to the state index. If and address is for a different chunk, that /// row will be skipped. 
- pub fn update_states(&mut self, inputs: &[Array], addrs: &[GroupAddress]) -> Result<()> { + pub fn update_states(&mut self, inputs: &[Array2], addrs: &[GroupAddress]) -> Result<()> { for agg_states in &mut self.aggregate_states { let input_cols: Vec<_> = agg_states .col_selection diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index 74c781e5d..3718db416 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -4,7 +4,7 @@ use rayexec_error::{not_implemented, Result}; use super::chunk::GroupChunk; use super::hash_table::GroupAddress; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, @@ -32,7 +32,7 @@ use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; pub fn group_values_eq( - inputs: &[Array], + inputs: &[Array2], input_sel: &SelectionVector, chunks: &[GroupChunk], addresses: &[GroupAddress], @@ -69,8 +69,8 @@ pub fn group_values_eq( } fn compare_group_rows_eq( - arrays1: &[Array], - arrays2: &[Array], + arrays1: &[Array2], + arrays2: &[Array2], rows1: I1, rows2: I2, not_eq_rows: &mut BTreeSet, @@ -173,8 +173,8 @@ where /// When a row is not equal, the row from the `rows1` iter will be inserted into /// `not_eq_rows`. 
fn compare_rows_eq<'a, S, I1, I2>( - array1: &'a Array, - array2: &'a Array, + array1: &'a Array2, + array2: &'a Array2, rows1: I1, rows2: I2, not_eq_rows: &mut BTreeSet, diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index ff21e4169..5296423fe 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::Result; use super::hash_table::HashTable; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::executor::scalar::HashExecutor; use crate::arrays::selection::SelectionVector; use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; @@ -50,7 +50,7 @@ impl AggregateGroupStates for DistinctGroupedStates { self.distinct_inputs.len() } - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()> { + fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { // TODO: Would be cool not needing to do this. let mappings: Vec<_> = mapping.collect(); @@ -114,7 +114,7 @@ impl AggregateGroupStates for DistinctGroupedStates { Ok(()) } - fn finalize(&mut self) -> Result { + fn finalize(&mut self) -> Result { // And now we actually create the states we need. 
self.states.new_states(self.distinct_inputs.len()); diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs index f6ce18a95..b30a8a6dd 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs @@ -8,7 +8,7 @@ use super::compare::group_values_eq; use super::drain::HashTableDrain; use super::entry::EntryKey; use super::Aggregate; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::selection::SelectionVector; const LOAD_FACTOR: f64 = 0.7; @@ -86,7 +86,7 @@ impl HashTable { self.entries.len() } - pub fn insert(&mut self, groups: &[Array], hashes: &[u64], inputs: &[Array]) -> Result<()> { + pub fn insert(&mut self, groups: &[Array2], hashes: &[u64], inputs: &[Array2]) -> Result<()> { // Find and create groups as needed. self.find_or_create_groups(groups, hashes)?; @@ -155,7 +155,7 @@ impl HashTable { } } - fn find_or_create_groups(&mut self, groups: &[Array], hashes: &[u64]) -> Result<()> { + fn find_or_create_groups(&mut self, groups: &[Array2], hashes: &[u64]) -> Result<()> { let num_inputs = hashes.len(); // Resize addresses, this will be where we store all the group @@ -519,8 +519,8 @@ mod tests { #[test] fn insert_simple() { - let groups = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = [4, 5, 4]; // Hashes for group values. @@ -535,12 +535,12 @@ mod tests { fn insert_chunk_append() { // Assumes knowledge of internals. 
- let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes1 = [4, 5, 4]; - let groups2 = [Array::from_iter(["g1", "g2", "g3"])]; - let inputs2 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups2 = [Array2::from_iter(["g1", "g2", "g3"])]; + let inputs2 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes2 = [4, 5, 6]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -554,8 +554,8 @@ mod tests { #[test] fn insert_hash_collision() { - let groups = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = [4, 4, 4]; @@ -570,8 +570,8 @@ mod tests { fn insert_require_resize() { // 17 unique groups (> initial 16 capacity) - let groups = [Array::from_iter(0..17)]; - let inputs = [Array::from_iter(0_i64..17_i64)]; + let groups = [Array2::from_iter(0..17)]; + let inputs = [Array2::from_iter(0_i64..17_i64)]; let hashes = vec![44; 17]; // All hashes collide. @@ -587,8 +587,8 @@ mod tests { // 33 unique groups, more than twice initial capacity. Caught bug where // resize by doubling didn't increase capacity enough. - let groups = [Array::from_iter(0..33)]; - let inputs = [Array::from_iter(0_i64..33_i64)]; + let groups = [Array2::from_iter(0..33)]; + let inputs = [Array2::from_iter(0_i64..33_i64)]; let hashes = vec![44; 33]; // All hashes collide. 
@@ -601,8 +601,8 @@ mod tests { #[test] fn merge_simple() { - let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -610,8 +610,8 @@ mod tests { let mut t1 = make_hash_table(agg.clone()); t1.insert(&groups1, &hashes, &inputs1).unwrap(); - let groups2 = [Array::from_iter(["g3", "g2", "g1"])]; - let inputs2 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups2 = [Array2::from_iter(["g3", "g2", "g1"])]; + let inputs2 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let hashes = vec![6, 5, 4]; @@ -627,8 +627,8 @@ mod tests { fn merge_non_empty_then_merge_empty() { // Tests that we properly resize internal buffers to account for merging // in empty hash tables after already merging in non-empty hash tables. - let groups1 = [Array::from_iter(["g1", "g2", "g1"])]; - let inputs1 = [Array::from_iter::<[i64; 3]>([1, 2, 3])]; + let groups1 = [Array2::from_iter(["g1", "g2", "g1"])]; + let inputs1 = [Array2::from_iter::<[i64; 3]>([1, 2, 3])]; let agg = make_planned_aggregate([("g", DataType::Utf8), ("i", DataType::Int32)], 1); @@ -638,8 +638,8 @@ mod tests { t1.insert(&groups1, &hashes, &inputs1).unwrap(); // Second hash table, not empty - let groups2 = [Array::from_iter(["g1", "g2"])]; - let inputs2 = [Array::from_iter::<[i64; 2]>([4, 5])]; + let groups2 = [Array2::from_iter(["g1", "g2"])]; + let inputs2 = [Array2::from_iter::<[i64; 2]>([4, 5])]; let hashes = vec![4, 5]; let mut t2 = make_hash_table(agg.clone()); t2.insert(&groups2, &hashes, &inputs2).unwrap(); diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index b6c7ea605..4ed297739 100644 --- 
a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -16,7 +16,7 @@ use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; use super::{ExecutionStates, InputOutputStates, PollFinalize}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; @@ -514,7 +514,7 @@ impl PhysicalHashAggregate { state.hash_buf.resize(num_rows, 0); state.partitions_idx_buf.resize(num_rows, 0); - let mut masked_grouping_columns: Vec = Vec::with_capacity(grouping_columns.len()); + let mut masked_grouping_columns: Vec = Vec::with_capacity(grouping_columns.len()); // Reused to select hashes per partition. let mut partition_hashes = Vec::new(); @@ -527,7 +527,7 @@ impl PhysicalHashAggregate { for (col_idx, col_is_null) in null_mask.iter().enumerate() { if col_is_null { // Create column with all nulls but retain the datatype. - let null_col = Array::new_typed_null_array( + let null_col = Array2::new_typed_null_array( grouping_columns[col_idx].datatype().clone(), num_rows, )?; diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs index 355b64274..5fdcb25db 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; @@ -45,7 +45,7 @@ impl fmt::Display for HashJoinCondition { #[derive(Debug)] pub struct LeftPrecomputedJoinCondition { /// Precomputed results for left batches. 
- pub left_precomputed: Vec, + pub left_precomputed: Vec, pub left: PhysicalScalarExpression, pub right: PhysicalScalarExpression, pub function: PlannedScalarFunction, diff --git a/crates/rayexec_execution/src/execution/operators/sink.rs b/crates/rayexec_execution/src/execution/operators/sink.rs index 098dcf968..545bd0602 100644 --- a/crates/rayexec_execution/src/execution/operators/sink.rs +++ b/crates/rayexec_execution/src/execution/operators/sink.rs @@ -18,7 +18,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -370,7 +370,7 @@ impl ExecutableOperator for SinkOperator { let row_count = shared.global_row_count as u64; - let row_count_batch = Batch::try_new([Array::from_iter([row_count])])?; + let row_count_batch = Batch::try_new([Array2::from_iter([row_count])])?; return Ok(PollPull::Computed(row_count_batch.into())); } diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs index b89ff7ad3..cdb7a8f61 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::row::encoding::{ComparableColumn, ComparableRowEncoder, ComparableRows}; use crate::expr::physical::PhysicalSortExpression; @@ -38,7 +38,7 @@ impl SortKeysExtractor { } /// Get the columns that make up the sort keys. 
- pub fn sort_columns<'a>(&self, batch: &'a Batch) -> Result> { + pub fn sort_columns<'a>(&self, batch: &'a Batch) -> Result> { let sort_cols = self .order_by .iter() diff --git a/crates/rayexec_execution/src/execution/operators/table_inout.rs b/crates/rayexec_execution/src/execution/operators/table_inout.rs index e047eb941..728ae632b 100644 --- a/crates/rayexec_execution/src/execution/operators/table_inout.rs +++ b/crates/rayexec_execution/src/execution/operators/table_inout.rs @@ -13,7 +13,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -25,7 +25,7 @@ use crate::functions::table::{inout, PlannedTableFunction, TableFunctionImpl}; pub struct TableInOutPartitionState { function_state: Box, /// Additional outputs that will be included on the output batch. - additional_outputs: Vec, + additional_outputs: Vec, } #[derive(Debug)] diff --git a/crates/rayexec_execution/src/execution/operators/test_util.rs b/crates/rayexec_execution/src/execution/operators/test_util.rs index 65ef3bdde..14883e2d2 100644 --- a/crates/rayexec_execution/src/execution/operators/test_util.rs +++ b/crates/rayexec_execution/src/execution/operators/test_util.rs @@ -13,7 +13,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::scalar::ScalarValue; use crate::database::system::new_system_catalog; @@ -114,5 +114,5 @@ pub fn logical_value(batch: &Batch, column: usize, row: usize) -> ScalarValue { /// Makes a batch with a single column i32 values provided by the iterator. 
pub fn make_i32_batch(iter: impl IntoIterator) -> Batch { - Batch::try_new(vec![Array::from_iter(iter.into_iter())]).unwrap() + Batch::try_new(vec![Array2::from_iter(iter.into_iter())]).unwrap() } diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 9667554df..0ef2bd1f1 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -15,7 +15,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ @@ -56,9 +56,9 @@ use crate::expr::physical::PhysicalScalarExpression; #[derive(Debug)] pub struct UnnestPartitionState { /// Projections that need to extended to match the unnest outputs. - project_inputs: Vec, + project_inputs: Vec, /// Inputs we're processing. - unnest_inputs: Vec, + unnest_inputs: Vec, /// Number of rows in the input. input_num_rows: usize, /// Row we're currently unnesting. @@ -93,11 +93,11 @@ impl ExecutableOperator for PhysicalUnnest { .map(|_| { PartitionState::Unnest(UnnestPartitionState { project_inputs: vec![ - Array::new_untyped_null_array(0); + Array2::new_untyped_null_array(0); self.project_expressions.len() ], unnest_inputs: vec![ - Array::new_untyped_null_array(0); + Array2::new_untyped_null_array(0); self.unnest_expressions.len() ], input_num_rows: 0, @@ -259,7 +259,7 @@ impl ExecutableOperator for PhysicalUnnest { None => { // Row is null, produce nulls according to longest // length. - let out = Array::new_typed_null_array( + let out = Array2::new_typed_null_array( child.datatype().clone(), longest as usize, )?; @@ -269,7 +269,7 @@ impl ExecutableOperator for PhysicalUnnest { } PhysicalType::UntypedNull => { // Just produce null array according to longest length. 
- let out = Array::new_untyped_null_array(longest as usize); + let out = Array2::new_untyped_null_array(longest as usize); outputs.push(out); } other => { @@ -304,11 +304,11 @@ impl Explainable for PhysicalUnnest { } } -pub(crate) fn unnest(child: &Array, longest_len: usize, meta: ListItemMetadata) -> Result { +pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) -> Result { let datatype = child.datatype().clone(); match child.physical_type() { - PhysicalType::UntypedNull => Ok(Array::new_untyped_null_array(longest_len)), + PhysicalType::UntypedNull => Ok(Array2::new_untyped_null_array(longest_len)), PhysicalType::Boolean => { let builder = ArrayBuilder { datatype, @@ -427,9 +427,9 @@ pub(crate) fn unnest(child: &Array, longest_len: usize, meta: ListItemMetadata) fn unnest_inner<'a, S, B>( mut builder: ArrayBuilder, - child: &'a Array, + child: &'a Array2, meta: ListItemMetadata, -) -> Result +) -> Result where S: PhysicalStorage, B: ArrayDataBuffer, @@ -459,7 +459,7 @@ where builder.buffer.put(out_idx, val.borrow()); } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, out_validity, builder.buffer.into_data(), @@ -482,7 +482,7 @@ where builder.buffer.put(out_idx, val.borrow()); } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, out_validity, builder.buffer.into_data(), diff --git a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs index beca52778..828b544da 100644 --- a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs +++ b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs @@ -152,7 +152,7 @@ mod tests { use futures::FutureExt; use super::*; - use crate::arrays::array::Array; + use crate::arrays::array::Array2; struct NopWaker {} @@ -171,7 +171,7 @@ mod tests { /// Create a batch with a single 
int64 value. fn test_batch(n: i64) -> Batch { - let col = Array::from_iter([n]); + let col = Array2::from_iter([n]); Batch::try_new([col]).unwrap() } diff --git a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs index 12b385ce2..b0791b9a8 100644 --- a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs +++ b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; @@ -106,7 +106,7 @@ impl LeftOuterJoinDrainState { .columns() .iter() .cloned() - .chain([Array::new_with_array_data( + .chain([Array2::new_with_array_data( DataType::Boolean, ArrayData::Boolean(Arc::new(bitmap.clone().into())), )]); @@ -152,7 +152,7 @@ impl LeftOuterJoinDrainState { let right_cols = self .right_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; @@ -187,7 +187,7 @@ impl LeftOuterJoinDrainState { let right_cols = self .right_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; @@ -241,7 +241,7 @@ impl RightOuterJoinTracker { let left_null_cols = left_types .iter() - .map(|datatype| Array::new_typed_null_array(datatype.clone(), num_rows)) + .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; let batch = 
Batch::try_new(left_null_cols.into_iter().chain(right_cols))?; diff --git a/crates/rayexec_execution/src/execution/operators/util/resizer.rs b/crates/rayexec_execution/src/execution/operators/util/resizer.rs index c2a7eb615..cda83ea42 100644 --- a/crates/rayexec_execution/src/execution/operators/util/resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/util/resizer.rs @@ -117,20 +117,20 @@ impl BatchResizer { #[cfg(test)] mod tests { use super::*; - use crate::arrays::array::Array; + use crate::arrays::array::Array2; use crate::arrays::testutil::assert_batches_eq; #[test] fn push_within_target() { let batch1 = Batch::try_new([ - Array::from_iter([1, 2, 3]), - Array::from_iter(["a", "b", "c"]), + Array2::from_iter([1, 2, 3]), + Array2::from_iter(["a", "b", "c"]), ]) .unwrap(); let batch2 = Batch::try_new([ - Array::from_iter([4, 5, 6]), - Array::from_iter(["d", "e", "f"]), + Array2::from_iter([4, 5, 6]), + Array2::from_iter(["d", "e", "f"]), ]) .unwrap(); @@ -146,15 +146,15 @@ mod tests { }; let expected = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), + Array2::from_iter([1, 2, 3, 4]), + Array2::from_iter(["a", "b", "c", "d"]), ]) .unwrap(); assert_batches_eq(&expected, &got); let expected_rem = - Batch::try_new([Array::from_iter([5, 6]), Array::from_iter(["e", "f"])]).unwrap(); + Batch::try_new([Array2::from_iter([5, 6]), Array2::from_iter(["e", "f"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, @@ -169,8 +169,8 @@ mod tests { // len(batch) > target && len(batch) < target * 2 let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4, 5]), - Array::from_iter(["a", "b", "c", "d", "e"]), + Array2::from_iter([1, 2, 3, 4, 5]), + Array2::from_iter(["a", "b", "c", "d", "e"]), ]) .unwrap(); @@ -181,15 +181,15 @@ mod tests { }; let expected = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), + 
Array2::from_iter([1, 2, 3, 4]), + Array2::from_iter(["a", "b", "c", "d"]), ]) .unwrap(); assert_batches_eq(&expected, &got); let expected_rem = - Batch::try_new([Array::from_iter([5]), Array::from_iter(["e"])]).unwrap(); + Batch::try_new([Array2::from_iter([5]), Array2::from_iter(["e"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, @@ -204,8 +204,8 @@ mod tests { // len(batch) > target * 2 let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - Array::from_iter(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), + Array2::from_iter([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), + Array2::from_iter(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), ]) .unwrap(); @@ -218,21 +218,21 @@ mod tests { assert_eq!(2, gots.len()); let expected1 = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter(["a", "b", "c", "d"]), + Array2::from_iter([1, 2, 3, 4]), + Array2::from_iter(["a", "b", "c", "d"]), ]) .unwrap(); assert_batches_eq(&expected1, &gots[0]); let expected2 = Batch::try_new([ - Array::from_iter([5, 6, 7, 8]), - Array::from_iter(["e", "f", "g", "h"]), + Array2::from_iter([5, 6, 7, 8]), + Array2::from_iter(["e", "f", "g", "h"]), ]) .unwrap(); assert_batches_eq(&expected2, &gots[1]); let expected_rem = - Batch::try_new([Array::from_iter([9, 10]), Array::from_iter(["i", "j"])]).unwrap(); + Batch::try_new([Array2::from_iter([9, 10]), Array2::from_iter(["i", "j"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index bafbb5db8..511560153 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use rayexec_error::Result; use super::PhysicalScalarExpression; -use 
crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::scalar::{interleave, SelectExecutor}; @@ -30,7 +30,7 @@ pub struct PhysicalCaseExpr { } impl PhysicalCaseExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { let mut arrays = Vec::new(); let mut indices: Vec<(usize, usize)> = (0..batch.num_rows()).map(|_| (0, 0)).collect(); @@ -129,8 +129,8 @@ mod tests { #[test] fn case_simple() { let batch = Batch::try_new([ - Array::from_iter([1, 2, 3, 4]), - Array::from_iter([12, 13, 14, 15]), + Array2::from_iter([1, 2, 3, 4]), + Array2::from_iter([12, 13, 14, 15]), ]) .unwrap(); diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 081e8b023..33aeb91ab 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -5,7 +5,7 @@ use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; use super::PhysicalScalarExpression; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::compute::cast::array::cast_array; use crate::arrays::compute::cast::behavior::CastFailBehavior; @@ -20,7 +20,7 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { let input = self.expr.eval(batch)?; let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; Ok(Cow::Owned(out)) diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 6394383cb..889563a05 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -3,7 
+3,7 @@ use std::fmt; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -14,7 +14,7 @@ pub struct PhysicalColumnExpr { } impl PhysicalColumnExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { let col = batch.column(self.idx).ok_or_else(|| { RayexecError::new(format!( "Tried to get column at index {} in a batch with {} columns", diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index 6f64288fd..af66526b0 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -4,7 +4,7 @@ use std::fmt; use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::scalar::OwnedScalarValue; use crate::database::DatabaseContext; @@ -16,7 +16,7 @@ pub struct PhysicalLiteralExpr { } impl PhysicalLiteralExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { let arr = self.literal.as_array(batch.num_rows())?; Ok(Cow::Owned(arr)) } diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 223561832..646ea0831 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -16,7 +16,7 @@ use literal_expr::PhysicalLiteralExpr; use rayexec_error::{not_implemented, OptionExt, Result}; use scalar_function_expr::PhysicalScalarFunctionExpr; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use 
crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; @@ -34,7 +34,7 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { match self { Self::Case(e) => e.eval(batch), Self::Cast(e) => e.eval(batch), @@ -199,8 +199,8 @@ mod tests { #[test] fn select_some() { let batch = Batch::try_new([ - Array::from_iter([1, 4, 6, 9, 12]), - Array::from_iter([2, 3, 8, 9, 10]), + Array2::from_iter([1, 4, 6, 9, 12]), + Array2::from_iter([2, 3, 8, 9, 10]), ]) .unwrap(); @@ -226,8 +226,8 @@ mod tests { #[test] fn select_none() { let batch = Batch::try_new([ - Array::from_iter([1, 2, 6, 9, 9]), - Array::from_iter([2, 3, 8, 9, 10]), + Array2::from_iter([1, 2, 6, 9, 9]), + Array2::from_iter([2, 3, 8, 9, 10]), ]) .unwrap(); diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 676f6122f..9ee56a08b 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -5,7 +5,7 @@ use fmtutil::IntoDisplayableSlice; use rayexec_error::Result; use super::PhysicalScalarExpression; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::database::DatabaseContext; use crate::functions::scalar::PlannedScalarFunction; @@ -18,7 +18,7 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { let inputs = self .inputs .iter() diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index 205107c09..8d9d29f86 100644 --- 
a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -6,7 +6,7 @@ use num_traits::AsPrimitive; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; @@ -159,7 +159,7 @@ where builder.buffer.put(idx, &val); } - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, validities, builder.buffer.into_data(), diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 379d60935..8fc959489 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -222,7 +222,7 @@ impl AggregateState for SumStateAdd #[cfg(test)] mod tests { use super::*; - use crate::arrays::array::Array; + use crate::arrays::array::Array2; use crate::arrays::scalar::ScalarValue; use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; use crate::expr; @@ -232,8 +232,8 @@ mod tests { fn sum_i64_single_group_two_partitions() { // Single group, two partitions, 'SELECT SUM(a) FROM table' - let partition_1_vals = &Array::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = &Array2::from_iter::<[i64; 3]>([1, 2, 3]); + let partition_2_vals = &Array2::from_iter::<[i64; 3]>([4, 5, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -309,8 +309,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. 
- let partition_1_vals = &Array::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = &Array2::from_iter::<[i64; 3]>([1, 2, 3]); + let partition_2_vals = &Array2::from_iter::<[i64; 3]>([4, 5, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -431,8 +431,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. - let partition_1_vals = &Array::from_iter::<[i64; 4]>([1, 2, 3, 4]); - let partition_2_vals = &Array::from_iter::<[i64; 4]>([5, 6, 7, 8]); + let partition_1_vals = &Array2::from_iter::<[i64; 4]>([1, 2, 3, 4]); + let partition_2_vals = &Array2::from_iter::<[i64; 4]>([5, 6, 7, 8]); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index c3926aa98..2ce04399e 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -6,7 +6,7 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; use super::ChunkGroupAddressIter; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::aggregate::{ AggregateState, @@ -64,7 +64,7 @@ where + 'static, Output: Sync + Send + 'static, StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, + StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, { Box::new(TypedAggregateGroupStates { states: Vec::new(), @@ -90,7 +90,7 @@ where + 'static, Output: Sync + Send + 'static, StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + 
Send + 'static, + StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, { Box::new(TypedAggregateGroupStates { states: Vec::new(), @@ -109,8 +109,8 @@ where Input: Sync + Send, Output: Sync + Send, StateInit: Fn() -> State + Sync + Send, - StateUpdate: Fn(&[&Array], ChunkGroupAddressIter, &mut [State]) -> Result<()> + Sync + Send, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send, + StateUpdate: Fn(&[&Array2], ChunkGroupAddressIter, &mut [State]) -> Result<()> + Sync + Send, + StateFinalize: Fn(&mut [State]) -> Result + Sync + Send, { fn opaque_states_mut(&mut self) -> OpaqueStatesMut<'_> { OpaqueStatesMut(&mut self.states) @@ -124,7 +124,7 @@ where self.states.len() } - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()> { + fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { (self.state_update)(inputs, mapping, &mut self.states) } @@ -137,7 +137,7 @@ where StateCombiner::combine(consume_states, mapping, &mut self.states) } - fn finalize(&mut self) -> Result { + fn finalize(&mut self) -> Result { (self.state_finalize)(&mut self.states) } } @@ -166,7 +166,7 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn num_states(&self) -> usize; /// Update states from inputs using some mapping. - fn update_states(&mut self, inputs: &[&Array], mapping: ChunkGroupAddressIter) -> Result<()>; + fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; /// Combine states from another partition into self using some mapping. fn combine( @@ -176,7 +176,7 @@ pub trait AggregateGroupStates: Debug + Sync + Send { ) -> Result<()>; /// Finalize the states and return an array. - fn finalize(&mut self) -> Result; + fn finalize(&mut self) -> Result; } #[derive(Debug)] @@ -194,7 +194,7 @@ impl<'a> OpaqueStatesMut<'a> { /// Update function for a unary aggregate. 
pub fn unary_update( - arrays: &[&Array], + arrays: &[&Array2], mapping: ChunkGroupAddressIter, states: &mut [State], ) -> Result<()> @@ -206,7 +206,7 @@ where } pub fn binary_update( - arrays: &[&Array], + arrays: &[&Array2], mapping: ChunkGroupAddressIter, states: &mut [State], ) -> Result<()> @@ -220,11 +220,11 @@ where ) } -pub fn untyped_null_finalize(states: &mut [State]) -> Result { - Ok(Array::new_untyped_null_array(states.len())) +pub fn untyped_null_finalize(states: &mut [State]) -> Result { + Ok(Array2::new_untyped_null_array(states.len())) } -pub fn boolean_finalize(datatype: DataType, states: &mut [State]) -> Result +pub fn boolean_finalize(datatype: DataType, states: &mut [State]) -> Result where State: AggregateState, { @@ -238,7 +238,7 @@ where pub fn primitive_finalize( datatype: DataType, states: &mut [State], -) -> Result +) -> Result where State: AggregateState, Output: Copy + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index a3bd2318e..f8f75b600 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -208,7 +208,7 @@ where for<'a> S::Type<'a>: std::ops::Add> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -230,8 +230,8 @@ mod tests { #[test] fn add_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([4, 5, 6]); + let a = Array2::from_iter([1, 2, 
3]); + let b = Array2::from_iter([4, 5, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -250,7 +250,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([5, 7, 9]); + let expected = Array2::from_iter([5, 7, 9]); assert_eq!(expected, out); } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index f5c27bd17..7a02531fc 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::compute::cast::array::cast_decimal_to_float; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId}; @@ -198,7 +198,7 @@ impl ScalarFunctionImpl for DecimalDivImpl where D: DecimalType, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -245,7 +245,7 @@ where for<'a> S::Type<'a>: std::ops::Div> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -267,8 +267,8 @@ mod tests { #[test] fn div_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array2::from_iter([4, 5, 6]); + let b = Array2::from_iter([1, 2, 3]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -287,7 +287,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([4, 2, 2]); + let expected = Array2::from_iter([4, 2, 2]); assert_eq!(expected, out); } diff 
--git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 1a2c3d9e9..df2e16e60 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use num_traits::{NumCast, PrimInt}; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -239,7 +239,7 @@ where Rhs: PhysicalStorage, for<'a> Rhs::Type<'a>: PrimInt, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let (lhs, rhs) = if LHS_RHS_FLIPPED { (inputs[1], inputs[0]) } else { @@ -282,7 +282,7 @@ where D: DecimalType, ArrayData: From>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -318,7 +318,7 @@ where for<'a> S::Type<'a>: std::ops::Mul> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -340,8 +340,8 @@ mod tests { #[test] fn mul_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array2::from_iter([4, 5, 6]); + let b = Array2::from_iter([1, 2, 3]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -360,7 +360,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([4, 10, 18]); + let expected = Array2::from_iter([4, 10, 18]); assert_eq!(expected, out); } diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 52beb91ff..e72483d46 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -200,7 +200,7 @@ where for<'a> S::Type<'a>: std::ops::Rem> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -222,8 +222,8 @@ mod tests { #[test] fn rem_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array2::from_iter([4, 5, 6]); + let b = Array2::from_iter([1, 2, 3]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -242,7 +242,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([0, 1, 0]); + let expected = Array2::from_iter([0, 1, 0]); assert_eq!(expected, out); } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 5936265ad..d091749a4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -215,7 +215,7 @@ where for<'a> S::Type<'a>: std::ops::Sub> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -237,8 +237,8 @@ mod tests { #[test] fn sub_i32() { - let a = Array::from_iter([4, 5, 6]); - let b = Array::from_iter([1, 2, 3]); + let a = Array2::from_iter([4, 5, 6]); + let b = Array2::from_iter([1, 2, 3]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -257,7 +257,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([3, 3, 3]); + let expected = Array2::from_iter([3, 3, 3]); assert_eq!(expected, out); } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index c72438ec5..a9d1d069a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; @@ -70,10 +70,10 @@ impl ScalarFunction for And { pub struct AndImpl; impl ScalarFunctionImpl for AndImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { - let mut array = Array::new_with_array_data( + let mut array = Array2::new_with_array_data( DataType::Boolean, BooleanStorage::from(Bitmap::new_with_val(false, 1)), ); @@ -178,10 +178,10 @@ impl ScalarFunction for Or { pub struct OrImpl; impl 
ScalarFunctionImpl for OrImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { - let mut array = Array::new_with_array_data( + let mut array = Array2::new_with_array_data( DataType::Boolean, BooleanStorage::from(Bitmap::new_with_val(false, 1)), ); @@ -225,8 +225,8 @@ mod tests { #[test] fn and_bool_2() { - let a = Array::from_iter([true, false, false]); - let b = Array::from_iter([true, true, false]); + let a = Array2::from_iter([true, false, false]); + let b = Array2::from_iter([true, true, false]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -253,9 +253,9 @@ mod tests { #[test] fn and_bool_3() { - let a = Array::from_iter([true, true, true]); - let b = Array::from_iter([false, true, true]); - let c = Array::from_iter([true, true, false]); + let a = Array2::from_iter([true, true, true]); + let b = Array2::from_iter([false, true, true]); + let c = Array2::from_iter([true, true, false]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -286,8 +286,8 @@ mod tests { #[test] fn or_bool_2() { - let a = Array::from_iter([true, false, false]); - let b = Array::from_iter([true, true, false]); + let a = Array2::from_iter([true, false, false]); + let b = Array2::from_iter([true, true, false]); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 23ba23a54..fad9ab0d4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::compute::cast::array::decimal_rescale; use 
crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; @@ -660,7 +660,7 @@ impl ScalarFunctionImpl for ListComparisonImpl where O: ComparisonOperation, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -797,7 +797,7 @@ where S: PhysicalStorage, for<'a> S::Type<'a>: PartialEq + PartialOrd, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -845,7 +845,7 @@ where T: DecimalType, ArrayData: From>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -901,8 +901,8 @@ mod tests { #[test] fn eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -921,15 +921,15 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, true, false]); + let expected = Array2::from_iter([false, true, false]); assert_eq!(expected, out); } #[test] fn neq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -948,15 +948,15 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, false, true]); + let expected = Array2::from_iter([true, false, true]); assert_eq!(expected, out); } #[test] fn lt_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let 
b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -975,15 +975,15 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, false, true]); + let expected = Array2::from_iter([true, false, true]); assert_eq!(expected, out); } #[test] fn lt_eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1002,15 +1002,15 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([true, true, true]); + let expected = Array2::from_iter([true, true, true]); assert_eq!(expected, out); } #[test] fn gt_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1029,15 +1029,15 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, false, false]); + let expected = Array2::from_iter([false, false, false]); assert_eq!(expected, out); } #[test] fn gt_eq_i32() { - let a = Array::from_iter([1, 2, 3]); - let b = Array::from_iter([2, 2, 6]); + let a = Array2::from_iter([1, 2, 3]); + let b = Array2::from_iter([2, 2, 6]); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1056,7 +1056,7 @@ mod tests { .unwrap(); let out = planned.function_impl.execute(&[&a, &b]).unwrap(); - let expected = Array::from_iter([false, true, false]); + let expected = Array2::from_iter([false, true, false]); assert_eq!(expected, out); } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs index 2e4044200..2a6cb7823 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use rayexec_parser::ast; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::compute::date::{self, extract_date_part}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; @@ -101,7 +101,7 @@ pub struct DatePartImpl { } impl ScalarFunctionImpl for DatePartImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { // First input ignored (the constant "part" to extract) extract_date_part(self.part, inputs[1]) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs index b677bd584..a359bcdac 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs @@ -2,7 +2,7 @@ use std::str::FromStr; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalI64; @@ -140,7 +140,7 @@ pub struct DateTruncImpl { } impl ScalarFunctionImpl for DateTruncImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = &inputs[1]; let trunc = match self.input_unit { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs index 536dee3ac..9e360163f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalI64; @@ -102,13 +102,13 @@ impl ScalarFunction for EpochMs { pub struct EpochImpl; impl ScalarFunctionImpl for EpochImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; to_timestamp::(input) } } -fn to_timestamp(input: &Array) -> Result { +fn to_timestamp(input: &Array2) -> Result { let builder = ArrayBuilder { datatype: DataType::Timestamp(TimestampTypeMeta { unit: TimeUnit::Microsecond, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index 69a221936..e841c8c67 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool}; @@ -101,7 +101,7 @@ impl ScalarFunction for IsNotNull { pub struct CheckNullImpl; impl ScalarFunctionImpl for CheckNullImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let (initial, updated) = if IS_NULL { @@ -123,7 +123,7 @@ impl ScalarFunctionImpl for 
CheckNullImpl { // Drop validity. let data = array.into_array_data(); - Ok(Array::new_with_array_data(DataType::Boolean, data)) + Ok(Array2::new_with_array_data(DataType::Boolean, data)) } } @@ -303,7 +303,7 @@ impl ScalarFunction for IsNotFalse { pub struct CheckBoolImpl; impl ScalarFunctionImpl for CheckBoolImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let initial = NOT; @@ -319,6 +319,6 @@ impl ScalarFunctionImpl for CheckBoolImpl Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; extract(input, self.index) } } -fn extract(array: &Array, idx: usize) -> Result { +fn extract(array: &Array2, idx: usize) -> Result { let data = match array.array_data() { ArrayData::List(list) => list.as_ref(), _other => return Err(RayexecError::new("Unexpected storage type")), @@ -252,10 +252,10 @@ fn extract(array: &Array, idx: usize) -> Result { fn extract_inner<'a, S, B>( mut builder: ArrayBuilder, - outer: &Array, - inner: &'a Array, + outer: &Array2, + inner: &'a Array2, el_idx: usize, -) -> Result +) -> Result where S: PhysicalStorage, B: ArrayDataBuffer, @@ -290,7 +290,7 @@ where validity.set_unchecked(idx, false); })?; - Ok(Array::new_with_validity_and_array_data( + Ok(Array2::new_with_validity_and_array_data( builder.datatype, validity, builder.buffer.into_data(), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index b961085db..ffb2aa7ef 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; use crate::arrays::executor::scalar::concat; use 
crate::arrays::storage::ListStorage; @@ -90,20 +90,27 @@ pub struct ListValuesImpl { } impl ScalarFunctionImpl for ListValuesImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { if inputs.is_empty() { let inner_type = match &self.list_datatype { DataType::List(l) => l.datatype.as_ref(), other => panic!("invalid data type: {other}"), }; - let data = ListStorage::empty_list(Array::new_typed_null_array(inner_type.clone(), 1)?); - return Ok(Array::new_with_array_data(self.list_datatype.clone(), data)); + let data = + ListStorage::empty_list(Array2::new_typed_null_array(inner_type.clone(), 1)?); + return Ok(Array2::new_with_array_data( + self.list_datatype.clone(), + data, + )); } let out = concat(inputs)?; let data = ListStorage::single_list(out); - Ok(Array::new_with_array_data(self.list_datatype.clone(), data)) + Ok(Array2::new_with_array_data( + self.list_datatype.clone(), + data, + )) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index fc3099d39..fe0a1df07 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -102,7 +102,7 @@ where for<'a> S::Type<'a>: std::ops::Neg> + Default + Copy, ArrayData: From>>, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { use std::ops::Neg; let a = inputs[0]; @@ -165,7 +165,7 @@ impl ScalarFunction for Not { pub struct NotImpl; impl ScalarFunctionImpl for NotImpl { - fn execute(&self, 
inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { UnaryExecutor::execute::( inputs[0], ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index c47048fef..2beb8e9d3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AbsOp { const NAME: &'static str = "abs"; const DESCRIPTION: &'static str = "Compute the absolute value of a number"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index ab989ab52..6ccb8d49f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl 
UnaryInputNumericOperation for AcosOp { const NAME: &'static str = "acos"; const DESCRIPTION: &'static str = "Compute the arccosine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index d2721fa9d..d47994b5d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AsinOp { const NAME: &'static str = "asin"; const DESCRIPTION: &'static str = "Compute the arcsine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 8a3aad508..9cde0bff3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, 
ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AtanOp { const NAME: &'static str = "atan"; const DESCRIPTION: &'static str = "Compute the arctangent of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index fe97f1980..64a5c4963 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for CbrtOp { const NAME: &'static str = "cbrt"; const DESCRIPTION: &'static str = "Compute the cube root of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 49b0a14d4..9710c9e84 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -2,7 +2,7 
@@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for CeilOp { const NAME: &'static str = "ceil"; const DESCRIPTION: &'static str = "Round number up"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 35d3e82b7..05e279a8b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for CosOp { const NAME: &'static str = "cos"; const DESCRIPTION: &'static str = "Compute the cosine of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 
634d2497c..58386a653 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for DegreesOp { const NAME: &'static str = "degrees"; const DESCRIPTION: &'static str = "Converts radians to degrees"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index a71e4a70f..abeffec6a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for ExpOp { const NAME: &'static str = "exp"; const DESCRIPTION: &'static str = "Compute `e ^ val`"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, 
S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 4e8ef22d3..3826856f5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for FloorOp { const NAME: &'static str = "floor"; const DESCRIPTION: &'static str = "Round number down"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index 37d56a348..b8a05427e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -4,7 +4,7 @@ use num_traits::Float; use rayexec_error::Result; use super::ScalarFunction; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ @@ -102,7 +102,7 @@ where S: PhysicalStorage, for<'a> S::Type<'a>: Float, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; 
let builder = ArrayBuilder { datatype: DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 6bd68c098..29153857a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for LnOp { const NAME: &'static str = "ln"; const DESCRIPTION: &'static str = "Compute natural log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index c668a51e3..db09c952e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for LogOp { const NAME: &'static str = "log"; const DESCRIPTION: 
&'static str = "Compute base-10 log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, @@ -41,7 +41,7 @@ impl UnaryInputNumericOperation for LogOp2 { const NAME: &'static str = "log2"; const DESCRIPTION: &'static str = "Compute base-2 log of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index a2dcb12df..28192a12a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -38,7 +38,7 @@ pub use sin::*; pub use sqrt::*; pub use tan::*; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{ PhysicalF16, @@ -81,7 +81,7 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat const NAME: &'static str; const DESCRIPTION: &'static str; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, @@ -145,7 +145,7 @@ pub(crate) struct UnaryInputNumericScalarImpl { } impl ScalarFunctionImpl for UnaryInputNumericScalarImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; match input.physical_type() { PhysicalType::Float16 => O::execute_float::(input, self.ret.clone()), diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index d4c71dbfa..efca90b4f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for RadiansOp { const NAME: &'static str = "radians"; const DESCRIPTION: &'static str = "Converts degrees to radians"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 7561269c9..467bc4ed9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for SinOp { const NAME: &'static str = "sin"; const DESCRIPTION: &'static str = "Compute the sin of value"; - 
fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index 8770db9df..8add9ecfb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for SqrtOp { const NAME: &'static str = "sqrt"; const DESCRIPTION: &'static str = "Compute the square root of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 67d0377af..b705b9040 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use 
crate::arrays::executor::physical_type::PhysicalStorage; @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for TanOp { const NAME: &'static str = "tan"; const DESCRIPTION: &'static str = "Compute the tangent of value"; - fn execute_float<'a, S>(input: &'a Array, ret: DataType) -> Result + fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs index 5909de01d..7adcef12d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -62,10 +62,10 @@ impl ScalarFunction for Random { pub struct RandomImpl; impl ScalarFunctionImpl for RandomImpl { - fn execute(&self, _inputs: &[&Array]) -> Result { + fn execute(&self, _inputs: &[&Array2]) -> Result { // TODO: Need to pass in dummy input to produce all unique values. 
let val = rand::random::(); - Ok(Array::new_with_array_data( + Ok(Array2::new_with_array_data( DataType::Float64, PrimitiveStorage::from(vec![val]), )) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 8db1686f8..c7f3bff3e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -4,7 +4,7 @@ use std::ops::AddAssign; use num_traits::{AsPrimitive, Float}; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -110,7 +110,7 @@ where S: PhysicalStorage, for<'a> S::Type<'a>: Float + AddAssign + AsPrimitive + Default + Copy, { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs index fc7beda38..960687ef7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -61,7 +61,7 @@ impl ScalarFunction for Ascii { pub struct AsciiImpl; impl ScalarFunctionImpl for AsciiImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = 
inputs[0]; let builder = ArrayBuilder { datatype: DataType::Int32, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index 2c8ed0a36..b1de7c37a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -60,7 +60,7 @@ impl ScalarFunction for Lower { pub struct LowerImpl; impl ScalarFunctionImpl for LowerImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; case_convert_execute(input, str::to_lowercase) } @@ -115,13 +115,13 @@ impl ScalarFunction for Upper { pub struct UpperImpl; impl ScalarFunctionImpl for UpperImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; case_convert_execute(input, str::to_uppercase) } } -fn case_convert_execute(input: &Array, case_fn: F) -> Result +fn case_convert_execute(input: &Array2, case_fn: F) -> Result where F: Fn(&str) -> String, { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index 1b7c5ae40..c9e019c20 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use 
crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -68,10 +68,10 @@ impl ScalarFunction for Concat { pub struct StringConcatImpl; impl ScalarFunctionImpl for StringConcatImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { - let mut array = Array::from_iter([""]); + let mut array = Array2::from_iter([""]); array.set_physical_validity(0, false); Ok(array) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 8cda9192a..01396a46b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -82,7 +82,7 @@ pub struct StringContainsConstantImpl { } impl ScalarFunctionImpl for StringContainsConstantImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), @@ -98,7 +98,7 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { pub struct StringContainsImpl; impl ScalarFunctionImpl for StringContainsImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index 86d166b50..68941685f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -86,7 +86,7 @@ pub struct EndsWithConstantImpl { } impl ScalarFunctionImpl for EndsWithConstantImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), @@ -102,7 +102,7 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { pub struct EndsWithImpl; impl ScalarFunctionImpl for EndsWithImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index 7943e24eb..426ee3e47 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalBinary, PhysicalUtf8}; @@ -64,7 +64,7 @@ impl ScalarFunction for Length { pub struct StrLengthImpl; impl 
ScalarFunctionImpl for StrLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { @@ -145,7 +145,7 @@ impl ScalarFunction for ByteLength { pub struct ByteLengthImpl; impl ScalarFunctionImpl for ByteLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { @@ -222,7 +222,7 @@ impl ScalarFunction for BitLength { pub struct BitLengthImpl; impl ScalarFunctionImpl for BitLengthImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index 539b35255..f1283ffdb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -1,7 +1,7 @@ use rayexec_error::{Result, ResultExt}; use regex::{escape, Regex}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -84,7 +84,7 @@ pub struct LikeConstImpl { } impl ScalarFunctionImpl for LikeConstImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), @@ -101,7 +101,7 @@ impl ScalarFunctionImpl for LikeConstImpl { pub struct LikeImpl; impl ScalarFunctionImpl for LikeImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { 
datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs index ff554eea4..1ed2c7f01 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; @@ -96,7 +96,7 @@ impl ScalarFunction for LeftPad { pub struct LeftPadImpl; impl ScalarFunctionImpl for LeftPadImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let mut string_buf = String::new(); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -209,7 +209,7 @@ impl ScalarFunction for RightPad { pub struct RightPadImpl; impl ScalarFunctionImpl for RightPadImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let mut string_buf = String::new(); let builder = ArrayBuilder { datatype: DataType::Utf8, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs index 34b2ca556..0250f98d3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs @@ -1,7 +1,7 @@ use rayexec_error::{Result, ResultExt}; use regex::Regex; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, 
GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -98,7 +98,7 @@ pub struct RegexpReplaceImpl { } impl ScalarFunctionImpl for RegexpReplaceImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs index b725787bc..cda468e69 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs @@ -2,7 +2,7 @@ use std::fmt::Debug; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; @@ -65,7 +65,7 @@ impl ScalarFunction for Repeat { pub struct RepeatUtf8Impl; impl ScalarFunctionImpl for RepeatUtf8Impl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let strings = inputs[0]; let nums = inputs[1]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs index 9c7113461..495c84b1e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use 
crate::arrays::executor::physical_type::PhysicalUtf8; @@ -84,7 +84,7 @@ pub struct StartsWithImpl { } impl ScalarFunctionImpl for StartsWithImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index d338a27cc..d6aa76568 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; @@ -105,7 +105,7 @@ impl ScalarFunction for Substring { pub struct SubstringFromImpl; impl ScalarFunctionImpl for SubstringFromImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); BinaryExecutor::execute::( inputs[0], @@ -123,7 +123,7 @@ impl ScalarFunctionImpl for SubstringFromImpl { pub struct SubstringFromToImpl; impl ScalarFunctionImpl for SubstringFromToImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); TernaryExecutor::execute::( inputs[0], diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index d36f04e87..32819198e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -222,7 +222,7 @@ impl TrimWhitespaceImpl { } impl ScalarFunctionImpl for TrimWhitespaceImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), @@ -247,7 +247,7 @@ impl TrimPatternImpl { } impl ScalarFunctionImpl for TrimPatternImpl { - fn execute(&self, inputs: &[&Array]) -> Result { + fn execute(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/mod.rs b/crates/rayexec_execution/src/functions/scalar/mod.rs index 22941656d..bb36d066e 100644 --- a/crates/rayexec_execution/src/functions/scalar/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/mod.rs @@ -7,7 +7,7 @@ use dyn_clone::DynClone; use rayexec_error::Result; use super::FunctionInfo; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::datatype::DataType; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -103,7 +103,7 @@ impl Hash for PlannedScalarFunction { } pub trait ScalarFunctionImpl: Debug + Sync + Send + DynClone { - fn execute(&self, inputs: &[&Array]) -> Result; + fn execute(&self, inputs: &[&Array2]) -> Result; } impl Clone for Box { diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index 
aef14ab0a..7e6414d3f 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -3,7 +3,7 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::PhysicalI64; @@ -164,7 +164,7 @@ struct SeriesParams { impl SeriesParams { /// Generate the next set of rows using the current parameters. - fn generate_next(&mut self, batch_size: usize) -> Array { + fn generate_next(&mut self, batch_size: usize) -> Array2 { debug_assert!(!self.exhausted); let mut series: Vec = Vec::new(); @@ -195,7 +195,7 @@ impl SeriesParams { self.curr = *last + self.step; } - Array::new_with_array_data(DataType::Int64, PrimitiveStorage::from(series)) + Array2::new_with_array_data(DataType::Int64, PrimitiveStorage::from(series)) } } diff --git a/crates/rayexec_execution/src/functions/table/builtin/system.rs b/crates/rayexec_execution/src/functions/table/builtin/system.rs index f0723ddeb..6acf05a58 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/system.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/system.rs @@ -7,7 +7,7 @@ use futures::future::BoxFuture; use parking_lot::Mutex; use rayexec_error::{OptionExt, RayexecError, Result}; -use crate::arrays::array::Array; +use crate::arrays::array::Array2; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; @@ -79,8 +79,8 @@ impl SystemFunctionImpl for ListDatabasesImpl { } Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names.into_data()), - Array::new_with_array_data(DataType::Utf8, datasources.into_data()), + Array2::new_with_array_data(DataType::Utf8, database_names.into_data()), + 
Array2::new_with_array_data(DataType::Utf8, datasources.into_data()), ]) } } @@ -232,32 +232,32 @@ impl SystemFunctionImpl for ListFunctionsImpl { })?; Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - Array::new_with_array_data(DataType::Utf8, schema_names), - Array::new_with_array_data(DataType::Utf8, function_names), - Array::new_with_array_data(DataType::Utf8, function_types), - Array::new_with_array_data( + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), + Array2::new_with_array_data(DataType::Utf8, function_names), + Array2::new_with_array_data(DataType::Utf8, function_types), + Array2::new_with_array_data( DataType::List(ListTypeMeta::new(DataType::Utf8)), ListStorage::try_new( argument_types_metadatas, - Array::new_with_array_data(DataType::Utf8, argument_types), + Array2::new_with_array_data(DataType::Utf8, argument_types), )?, ), - Array::new_with_array_data( + Array2::new_with_array_data( DataType::List(ListTypeMeta::new(DataType::Utf8)), ListStorage::try_new( argument_names_metadatas, - Array::new_with_array_data(DataType::Utf8, argument_names), + Array2::new_with_array_data(DataType::Utf8, argument_names), )?, ), - Array::new_with_array_data(DataType::Utf8, return_types), - Array::new_with_validity_and_array_data( + Array2::new_with_array_data(DataType::Utf8, return_types), + Array2::new_with_validity_and_array_data( DataType::Utf8, descriptions_validity, descriptions, ), - Array::new_with_validity_and_array_data(DataType::Utf8, examples_validity, examples), - Array::new_with_validity_and_array_data( + Array2::new_with_validity_and_array_data(DataType::Utf8, examples_validity, examples), + Array2::new_with_validity_and_array_data( DataType::Utf8, example_outputs_validity, example_outputs, @@ -309,9 +309,9 @@ impl SystemFunctionImpl for ListTablesImpl { })?; Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - 
Array::new_with_array_data(DataType::Utf8, schema_names), - Array::new_with_array_data(DataType::Utf8, table_names), + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), + Array2::new_with_array_data(DataType::Utf8, table_names), ]) } } @@ -349,8 +349,8 @@ impl SystemFunctionImpl for ListSchemasImpl { })?; Batch::try_new([ - Array::new_with_array_data(DataType::Utf8, database_names), - Array::new_with_array_data(DataType::Utf8, schema_names), + Array2::new_with_array_data(DataType::Utf8, database_names), + Array2::new_with_array_data(DataType::Utf8, schema_names), ]) } } diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index 0535b3f5c..d556f837f 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -3,7 +3,7 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array, ArrayData}; +use crate::arrays::array::{Array2, ArrayData}; use crate::arrays::batch::Batch; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType}; @@ -134,7 +134,7 @@ impl TableInOutFunction for UnnestInOutImpl { #[derive(Debug)] pub struct UnnestInOutPartitionState { /// The array we're unnesting. - input: Option, + input: Option, /// Number of rows in the input batch. input_num_rows: usize, /// Current row we're processing. @@ -220,13 +220,13 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { } None => { // Row is null, produce as single null - Array::new_typed_null_array(child.datatype().clone(), 1)? + Array2::new_typed_null_array(child.datatype().clone(), 1)? } } } PhysicalType::UntypedNull => { // Just produce null array of length 1. 
- Array::new_untyped_null_array(1) + Array2::new_untyped_null_array(1) } other => { return Err(RayexecError::new(format!( diff --git a/crates/rayexec_parquet/src/reader/mod.rs b/crates/rayexec_parquet/src/reader/mod.rs index 15d8c4106..f6266424e 100644 --- a/crates/rayexec_parquet/src/reader/mod.rs +++ b/crates/rayexec_parquet/src/reader/mod.rs @@ -19,7 +19,7 @@ use parquet::file::reader::{ChunkReader, Length, SerializedPageReader}; use parquet::schema::types::ColumnDescPtr; use primitive::PrimitiveArrayReader; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::datatype::DataType; @@ -32,7 +32,7 @@ use crate::metadata::Metadata; pub trait ArrayBuilder: Send { /// Consume the current buffer and build an array. - fn build(&mut self) -> Result; + fn build(&mut self) -> Result; /// Sets the page reader the builder should now be reading from. 
fn set_page_reader(&mut self, page_reader: P) -> Result<()>; diff --git a/crates/rayexec_parquet/src/reader/primitive.rs b/crates/rayexec_parquet/src/reader/primitive.rs index 81e8c615f..f19860c40 100644 --- a/crates/rayexec_parquet/src/reader/primitive.rs +++ b/crates/rayexec_parquet/src/reader/primitive.rs @@ -4,7 +4,7 @@ use parquet::column::reader::basic::BasicColumnValueDecoder; use parquet::data_type::{DataType as ParquetDataType, Int96}; use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData}; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::array::cast_array; use rayexec_execution::arrays::compute::cast::behavior::CastFailBehavior; @@ -43,7 +43,7 @@ where } /// Take the currently read values and convert into an array. - pub fn take_array(&mut self) -> Result { + pub fn take_array(&mut self) -> Result { let def_levels = self.values_reader.take_def_levels(); let _rep_levels = self.values_reader.take_rep_levels(); @@ -87,8 +87,10 @@ where let needs_cast = build_type != self.datatype; let mut array = match bitmap { - Some(bitmap) => Array::new_with_validity_and_array_data(build_type, bitmap, array_data), - None => Array::new_with_array_data(build_type, array_data), + Some(bitmap) => { + Array2::new_with_validity_and_array_data(build_type, bitmap, array_data) + } + None => Array2::new_with_array_data(build_type, array_data), }; if needs_cast { @@ -106,7 +108,7 @@ where T::T: Copy + Default, Vec: IntoArrayData, { - fn build(&mut self) -> Result { + fn build(&mut self) -> Result { self.take_array() } diff --git a/crates/rayexec_parquet/src/reader/varlen.rs b/crates/rayexec_parquet/src/reader/varlen.rs index 5091e6f19..32ecdf837 100644 --- a/crates/rayexec_parquet/src/reader/varlen.rs +++ b/crates/rayexec_parquet/src/reader/varlen.rs @@ -5,7 +5,7 @@ use 
parquet::data_type::{ByteArray, DataType as ParquetDataType}; use parquet::decoding::view::ViewBuffer; use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::executor::builder::ArrayDataBuffer; @@ -32,7 +32,7 @@ where } } - pub fn take_array(&mut self) -> Result { + pub fn take_array(&mut self) -> Result { let def_levels = self.values_reader.take_def_levels(); let _rep_levels = self.values_reader.take_rep_levels(); @@ -55,10 +55,10 @@ where // The "null" values will just be zeroed metadata fields. insert_null_values(buffer.metadata_mut(), &bitmap); - Array::new_with_validity_and_array_data(self.datatype.clone(), bitmap, buffer.into_data()) + Array2::new_with_validity_and_array_data(self.datatype.clone(), bitmap, buffer.into_data()) } None => { - Array::new_with_array_data(self.datatype.clone(), view_buffer.into_buffer().into_data()) + Array2::new_with_array_data(self.datatype.clone(), view_buffer.into_buffer().into_data()) } } } @@ -73,7 +73,7 @@ impl

ArrayBuilder

for VarlenArrayReader

where P: PageReader, { - fn build(&mut self) -> Result { + fn build(&mut self) -> Result { self.take_array() } diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index 1e380b9df..716d7143a 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -12,7 +12,7 @@ use parquet::file::writer::{write_page, SerializedFileWriter}; use parquet::format::FileMetaData; use parquet::schema::types::SchemaDescriptor; use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage}; @@ -234,7 +234,7 @@ impl PageWriter for BufferedPageWriter { /// Write an array into the column writer. // TODO: Validity. -fn write_array(writer: &mut ColumnWriter

, array: &Array) -> Result<()> { +fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> Result<()> { if array.has_selection() { return Err(RayexecError::new( "Array needs to be unselected before it can be written", diff --git a/crates/rayexec_postgres/src/lib.rs b/crates/rayexec_postgres/src/lib.rs index 0f5e1e344..e4fdd795b 100644 --- a/crates/rayexec_postgres/src/lib.rs +++ b/crates/rayexec_postgres/src/lib.rs @@ -11,7 +11,7 @@ use futures::future::BoxFuture; use futures::stream::BoxStream; use futures::{StreamExt, TryFutureExt}; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::datatype::{DataType, DecimalTypeMeta}; use rayexec_execution::arrays::field::Field; @@ -401,32 +401,32 @@ impl PostgresClient { let mut arrays = Vec::with_capacity(typs.len()); for (idx, typ) in typs.iter().enumerate() { let arr = match typ { - DataType::Boolean => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int8 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int16 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int32 => Array::from_iter(row_iter::(&rows, idx)), - DataType::Int64 => Array::from_iter(row_iter::(&rows, idx)), + DataType::Boolean => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int8 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int16 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int32 => Array2::from_iter(row_iter::(&rows, idx)), + DataType::Int64 => Array2::from_iter(row_iter::(&rows, idx)), DataType::Decimal128(m) => { - let primitives = Array::from_iter(rows.iter().map(|row| { + let primitives = Array2::from_iter(rows.iter().map(|row| { let decimal = row.try_get::(idx).ok(); // TODO: Rescale decimal.map(|d| d.0.value) })); match primitives.validity() { - Some(validity) => Array::new_with_validity_and_array_data( + Some(validity) => Array2::new_with_validity_and_array_data( 
DataType::Decimal128(DecimalTypeMeta::new(m.precision, m.scale)), validity.clone(), primitives.array_data().clone(), ), - None => Array::new_with_array_data( + None => Array2::new_with_array_data( DataType::Decimal128(DecimalTypeMeta::new(m.precision, m.scale)), primitives.array_data().clone(), ), } } - DataType::Utf8 => Array::from_iter( + DataType::Utf8 => Array2::from_iter( rows.iter() .map(|row| -> Option<&str> { row.try_get(idx).ok() }), ), diff --git a/crates/rayexec_shell/src/result_table.rs b/crates/rayexec_shell/src/result_table.rs index 208c0f64e..b55a953e3 100644 --- a/crates/rayexec_shell/src/result_table.rs +++ b/crates/rayexec_shell/src/result_table.rs @@ -5,7 +5,7 @@ use std::task::{Context, Poll}; use futures::stream::Stream; use futures::{StreamExt, TryStreamExt}; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::pretty::table::PrettyTable; @@ -148,7 +148,7 @@ impl MaterializedResultTable { /// within that array. 
pub fn with_cell(&self, cell_fn: F, col: usize, row: usize) -> Result where - F: Fn(&Array, usize) -> Result, + F: Fn(&Array2, usize) -> Result, { let (batch_idx, row) = find_normalized_row(row, self.batches.iter().map(|b| b.num_rows())) .ok_or_else(|| RayexecError::new(format!("Row out of range: {}", row)))?; @@ -185,7 +185,7 @@ impl MaterializedResultTable { #[derive(Debug, Clone, PartialEq)] pub struct MaterializedColumn { - pub(crate) arrays: Vec, + pub(crate) arrays: Vec, } impl MaterializedColumn { @@ -199,7 +199,7 @@ impl MaterializedColumn { pub fn with_row(&self, row_fn: F, row: usize) -> Result where - F: Fn(&Array, usize) -> Result, + F: Fn(&Array2, usize) -> Result, { let (arr_idx, row) = find_normalized_row(row, self.arrays.iter().map(|arr| arr.logical_len())) diff --git a/crates/rayexec_unity_catalog/src/functions.rs b/crates/rayexec_unity_catalog/src/functions.rs index 5fe88720a..30bd17f08 100644 --- a/crates/rayexec_unity_catalog/src/functions.rs +++ b/crates/rayexec_unity_catalog/src/functions.rs @@ -7,7 +7,7 @@ use futures::future::BoxFuture; use futures::stream::BoxStream; use futures::{FutureExt, TryStreamExt}; use rayexec_error::Result; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::datatype::{DataType, DataTypeId}; use rayexec_execution::arrays::field::{Field, Schema}; @@ -133,11 +133,11 @@ impl UnityObjectsOperation for ListSchemasOperation { let resp = state.stream.try_next().await?; match resp { Some(resp) => { - let names = Array::from_iter(resp.schemas.iter().map(|s| s.name.as_str())); + let names = Array2::from_iter(resp.schemas.iter().map(|s| s.name.as_str())); let catalog_names = - Array::from_iter(resp.schemas.iter().map(|s| s.catalog_name.as_str())); + Array2::from_iter(resp.schemas.iter().map(|s| s.catalog_name.as_str())); let comments = - Array::from_iter(resp.schemas.iter().map(|s| s.comment.as_deref())); + 
Array2::from_iter(resp.schemas.iter().map(|s| s.comment.as_deref())); let batch = Batch::try_new([names, catalog_names, comments])?; Ok(Some(batch)) @@ -221,19 +221,20 @@ impl UnityObjectsOperation for ListTablesOperation { let resp = state.stream.try_next().await?; match resp { Some(resp) => { - let names = Array::from_iter(resp.tables.iter().map(|s| s.name.as_str())); + let names = Array2::from_iter(resp.tables.iter().map(|s| s.name.as_str())); let catalog_names = - Array::from_iter(resp.tables.iter().map(|s| s.catalog_name.as_str())); + Array2::from_iter(resp.tables.iter().map(|s| s.catalog_name.as_str())); let schema_names = - Array::from_iter(resp.tables.iter().map(|s| s.schema_name.as_str())); + Array2::from_iter(resp.tables.iter().map(|s| s.schema_name.as_str())); let table_types = - Array::from_iter(resp.tables.iter().map(|s| s.table_type.as_str())); - let data_source_formats = - Array::from_iter(resp.tables.iter().map(|s| s.data_source_format.as_str())); + Array2::from_iter(resp.tables.iter().map(|s| s.table_type.as_str())); + let data_source_formats = Array2::from_iter( + resp.tables.iter().map(|s| s.data_source_format.as_str()), + ); let storage_locations = - Array::from_iter(resp.tables.iter().map(|s| s.storage_location.as_str())); + Array2::from_iter(resp.tables.iter().map(|s| s.storage_location.as_str())); let comments = - Array::from_iter(resp.tables.iter().map(|s| s.comment.as_deref())); + Array2::from_iter(resp.tables.iter().map(|s| s.comment.as_deref())); let batch = Batch::try_new([ names, diff --git a/crates/rayexec_wasm/src/session.rs b/crates/rayexec_wasm/src/session.rs index 0c3e693d3..1f05eba92 100644 --- a/crates/rayexec_wasm/src/session.rs +++ b/crates/rayexec_wasm/src/session.rs @@ -182,7 +182,7 @@ impl WasmMaterializedColumn { #[cfg(test)] mod tests { - use rayexec_execution::arrays::array::Array; + use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::batch::Batch; use 
rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::{Field, Schema}; @@ -194,9 +194,9 @@ mod tests { let table = MaterializedResultTable::try_new( Schema::new([Field::new("c1", DataType::Int32, true)]), [ - Batch::try_new([Array::from_iter([0, 1, 2, 3])]).unwrap(), - Batch::try_new([Array::from_iter([4, 5])]).unwrap(), - Batch::try_new([Array::from_iter([6, 7, 8, 9, 10])]).unwrap(), + Batch::try_new([Array2::from_iter([0, 1, 2, 3])]).unwrap(), + Batch::try_new([Array2::from_iter([4, 5])]).unwrap(), + Batch::try_new([Array2::from_iter([6, 7, 8, 9, 10])]).unwrap(), ], ) .unwrap(); diff --git a/test_bin/integration_slt_hybrid.rs b/test_bin/integration_slt_hybrid.rs index 69c13283d..c8439ea16 100644 --- a/test_bin/integration_slt_hybrid.rs +++ b/test_bin/integration_slt_hybrid.rs @@ -5,7 +5,7 @@ use std::time::Duration; use rayexec_debug::table_storage::TablePreload; use rayexec_debug::{DebugDataSource, DebugDataSourceOptions}; use rayexec_error::Result; -use rayexec_execution::arrays::array::Array; +use rayexec_execution::arrays::array::Array2; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Field; @@ -37,8 +37,8 @@ pub fn main() -> Result<()> { Field::new("c2", DataType::Utf8, false), ], data: Batch::try_new([ - Array::from_iter([1_i64, 2_i64]), - Array::from_iter(["a", "b"]), + Array2::from_iter([1_i64, 2_i64]), + Array2::from_iter(["a", "b"]), ])?, }, // Table specific to insert into. Don't rely on this outside of @@ -51,8 +51,8 @@ pub fn main() -> Result<()> { Field::new("c2", DataType::Utf8, false), ], data: Batch::try_new([ - Array::from_iter([1_i64, 2_i64]), - Array::from_iter(["a", "b"]), + Array2::from_iter([1_i64, 2_i64]), + Array2::from_iter(["a", "b"]), ])?, }, ], From 516da58025c2a0f1d177d64f75cb1e29e136d06f Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 17:23:50 -0500 Subject: [PATCH 06/59] fixup! 
temp rename --- crates/rayexec_csv/src/reader.rs | 4 +- .../src/arrays/array/array_data.rs | 115 ++++++++++ .../rayexec_execution/src/arrays/array/mod.rs | 197 +++++++++--------- .../src/arrays/array/validity.rs | 92 ++++++++ .../src/arrays/buffer/buffer_manager.rs | 28 +++ .../src/arrays/buffer/mod.rs | 2 +- .../src/arrays/buffer/raw.rs | 2 + .../src/arrays/compute/cast/array.rs | 14 +- .../src/arrays/executor/builder.rs | 16 +- .../src/arrays/executor/physical_type.rs | 84 ++++---- .../src/arrays/executor/scalar/fill.rs | 4 +- .../src/arrays/executor/scalar/hash.rs | 4 +- .../src/arrays/executor/scalar/list.rs | 6 +- .../src/arrays/row/encoding.rs | 74 +++---- .../src/arrays/scalar/mod.rs | 4 +- .../src/execution/operators/unnest.rs | 4 +- .../operators/util/outer_join_tracker.rs | 4 +- .../src/functions/aggregate/builtin/first.rs | 4 +- .../src/functions/aggregate/builtin/minmax.rs | 4 +- .../src/functions/aggregate/builtin/sum.rs | 4 +- .../src/functions/aggregate/states.rs | 4 +- .../src/functions/scalar/builtin/arith/add.rs | 4 +- .../src/functions/scalar/builtin/arith/div.rs | 4 +- .../src/functions/scalar/builtin/arith/mul.rs | 6 +- .../src/functions/scalar/builtin/arith/rem.rs | 4 +- .../src/functions/scalar/builtin/arith/sub.rs | 4 +- .../functions/scalar/builtin/comparison.rs | 6 +- .../scalar/builtin/list/list_extract.rs | 4 +- .../src/functions/scalar/builtin/negate.rs | 4 +- .../functions/scalar/builtin/numeric/abs.rs | 4 +- .../functions/scalar/builtin/numeric/acos.rs | 4 +- .../functions/scalar/builtin/numeric/asin.rs | 4 +- .../functions/scalar/builtin/numeric/atan.rs | 4 +- .../functions/scalar/builtin/numeric/cbrt.rs | 4 +- .../functions/scalar/builtin/numeric/ceil.rs | 4 +- .../functions/scalar/builtin/numeric/cos.rs | 4 +- .../scalar/builtin/numeric/degrees.rs | 4 +- .../functions/scalar/builtin/numeric/exp.rs | 4 +- .../functions/scalar/builtin/numeric/floor.rs | 4 +- .../functions/scalar/builtin/numeric/ln.rs | 4 +- 
.../functions/scalar/builtin/numeric/log.rs | 6 +- .../functions/scalar/builtin/numeric/mod.rs | 4 +- .../scalar/builtin/numeric/radians.rs | 4 +- .../functions/scalar/builtin/numeric/sin.rs | 4 +- .../functions/scalar/builtin/numeric/sqrt.rs | 4 +- .../functions/scalar/builtin/numeric/tan.rs | 4 +- .../functions/scalar/builtin/string/case.rs | 4 +- .../src/functions/table/builtin/unnest.rs | 4 +- crates/rayexec_parquet/src/reader/mod.rs | 4 +- .../rayexec_parquet/src/reader/primitive.rs | 8 +- crates/rayexec_parquet/src/writer/mod.rs | 18 +- 51 files changed, 527 insertions(+), 287 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/array/array_data.rs create mode 100644 crates/rayexec_execution/src/arrays/array/validity.rs diff --git a/crates/rayexec_csv/src/reader.rs b/crates/rayexec_csv/src/reader.rs index 23f57de4f..2b0ff3721 100644 --- a/crates/rayexec_csv/src/reader.rs +++ b/crates/rayexec_csv/src/reader.rs @@ -23,7 +23,7 @@ use bytes::Bytes; use futures::stream::BoxStream; use futures::StreamExt; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array2, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::parse::{ @@ -524,7 +524,7 @@ impl AsyncCsvStream { where T: Default, P: Parser, - PrimitiveStorage: Into, + PrimitiveStorage: Into, { let mut values = Vec::with_capacity(completed.num_completed()); let mut validity = Bitmap::with_capacity(completed.num_completed()); diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs new file mode 100644 index 000000000..fb67eda4e --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -0,0 +1,115 @@ +use std::ops::Deref; + +use rayexec_error::{RayexecError, Result}; + +use 
crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::ArrayBuffer; + +/// Abstraction layer for determining where an array's buffer resides. +#[derive(Debug)] +pub struct ArrayData { + inner: ArrayDataInner, +} + +#[derive(Debug)] +enum ArrayDataInner { + Managed(B::CowPtr>), + Owned(ArrayBuffer), + Uninit, +} + +impl ArrayData +where + B: BufferManager, +{ + pub fn owned(buffer: ArrayBuffer) -> Self { + ArrayData { + inner: ArrayDataInner::Owned(buffer), + } + } + + pub fn managed(buffer: B::CowPtr>) -> Self { + ArrayData { + inner: ArrayDataInner::Managed(buffer), + } + } + + pub fn is_managed(&self) -> bool { + matches!(self.inner, ArrayDataInner::Managed(_)) + } + + pub fn is_owned(&self) -> bool { + matches!(self.inner, ArrayDataInner::Owned(_)) + } + + /// Try to make the array managed by the buffer manager. + /// + /// Does nothing if the array is already managed. + /// + /// Returns an error if the array cannot be made to be managed. The array is + /// still valid (and remains in the 'owned' state). + /// + /// A cloned pointer to the newly managed array will be returned. + pub fn make_managed(&mut self, manager: &B) -> Result>> { + match &mut self.inner { + ArrayDataInner::Managed(m) => Ok(m.clone()), // Already managed. + ArrayDataInner::Owned(_) => { + let orig = std::mem::replace(&mut self.inner, ArrayDataInner::Uninit); + let array = match orig { + ArrayDataInner::Owned(array) => array, + _ => unreachable!("variant already checked"), + }; + + match manager.make_cow(array) { + Ok(managed) => { + self.inner = ArrayDataInner::Managed(managed); + match &self.inner { + ArrayDataInner::Managed(m) => Ok(m.clone()), + _ => unreachable!("variant just set"), + } + } + Err(orig) => { + // Manager rejected it, put it back as owned and return + // an error. 
+ self.inner = ArrayDataInner::Owned(orig); + Err(RayexecError::new("Failed to make batch array managed")) + } + } + } + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } + + pub fn try_as_mut(&mut self) -> Result<&mut ArrayBuffer> { + match &mut self.inner { + ArrayDataInner::Managed(_) => Err(RayexecError::new( + "Mut references from managed arrays not yet supported", + )), + ArrayDataInner::Owned(array) => Ok(array), + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } +} +impl AsRef> for ArrayData +where + B: BufferManager, +{ + fn as_ref(&self) -> &ArrayBuffer { + match &self.inner { + ArrayDataInner::Managed(m) => m.as_ref(), + ArrayDataInner::Owned(array) => array, + ArrayDataInner::Uninit => panic!("array in uninit state"), + } + } +} + +impl Deref for ArrayData +where + B: BufferManager, +{ + type Target = ArrayBuffer; + + fn deref(&self) -> &Self::Target { + ArrayData::as_ref(&self) + } +} diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index c9d29d347..f9ccb6d60 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -1,3 +1,6 @@ +pub mod array_data; +pub mod validity; + mod shared_or_owned; use std::fmt::Debug; @@ -74,7 +77,7 @@ pub struct Array2 { /// into account the selection vector, and always maps directly to the data. pub(crate) validity: Option, /// The physical data. 
- pub(crate) data: ArrayData, + pub(crate) data: ArrayData2, } impl Array2 { @@ -111,7 +114,7 @@ impl Array2 { }) } - pub fn new_with_array_data(datatype: DataType, data: impl Into) -> Self { + pub fn new_with_array_data(datatype: DataType, data: impl Into) -> Self { Array2 { datatype, selection: None, @@ -123,7 +126,7 @@ impl Array2 { pub fn new_with_validity_and_array_data( datatype: DataType, validity: impl Into, - data: impl Into, + data: impl Into, ) -> Self { Array2 { datatype, @@ -137,7 +140,7 @@ impl Array2 { datatype: DataType, validity: impl Into, selection: impl Into, - data: impl Into, + data: impl Into, ) -> Self { Array2 { datatype, @@ -242,11 +245,11 @@ impl Array2 { /// Returns the array data. /// /// ArrayData can be cheaply cloned. - pub fn array_data(&self) -> &ArrayData { + pub fn array_data(&self) -> &ArrayData2 { &self.data } - pub fn into_array_data(self) -> ArrayData { + pub fn into_array_data(self) -> ArrayData2 { self.data } @@ -297,13 +300,13 @@ impl Array2 { } match self.array_data() { - ArrayData::UntypedNull(_) => Ok(Array2 { + ArrayData2::UntypedNull(_) => Ok(Array2 { datatype: self.datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(self.logical_len()).into(), }), - ArrayData::Boolean(_) => UnaryExecutor::execute::( + ArrayData2::Boolean(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -311,7 +314,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Int8(_) => UnaryExecutor::execute::( + ArrayData2::Int8(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -319,7 +322,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Int16(_) => UnaryExecutor::execute::( + ArrayData2::Int16(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -327,7 +330,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Int32(_) => UnaryExecutor::execute::( + ArrayData2::Int32(_) => 
UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -335,7 +338,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Int64(_) => UnaryExecutor::execute::( + ArrayData2::Int64(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -343,7 +346,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Int128(_) => UnaryExecutor::execute::( + ArrayData2::Int128(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -351,7 +354,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::UInt8(_) => UnaryExecutor::execute::( + ArrayData2::UInt8(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -359,7 +362,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::UInt16(_) => UnaryExecutor::execute::( + ArrayData2::UInt16(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -367,7 +370,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::UInt32(_) => UnaryExecutor::execute::( + ArrayData2::UInt32(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -375,7 +378,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::UInt64(_) => UnaryExecutor::execute::( + ArrayData2::UInt64(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -383,7 +386,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::UInt128(_) => UnaryExecutor::execute::( + ArrayData2::UInt128(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -391,7 +394,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Float16(_) => UnaryExecutor::execute::( + ArrayData2::Float16(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -399,7 +402,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Float32(_) => UnaryExecutor::execute::( + ArrayData2::Float32(_) => 
UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -407,7 +410,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Float64(_) => UnaryExecutor::execute::( + ArrayData2::Float64(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -415,7 +418,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Interval(_) => UnaryExecutor::execute::( + ArrayData2::Interval(_) => UnaryExecutor::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -423,7 +426,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData::Binary(_) => { + ArrayData2::Binary(_) => { // Use the german varlen storage for all output varlen arrays, // even if the input use using some other variant. // @@ -450,7 +453,7 @@ impl Array2 { ) } } - ArrayData::List(_) => Err(RayexecError::new("Cannot yet unselect list arrays")), + ArrayData2::List(_) => Err(RayexecError::new("Cannot yet unselect list arrays")), } } @@ -460,67 +463,67 @@ impl Array2 { pub fn physical_scalar(&self, idx: usize) -> Result { Ok(match &self.datatype { DataType::Null => match &self.data { - ArrayData::UntypedNull(_) => ScalarValue::Null, + ArrayData2::UntypedNull(_) => ScalarValue::Null, _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Boolean => match &self.data { - ArrayData::Boolean(arr) => arr.as_ref().as_ref().value(idx).into(), + ArrayData2::Boolean(arr) => arr.as_ref().as_ref().value(idx).into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float16 => match &self.data { - ArrayData::Float16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float32 => match &self.data { - ArrayData::Float32(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return 
Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Float64 => match &self.data { - ArrayData::Float64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Float64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int8 => match &self.data { - ArrayData::Int8(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int8(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int16 => match &self.data { - ArrayData::Int16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int32 => match &self.data { - ArrayData::Int32(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int64 => match &self.data { - ArrayData::Int64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Int128 => match &self.data { - ArrayData::Int64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Int64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt8 => match &self.data { - ArrayData::UInt8(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt8(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt16 => match &self.data { - ArrayData::UInt16(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt16(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt32 => match &self.data { - ArrayData::UInt32(arr) => 
arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt32(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt64 => match &self.data { - ArrayData::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::UInt128 => match &self.data { - ArrayData::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::UInt64(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Decimal64(m) => match &self.data { - ArrayData::Int64(arr) => ScalarValue::Decimal64(Decimal64Scalar { + ArrayData2::Int64(arr) => ScalarValue::Decimal64(Decimal64Scalar { precision: m.precision, scale: m.scale, value: arr.as_ref().as_ref()[idx], @@ -528,7 +531,7 @@ impl Array2 { _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Decimal128(m) => match &self.data { - ArrayData::Int128(arr) => ScalarValue::Decimal128(Decimal128Scalar { + ArrayData2::Int128(arr) => ScalarValue::Decimal128(Decimal128Scalar { precision: m.precision, scale: m.scale, value: arr.as_ref().as_ref()[idx], @@ -536,33 +539,33 @@ impl Array2 { _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Date32 => match &self.data { - ArrayData::Int32(arr) => ScalarValue::Date32(arr.as_ref().as_ref()[idx]), + ArrayData2::Int32(arr) => ScalarValue::Date32(arr.as_ref().as_ref()[idx]), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Date64 => match &self.data { - ArrayData::Int64(arr) => ScalarValue::Date64(arr.as_ref().as_ref()[idx]), + ArrayData2::Int64(arr) => ScalarValue::Date64(arr.as_ref().as_ref()[idx]), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Timestamp(m) => match &self.data { - ArrayData::Int64(arr) => 
ScalarValue::Timestamp(TimestampScalar { + ArrayData2::Int64(arr) => ScalarValue::Timestamp(TimestampScalar { unit: m.unit, value: arr.as_ref().as_ref()[idx], }), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Interval => match &self.data { - ArrayData::Interval(arr) => arr.as_ref().as_ref()[idx].into(), + ArrayData2::Interval(arr) => arr.as_ref().as_ref()[idx].into(), _other => return Err(array_not_valid_for_type_err(&self.datatype)), }, DataType::Utf8 => { let v = match &self.data { - ArrayData::Binary(BinaryData::Binary(arr)) => arr + ArrayData2::Binary(BinaryData::Binary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::LargeBinary(arr)) => arr + ArrayData2::Binary(BinaryData::LargeBinary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::German(arr)) => arr + ArrayData2::Binary(BinaryData::German(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, _other => return Err(array_not_valid_for_type_err(&self.datatype)), @@ -572,13 +575,13 @@ impl Array2 { } DataType::Binary => { let v = match &self.data { - ArrayData::Binary(BinaryData::Binary(arr)) => arr + ArrayData2::Binary(BinaryData::Binary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::LargeBinary(arr)) => arr + ArrayData2::Binary(BinaryData::LargeBinary(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, - ArrayData::Binary(BinaryData::German(arr)) => arr + ArrayData2::Binary(BinaryData::German(arr)) => arr .get(idx) .ok_or_else(|| RayexecError::new("missing data"))?, _other => return Err(array_not_valid_for_type_err(&self.datatype)), @@ -587,7 +590,7 @@ impl Array2 { } DataType::Struct(_) => not_implemented!("get value: struct"), DataType::List(_) => match &self.data { - ArrayData::List(list) => { + ArrayData2::List(list) => { let meta = list .metadata 
.as_slice() @@ -824,7 +827,7 @@ impl FromIterator for Array2 { datatype: DataType::Utf8, selection: None, validity: None, - data: ArrayData::Binary(BinaryData::German(Arc::new(german))), + data: ArrayData2::Binary(BinaryData::German(Arc::new(german))), } } } @@ -843,21 +846,21 @@ impl<'a> FromIterator<&'a str> for Array2 { datatype: DataType::Utf8, selection: None, validity: None, - data: ArrayData::Binary(BinaryData::German(Arc::new(german))), + data: ArrayData2::Binary(BinaryData::German(Arc::new(german))), } } } macro_rules! impl_primitive_from_iter { ($prim:ty, $variant:ident) => { - impl FromIterator<$prim> for Array { + impl FromIterator<$prim> for Array2 { fn from_iter>(iter: T) -> Self { let vals: Vec<_> = iter.into_iter().collect(); - Array { + Array2 { datatype: DataType::$variant, selection: None, validity: None, - data: ArrayData::$variant(Arc::new(vals.into())), + data: ArrayData2::$variant(Arc::new(vals.into())), } } } @@ -885,13 +888,13 @@ impl FromIterator for Array2 { datatype: DataType::Boolean, selection: None, validity: None, - data: ArrayData::Boolean(Arc::new(vals.into())), + data: ArrayData2::Boolean(Arc::new(vals.into())), } } } #[derive(Debug, Clone, PartialEq)] -pub enum ArrayData { +pub enum ArrayData2 { UntypedNull(UntypedNullStorage), Boolean(Arc), Float16(Arc>), @@ -912,7 +915,7 @@ pub enum ArrayData { List(Arc), } -impl ArrayData { +impl ArrayData2 { pub fn physical_type(&self) -> PhysicalType { match self { Self::UntypedNull(_) => PhysicalType::UntypedNull, @@ -959,7 +962,7 @@ impl ArrayData { BinaryData::LargeBinary(s) => s.len(), BinaryData::German(s) => s.len(), }, - ArrayData::List(s) => s.len(), + ArrayData2::List(s) => s.len(), } } @@ -988,111 +991,111 @@ impl BinaryData { } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: UntypedNullStorage) -> Self { - ArrayData::UntypedNull(value) + ArrayData2::UntypedNull(value) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: BooleanStorage) 
-> Self { - ArrayData::Boolean(value.into()) + ArrayData2::Boolean(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float16(value.into()) + ArrayData2::Float16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float32(value.into()) + ArrayData2::Float32(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Float64(value.into()) + ArrayData2::Float64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int8(value.into()) + ArrayData2::Int8(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int16(value.into()) + ArrayData2::Int16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int32(value.into()) + ArrayData2::Int32(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int64(value.into()) + ArrayData2::Int64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Int128(value.into()) + ArrayData2::Int128(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt8(value.into()) + ArrayData2::UInt8(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt16(value.into()) + ArrayData2::UInt16(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt32(value.into()) + ArrayData2::UInt32(value.into()) } } -impl From> for 
ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt64(value.into()) + ArrayData2::UInt64(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::UInt128(value.into()) + ArrayData2::UInt128(value.into()) } } -impl From> for ArrayData { +impl From> for ArrayData2 { fn from(value: PrimitiveStorage) -> Self { - ArrayData::Interval(value.into()) + ArrayData2::Interval(value.into()) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: GermanVarlenStorage) -> Self { - ArrayData::Binary(BinaryData::German(Arc::new(value))) + ArrayData2::Binary(BinaryData::German(Arc::new(value))) } } -impl From for ArrayData { +impl From for ArrayData2 { fn from(value: ListStorage) -> Self { - ArrayData::List(Arc::new(value)) + ArrayData2::List(Arc::new(value)) } } diff --git a/crates/rayexec_execution/src/arrays/array/validity.rs b/crates/rayexec_execution/src/arrays/array/validity.rs new file mode 100644 index 000000000..a14f15a38 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/validity.rs @@ -0,0 +1,92 @@ +use crate::arrays::bitmap::Bitmap; + +#[derive(Debug, Clone)] +pub struct Validity { + inner: ValidityInner, +} + +#[derive(Debug, Clone)] +enum ValidityInner { + /// No mask has been set, assume all entries valid. + NoMask { len: usize }, + /// Mask has been set. Bitmap indicates which entries are valid or invalid. + Mask { bitmap: Bitmap }, +} + +impl Validity { + pub fn new_all_valid(len: usize) -> Self { + Validity { + inner: ValidityInner::NoMask { len }, + } + } + + pub fn len(&self) -> usize { + match &self.inner { + ValidityInner::NoMask { len } => *len, + ValidityInner::Mask { bitmap } => bitmap.len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn all_valid(&self) -> bool { + match &self.inner { + ValidityInner::NoMask { .. 
} => true, + ValidityInner::Mask { bitmap } => bitmap.is_all_true(), + } + } + + pub fn is_valid(&self, idx: usize) -> bool { + match &self.inner { + ValidityInner::NoMask { .. } => true, + ValidityInner::Mask { bitmap } => bitmap.value(idx), + } + } + + pub fn set_valid(&mut self, idx: usize) { + if let ValidityInner::Mask { bitmap } = &mut self.inner { + bitmap.set_unchecked(idx, true) + } + // Otherwise we already assume everything is valid. + } + + pub fn set_invalid(&mut self, idx: usize) { + match &mut self.inner { + ValidityInner::NoMask { len } => { + let mut bitmap = Bitmap::new_with_all_true(*len); + bitmap.set_unchecked(idx, false); + self.inner = ValidityInner::Mask { bitmap } + } + ValidityInner::Mask { bitmap } => bitmap.set_unchecked(idx, false), + } + } + + pub fn iter(&self) -> ValidityIter { + ValidityIter { + idx: 0, + validity: self, + } + } +} + +#[derive(Debug)] +pub struct ValidityIter<'a> { + idx: usize, + validity: &'a Validity, +} + +impl<'a> Iterator for ValidityIter<'a> { + type Item = bool; + + fn next(&mut self) -> Option { + if self.idx >= self.validity.len() { + return None; + } + + let val = self.validity.is_valid(self.idx); + self.idx += 1; + Some(val) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs index 5770927e7..b564c0c12 100644 --- a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs +++ b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs @@ -1,21 +1,49 @@ use std::fmt::Debug; +use std::ops::Deref; +use std::sync::Arc; use rayexec_error::Result; pub trait BufferManager: Debug + Sync + Send + Clone { type Reservation: Debug; + // TODO: T => Spillable or something. + type CowPtr: CowPtr + where + T: Debug; fn reserve_external(&self, additional_bytes: usize) -> Result; + + fn make_cow(&self, item: T) -> Result, T>; +} + +pub trait CowPtr: Debug + Clone + AsRef + Deref { + // TODO: Clone on write. 
+ // + // Will need to be able to get the underlying reservation in order to track + // appropriately. + // + // Also might need to recurse to make sure everything is writable, not sure + // yet. } +impl CowPtr for Arc where T: Debug {} + /// Placeholder buffer manager. #[derive(Debug, Clone)] pub struct NopBufferManager; impl BufferManager for NopBufferManager { type Reservation = (); + type CowPtr + = Arc + where + T: Debug; fn reserve_external(&self, _additional_bytes: usize) -> Result { Ok(()) } + + fn make_cow(&self, item: T) -> Result, T> { + Ok(Arc::new(item)) + } } diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index 7864609c6..ce6915112 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -40,7 +40,7 @@ where manager: &B, capacity: usize, ) -> Result { - let primary = RawBufferParts::try_new(manager, capacity)?; + let primary = RawBufferParts::try_new::(manager, capacity)?; Ok(ArrayBuffer { physical_type: S::PHYSICAL_TYPE, diff --git a/crates/rayexec_execution/src/arrays/buffer/raw.rs b/crates/rayexec_execution/src/arrays/buffer/raw.rs index 782f6dfa2..d55189f74 100644 --- a/crates/rayexec_execution/src/arrays/buffer/raw.rs +++ b/crates/rayexec_execution/src/arrays/buffer/raw.rs @@ -1,3 +1,5 @@ +use rayexec_error::Result; + use super::buffer_manager::BufferManager; #[derive(Debug)] diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 9886d4c10..ec777366f 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -48,7 +48,7 @@ use super::parse::{ UInt64Parser, UInt8Parser, }; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, TimeUnit}; use 
crate::arrays::executor::builder::{ @@ -252,7 +252,7 @@ where S: PhysicalStorage, D: DecimalType, S::Type<'a>: PrimInt, - ArrayData: From>, + ArrayData2: From>, { let new_meta = to.try_get_decimal_type_meta()?; let arr_meta = arr.datatype().try_get_decimal_type_meta()?; @@ -321,7 +321,7 @@ where S: PhysicalStorage, D: DecimalType, S::Type<'a>: Float, - ArrayData: From>, + ArrayData2: From>, { let decimal_meta = to.try_get_decimal_type_meta()?; let scale = decimal_meta.scale; @@ -369,7 +369,7 @@ where S: PhysicalStorage, F: Float + Default + Copy, <::Storage<'a> as AddressableStorage>::T: ToPrimitive, - ArrayData: From>, + ArrayData2: From>, { let decimal_meta = arr.datatype().try_get_decimal_type_meta()?; @@ -423,7 +423,7 @@ where S: PhysicalStorage, D: DecimalType, S::Type<'a>: PrimInt, - ArrayData: From>, + ArrayData2: From>, { let decimal_meta = to.try_get_decimal_type_meta()?; let scale = decimal_meta.scale; @@ -516,7 +516,7 @@ where S: PhysicalStorage, S::Type<'a>: ToPrimitive, T: NumCast + Default + Copy, - ArrayData: From>, + ArrayData2: From>, { let mut fail_state = behavior.new_state_for_array(arr); let output = UnaryExecutor::execute::( @@ -707,7 +707,7 @@ fn cast_parse_primitive( where T: Default + Copy, P: Parser, - ArrayData: From>, + ArrayData2: From>, { let mut fail_state = behavior.new_state_for_array(arr); let output = UnaryExecutor::execute::( diff --git a/crates/rayexec_execution/src/arrays/executor/builder.rs b/crates/rayexec_execution/src/arrays/executor/builder.rs index a634dc552..57c46a2f7 100644 --- a/crates/rayexec_execution/src/arrays/executor/builder.rs +++ b/crates/rayexec_execution/src/arrays/executor/builder.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; use std::sync::Arc; use super::physical_type::{AsBytes, VarlenType}; -use crate::arrays::array::{ArrayData, BinaryData}; +use crate::arrays::array::{ArrayData2, BinaryData}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::storage::{ @@ 
-59,7 +59,7 @@ pub trait ArrayDataBuffer { fn put(&mut self, idx: usize, val: &Self::Type); /// Convert the buffer into array data. - fn into_data(self) -> ArrayData; + fn into_data(self) -> ArrayData2; } #[derive(Debug)] @@ -99,8 +99,8 @@ impl ArrayDataBuffer for BooleanBuffer { self.values.set_unchecked(idx, *val) } - fn into_data(self) -> ArrayData { - ArrayData::Boolean(Arc::new(BooleanStorage(self.values))) + fn into_data(self) -> ArrayData2 { + ArrayData2::Boolean(Arc::new(BooleanStorage(self.values))) } } @@ -125,7 +125,7 @@ impl ArrayDataBuffer for PrimitiveBuffer where T: Copy, Vec: Into>, - ArrayData: From>, + ArrayData2: From>, { type Type = T; @@ -137,7 +137,7 @@ where self.values[idx] = *val } - fn into_data(self) -> ArrayData { + fn into_data(self) -> ArrayData2 { PrimitiveStorage::from(self.values).into() } } @@ -241,13 +241,13 @@ where } } - fn into_data(self) -> ArrayData { + fn into_data(self) -> ArrayData2 { let storage = GermanVarlenStorage { metadata: self.metadata.into(), data: self.data.into(), }; - ArrayData::Binary(BinaryData::German(Arc::new(storage))) + ArrayData2::Binary(BinaryData::German(Arc::new(storage))) } } diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index ae0e76d04..159c9b01c 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use super::builder::{ArrayDataBuffer, BooleanBuffer, GermanVarlenBuffer, PrimitiveBuffer}; -use crate::arrays::array::{Array2, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData2, BinaryData}; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{ AddressableStorage, @@ -43,7 +43,7 @@ pub enum PhysicalType { } impl PhysicalType { - pub fn zeroed_array_data(&self, len: usize) -> ArrayData 
{ + pub fn zeroed_array_data(&self, len: usize) -> ArrayData2 { match self { Self::UntypedNull => UntypedNullStorage(len).into(), Self::Boolean => BooleanBuffer::with_len(len).into_data(), @@ -189,7 +189,7 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { type Storage<'a>: AddressableStorage>; /// Gets the storage for the array that we can access directly. - fn get_storage(data: &ArrayData) -> Result>; + fn get_storage(data: &ArrayData2) -> Result>; } /// Type that's able to be used for any physical type. @@ -204,7 +204,7 @@ impl PhysicalStorage for PhysicalAny { type Type<'a> = (); type Storage<'a> = UnitStorage; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { Ok(UnitStorage(data.len())) } } @@ -238,9 +238,9 @@ impl PhysicalStorage for PhysicalUntypedNull { type Type<'a> = UntypedNull; type Storage<'a> = UntypedNullStorage; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UntypedNull(s) => Ok(*s), + ArrayData2::UntypedNull(s) => Ok(*s), _ => Err(RayexecError::new("invalid storage")), } } @@ -253,9 +253,9 @@ impl PhysicalStorage for PhysicalBool { type Type<'a> = bool; type Storage<'a> = BooleanStorageRef<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Boolean(storage) => Ok(storage.as_boolean_storage_ref()), + ArrayData2::Boolean(storage) => Ok(storage.as_boolean_storage_ref()), _ => Err(RayexecError::new("invalid storage, expected boolean")), } } @@ -268,9 +268,9 @@ impl PhysicalStorage for PhysicalI8 { type Type<'a> = i8; type Storage<'a> = PrimitiveStorageSlice<'a, i8>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int8(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int8(storage) => Ok(storage.as_primitive_storage_slice()), _ => 
Err(RayexecError::new("invalid storage, expected int8")), } } @@ -283,9 +283,9 @@ impl PhysicalStorage for PhysicalI16 { type Type<'a> = i16; type Storage<'a> = PrimitiveStorageSlice<'a, i16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int16")), } } @@ -298,9 +298,9 @@ impl PhysicalStorage for PhysicalI32 { type Type<'a> = i32; type Storage<'a> = PrimitiveStorageSlice<'a, i32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int32")), } } @@ -313,9 +313,9 @@ impl PhysicalStorage for PhysicalI64 { type Type<'a> = i64; type Storage<'a> = PrimitiveStorageSlice<'a, i64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int64")), } } @@ -328,9 +328,9 @@ impl PhysicalStorage for PhysicalI128 { type Type<'a> = i128; type Storage<'a> = PrimitiveStorageSlice<'a, i128>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Int128(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Int128(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected int128")), } } @@ -343,9 +343,9 @@ impl PhysicalStorage for PhysicalU8 { type Type<'a> = u8; type Storage<'a> = 
PrimitiveStorageSlice<'a, u8>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt8(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt8(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u8")), } } @@ -358,9 +358,9 @@ impl PhysicalStorage for PhysicalU16 { type Type<'a> = u16; type Storage<'a> = PrimitiveStorageSlice<'a, u16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u16")), } } @@ -373,9 +373,9 @@ impl PhysicalStorage for PhysicalU32 { type Type<'a> = u32; type Storage<'a> = PrimitiveStorageSlice<'a, u32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u32")), } } @@ -388,9 +388,9 @@ impl PhysicalStorage for PhysicalU64 { type Type<'a> = u64; type Storage<'a> = PrimitiveStorageSlice<'a, u64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u64")), } } @@ -403,9 +403,9 @@ impl PhysicalStorage for PhysicalU128 { type Type<'a> = u128; type Storage<'a> = PrimitiveStorageSlice<'a, u128>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::UInt128(storage) => 
Ok(storage.as_primitive_storage_slice()), + ArrayData2::UInt128(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected u128")), } } @@ -418,9 +418,9 @@ impl PhysicalStorage for PhysicalF16 { type Type<'a> = f16; type Storage<'a> = PrimitiveStorageSlice<'a, f16>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float16(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float16(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f32")), } } @@ -433,9 +433,9 @@ impl PhysicalStorage for PhysicalF32 { type Type<'a> = f32; type Storage<'a> = PrimitiveStorageSlice<'a, f32>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float32(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float32(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f32")), } } @@ -448,9 +448,9 @@ impl PhysicalStorage for PhysicalF64 { type Type<'a> = f64; type Storage<'a> = PrimitiveStorageSlice<'a, f64>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Float64(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Float64(storage) => Ok(storage.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected f64")), } } @@ -463,9 +463,9 @@ impl PhysicalStorage for PhysicalInterval { type Type<'a> = Interval; type Storage<'a> = PrimitiveStorageSlice<'a, Interval>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Interval(storage) => Ok(storage.as_primitive_storage_slice()), + ArrayData2::Interval(storage) => Ok(storage.as_primitive_storage_slice()), _ => 
Err(RayexecError::new("invalid storage, expected interval")), } } @@ -478,9 +478,9 @@ impl PhysicalStorage for PhysicalBinary { type Type<'a> = &'a [u8]; type Storage<'a> = BinaryDataStorage<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Binary(binary) => match binary { + ArrayData2::Binary(binary) => match binary { BinaryData::Binary(b) => { Ok(BinaryDataStorage::Binary(b.as_contiguous_storage_slice())) } @@ -501,9 +501,9 @@ impl PhysicalStorage for PhysicalUtf8 { type Type<'a> = &'a str; type Storage<'a> = StrDataStorage<'a>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::Binary(binary) => match binary { + ArrayData2::Binary(binary) => match binary { BinaryData::Binary(b) => { Ok(BinaryDataStorage::Binary(b.as_contiguous_storage_slice()).into()) } @@ -595,9 +595,9 @@ impl PhysicalStorage for PhysicalList { type Type<'a> = ListItemMetadata; type Storage<'a> = PrimitiveStorageSlice<'a, ListItemMetadata>; - fn get_storage(data: &ArrayData) -> Result> { + fn get_storage(data: &ArrayData2) -> Result> { match data { - ArrayData::List(storage) => Ok(storage.metadata.as_primitive_storage_slice()), + ArrayData2::List(storage) => Ok(storage.metadata.as_primitive_storage_slice()), _ => Err(RayexecError::new("invalid storage, expected list")), } } diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 1bca6818a..175a0a72b 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -2,7 +2,7 @@ use std::borrow::Borrow; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use 
crate::arrays::executor::builder::{ @@ -286,7 +286,7 @@ fn concat_lists(datatype: DataType, arrays: &[&Array2], total_len: usize) -> Res let inner_arrays = arrays .iter() .map(|arr| match arr.array_data() { - ArrayData::List(list) => { + ArrayData2::List(list) => { if list.array.has_selection() { return Err(RayexecError::new("List child array has selection")); } diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 3b65255b0..2b2c46994 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -2,7 +2,7 @@ use ahash::RandomState; use half::f16; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, @@ -220,7 +220,7 @@ impl HashExecutor { H: SetHash, { let inner = match array.array_data() { - ArrayData::List(list) => &list.array, + ArrayData2::List(list) => &list.array, other => { return Err(RayexecError::new(format!( "Unexpected array data for list hashing: {:?}", diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index 734318fcc..d5cd0d537 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -1,6 +1,6 @@ use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage}; @@ -173,7 +173,7 @@ where S: PhysicalStorage, { match array.array_data() { - ArrayData::List(d) => { + ArrayData2::List(d) => { let 
storage = S::get_storage(d.array.array_data())?; let validity = d.array.validity(); Ok((storage, validity)) @@ -184,7 +184,7 @@ where fn get_inner_array_selection(array: &Array2) -> Result> { match array.array_data() { - ArrayData::List(d) => Ok(d.array.selection_vector()), + ArrayData2::List(d) => Ok(d.array.selection_vector()), _ => Err(RayexecError::new("Expected list array data")), } } diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index 22f61fb05..1c9bb9e60 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -1,7 +1,7 @@ use half::f16; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData, BinaryData}; +use crate::arrays::array::{Array2, ArrayData2, BinaryData}; use crate::arrays::executor::physical_type::{ AsBytes, PhysicalBinary, @@ -192,58 +192,58 @@ impl ComparableRowEncoder { let mut row_offset = *offsets.last().unwrap(); for (arr, cmp_col) in columns.iter().zip(self.columns.iter()) { row_offset = match arr.array_data() { - ArrayData::UntypedNull(_) => { + ArrayData2::UntypedNull(_) => { Self::encode_untyped_null(cmp_col, data, row_offset)? 
} - ArrayData::Boolean(_) => Self::encode_primitive::( + ArrayData2::Boolean(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int8(_) => Self::encode_primitive::( + ArrayData2::Int8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int16(_) => Self::encode_primitive::( + ArrayData2::Int16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int32(_) => Self::encode_primitive::( + ArrayData2::Int32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int64(_) => Self::encode_primitive::( + ArrayData2::Int64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Int128(_) => Self::encode_primitive::( + ArrayData2::Int128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt8(_) => Self::encode_primitive::( + ArrayData2::UInt8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt16(_) => Self::encode_primitive::( + ArrayData2::UInt16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt32(_) => Self::encode_primitive::( + ArrayData2::UInt32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt64(_) => Self::encode_primitive::( + ArrayData2::UInt64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::UInt128(_) => Self::encode_primitive::( + ArrayData2::UInt128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float16(_) => Self::encode_primitive::( + ArrayData2::Float16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float32(_) => Self::encode_primitive::( + ArrayData2::Float32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Float64(_) => 
Self::encode_primitive::( + ArrayData2::Float64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Interval(_) => Self::encode_primitive::( + ArrayData2::Interval(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::Binary(_) => Self::encode_varlen::( + ArrayData2::Binary(_) => Self::encode_varlen::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData::List(_) => not_implemented!("Row encode list"), + ArrayData2::List(_) => not_implemented!("Row encode list"), }; } @@ -259,28 +259,28 @@ impl ComparableRowEncoder { let mut size = 0; for arr in columns { let mut arr_size = match arr.array_data() { - ArrayData::UntypedNull(_) => 0, // Nulls will be encoded in the "validity" portion of the row. - ArrayData::Boolean(d) => d.len() * std::mem::size_of::(), // Note this will expand the 1 bit bools to bytes. - ArrayData::Int8(d) => d.data_size_bytes(), - ArrayData::Int16(d) => d.data_size_bytes(), - ArrayData::Int32(d) => d.data_size_bytes(), - ArrayData::Int64(d) => d.data_size_bytes(), - ArrayData::Int128(d) => d.data_size_bytes(), - ArrayData::UInt8(d) => d.data_size_bytes(), - ArrayData::UInt16(d) => d.data_size_bytes(), - ArrayData::UInt32(d) => d.data_size_bytes(), - ArrayData::UInt64(d) => d.data_size_bytes(), - ArrayData::UInt128(d) => d.data_size_bytes(), - ArrayData::Float16(d) => d.data_size_bytes(), - ArrayData::Float32(d) => d.data_size_bytes(), - ArrayData::Float64(d) => d.data_size_bytes(), - ArrayData::Interval(d) => d.data_size_bytes(), - ArrayData::Binary(d) => match d { + ArrayData2::UntypedNull(_) => 0, // Nulls will be encoded in the "validity" portion of the row. + ArrayData2::Boolean(d) => d.len() * std::mem::size_of::(), // Note this will expand the 1 bit bools to bytes. 
+ ArrayData2::Int8(d) => d.data_size_bytes(), + ArrayData2::Int16(d) => d.data_size_bytes(), + ArrayData2::Int32(d) => d.data_size_bytes(), + ArrayData2::Int64(d) => d.data_size_bytes(), + ArrayData2::Int128(d) => d.data_size_bytes(), + ArrayData2::UInt8(d) => d.data_size_bytes(), + ArrayData2::UInt16(d) => d.data_size_bytes(), + ArrayData2::UInt32(d) => d.data_size_bytes(), + ArrayData2::UInt64(d) => d.data_size_bytes(), + ArrayData2::UInt128(d) => d.data_size_bytes(), + ArrayData2::Float16(d) => d.data_size_bytes(), + ArrayData2::Float32(d) => d.data_size_bytes(), + ArrayData2::Float64(d) => d.data_size_bytes(), + ArrayData2::Interval(d) => d.data_size_bytes(), + ArrayData2::Binary(d) => match d { BinaryData::Binary(d) => d.data_size_bytes(), BinaryData::LargeBinary(d) => d.data_size_bytes(), BinaryData::German(d) => d.data_size_bytes(), }, - ArrayData::List(_) => not_implemented!("Row encode list"), + ArrayData2::List(_) => not_implemented!("Row encode list"), }; // Account for validities. diff --git a/crates/rayexec_execution/src/arrays/scalar/mod.rs b/crates/rayexec_execution/src/arrays/scalar/mod.rs index 9fe531bcf..74341e3cc 100644 --- a/crates/rayexec_execution/src/arrays/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/scalar/mod.rs @@ -14,7 +14,7 @@ use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; use timestamp::TimestampScalar; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::compute::cast::format::{ BoolFormatter, @@ -205,7 +205,7 @@ impl ScalarValue<'_> { /// Create an array of size `n` using the scalar value. 
pub fn as_array(&self, n: usize) -> Result { - let data: ArrayData = match self { + let data: ArrayData2 = match self { Self::Null => return Ok(Array2::new_untyped_null_array(n)), Self::Boolean(v) => BooleanStorage(Bitmap::new_with_val(*v, 1)).into(), Self::Float16(v) => PrimitiveStorage::from(vec![*v]).into(), diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 0ef2bd1f1..805b38c3e 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -15,7 +15,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ @@ -246,7 +246,7 @@ impl ExecutableOperator for PhysicalUnnest { match arr.physical_type() { PhysicalType::List => { let child = match arr.array_data() { - ArrayData::List(list) => list.inner_array(), + ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), }; diff --git a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs index b0791b9a8..1bcbdc8c8 100644 --- a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs +++ b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; @@ -108,7 +108,7 @@ impl LeftOuterJoinDrainState { .cloned() .chain([Array2::new_with_array_data( DataType::Boolean, - ArrayData::Boolean(Arc::new(bitmap.clone().into())), + 
ArrayData2::Boolean(Arc::new(bitmap.clone().into())), )]); let batch = Batch::try_new(cols)?; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index d15fcc709..b9075bec1 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData; +use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; @@ -216,7 +216,7 @@ impl AggregateFunctionImpl for FirstPrimitiveImpl where for<'a> S: PhysicalStorage = T>, T: Copy + Debug + Default + Sync + Send + 'static, - ArrayData: From>, + ArrayData2: From>, { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index 50793c642..e5bac18ad 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData; +use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; @@ -349,7 +349,7 @@ where for<'a> S: PhysicalStorage = T>, T: PartialOrd + Debug + Default + Sync + Send + Copy + 'static, M: AggregateState + Default + Sync + Send + 'static, - ArrayData: From>, + ArrayData2: 
From>, { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 8fc959489..f8ff4f71a 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -5,7 +5,7 @@ use std::ops::AddAssign; use num_traits::CheckedAdd; use rayexec_error::Result; -use crate::arrays::array::ArrayData; +use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; use crate::arrays::executor::physical_type::{PhysicalF64, PhysicalI64}; @@ -151,7 +151,7 @@ impl SumDecimalImpl { impl AggregateFunctionImpl for SumDecimalImpl where D: DecimalType, - ArrayData: From>, + ArrayData2: From>, { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index 2ce04399e..a5971926c 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -6,7 +6,7 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; use super::ChunkGroupAddressIter; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::aggregate::{ AggregateState, @@ -242,7 +242,7 @@ pub fn primitive_finalize( where State: AggregateState, Output: Copy + Default, - ArrayData: From>, + ArrayData2: From>, { let builder = ArrayBuilder { datatype, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index f8f75b600..780ce89a7 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -206,7 +206,7 @@ impl ScalarFunctionImpl for AddImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Add> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index 7a02531fc..cbbdd2680 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::compute::cast::array::cast_decimal_to_float; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId}; @@ -243,7 +243,7 @@ impl ScalarFunctionImpl for DivImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Div> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index df2e16e60..428125557 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -4,7 
+4,7 @@ use std::marker::PhantomData; use num_traits::{NumCast, PrimInt}; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -280,7 +280,7 @@ impl DecimalMulImpl { impl ScalarFunctionImpl for DecimalMulImpl where D: DecimalType, - ArrayData: From>, + ArrayData2: From>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; @@ -316,7 +316,7 @@ impl ScalarFunctionImpl for MulImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Mul> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index e72483d46..f7b5da2b0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -198,7 +198,7 @@ impl ScalarFunctionImpl for RemImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Rem> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index d091749a4..057b450f3 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -3,7 +3,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -213,7 +213,7 @@ impl ScalarFunctionImpl for SubImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Sub> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index fad9ab0d4..2e31dfba2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::compute::cast::array::decimal_rescale; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; @@ -827,7 +827,7 @@ impl RescalingComparisionImpl where O: ComparisonOperation, T: DecimalType, - ArrayData: From>, + ArrayData2: From>, { fn new(left: DecimalTypeMeta, right: DecimalTypeMeta) -> Self { RescalingComparisionImpl { @@ -843,7 +843,7 @@ impl ScalarFunctionImpl for RescalingComparisionImpl where O: ComparisonOperation, T: DecimalType, - ArrayData: From>, + ArrayData2: From>, { fn execute(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 4cd85761f..2ec8b8907 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -4,7 +4,7 @@ use half::f16; use rayexec_error::{not_implemented, RayexecError, Result}; use serde::{Deserialize, Serialize}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ @@ -128,7 +128,7 @@ impl ScalarFunctionImpl for ListExtractImpl { fn extract(array: &Array2, idx: usize) -> Result { let data = match array.array_data() { - ArrayData::List(list) => list.as_ref(), + ArrayData2::List(list) => list.as_ref(), _other => return Err(RayexecError::new("Unexpected storage type")), }; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index fe0a1df07..d4b0e8df9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -2,7 +2,7 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ @@ -100,7 +100,7 @@ impl ScalarFunctionImpl for NegateImpl where S: PhysicalStorage, for<'a> S::Type<'a>: std::ops::Neg> + Default + Copy, - ArrayData: From>>, + ArrayData2: From>>, { fn execute(&self, inputs: &[&Array2]) -> Result { use std::ops::Neg; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index 2beb8e9d3..b36516c7a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for AbsOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index 6ccb8d49f..e6196e020 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for AcosOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index d47994b5d..e09163bd3 100644 
--- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for AsinOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 9cde0bff3..d54dc8e8a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for AtanOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 64a5c4963..044ca4628 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for CbrtOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 9710c9e84..56b4e5bbb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for CeilOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 05e279a8b..d6fee453c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -2,7 +2,7 @@ use 
num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for CosOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 58386a653..1094e7f79 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for DegreesOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index abeffec6a..e2c72b61c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, 
UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for ExpOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 3826856f5..042aa67f6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for FloorOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 29153857a..1bfb58813 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use 
crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for LnOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index db09c952e..57460b66e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for LogOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, @@ -45,7 +45,7 @@ impl UnaryInputNumericOperation for LogOp2 { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 28192a12a..9c44f0313 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -38,7 +38,7 @@ pub use sin::*; pub use sqrt::*; pub use tan::*; -use 
crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{ PhysicalF16, @@ -85,7 +85,7 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>; + ArrayData2: From>>; } /// Helper struct for creating functions that accept and produce a single diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index efca90b4f..8d70420bb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for RadiansOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 467bc4ed9..f22c276bb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, 
ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for SinOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index 8add9ecfb..aca72b13a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for SqrtOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index b705b9040..7669b561b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -2,7 +2,7 @@ use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, 
PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage; @@ -22,7 +22,7 @@ impl UnaryInputNumericOperation for TanOp { where S: PhysicalStorage, S::Type<'a>: Float + Default, - ArrayData: From>>, + ArrayData2: From>>, { let builder = ArrayBuilder { datatype: ret, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index b1de7c37a..d12c51c91 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; @@ -126,7 +126,7 @@ where F: Fn(&str) -> String, { let cap = match input.array_data() { - ArrayData::Binary(bin) => bin.binary_data_size_bytes(), + ArrayData2::Binary(bin) => bin.binary_data_size_bytes(), _ => return Err(RayexecError::new("Unexpected array data type")), }; diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index d556f837f..0839b9272 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -3,7 +3,7 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData}; +use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType}; @@ -209,7 +209,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { let output = 
match input.physical_type() { PhysicalType::List => { let child = match input.array_data() { - ArrayData::List(list) => list.inner_array(), + ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), }; diff --git a/crates/rayexec_parquet/src/reader/mod.rs b/crates/rayexec_parquet/src/reader/mod.rs index f6266424e..fdb28694e 100644 --- a/crates/rayexec_parquet/src/reader/mod.rs +++ b/crates/rayexec_parquet/src/reader/mod.rs @@ -19,7 +19,7 @@ use parquet::file::reader::{ChunkReader, Length, SerializedPageReader}; use parquet::schema::types::ColumnDescPtr; use primitive::PrimitiveArrayReader; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array2, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::datatype::DataType; @@ -115,7 +115,7 @@ where /// Trait for converting a buffer of values into array data. 
pub trait IntoArrayData { - fn into_array_data(self) -> ArrayData; + fn into_array_data(self) -> ArrayData2; } pub fn def_levels_into_bitmap(def_levels: Vec) -> Bitmap { diff --git a/crates/rayexec_parquet/src/reader/primitive.rs b/crates/rayexec_parquet/src/reader/primitive.rs index f19860c40..41ac0729f 100644 --- a/crates/rayexec_parquet/src/reader/primitive.rs +++ b/crates/rayexec_parquet/src/reader/primitive.rs @@ -4,7 +4,7 @@ use parquet::column::reader::basic::BasicColumnValueDecoder; use parquet::data_type::{DataType as ParquetDataType, Int96}; use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::array::{Array2, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::array::cast_array; use rayexec_execution::arrays::compute::cast::behavior::CastFailBehavior; @@ -123,7 +123,7 @@ where } impl IntoArrayData for Vec { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { let values = Bitmap::from_iter(self); BooleanStorage::from(values).into() } @@ -132,7 +132,7 @@ impl IntoArrayData for Vec { macro_rules! 
impl_into_array_primitive { ($prim:ty) => { impl IntoArrayData for Vec<$prim> { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { PrimitiveStorage::from(self).into() } } @@ -153,7 +153,7 @@ impl_into_array_primitive!(f32); impl_into_array_primitive!(f64); impl IntoArrayData for Vec { - fn into_array_data(self) -> ArrayData { + fn into_array_data(self) -> ArrayData2 { let values: Vec<_> = self.into_iter().map(|v| v.to_nanos()).collect(); PrimitiveStorage::from(values).into() } diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index 716d7143a..28c18e0a5 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -12,7 +12,7 @@ use parquet::file::writer::{write_page, SerializedFileWriter}; use parquet::format::FileMetaData; use parquet::schema::types::SchemaDescriptor; use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::array::{Array2, ArrayData}; +use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::batch::Batch; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage}; @@ -244,7 +244,7 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R match writer { ColumnWriter::BoolColumnWriter(writer) => { match array.array_data() { - ArrayData::Boolean(d) => { + ArrayData2::Boolean(d) => { let bools: Vec<_> = d.as_ref().as_ref().iter().collect(); writer .write_batch(&bools, None, None) @@ -255,13 +255,13 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R } } ColumnWriter::Int32ColumnWriter(writer) => match array.array_data() { - ArrayData::Int32(d) => { + ArrayData2::Int32(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write i32 data")?; Ok(()) } - ArrayData::UInt32(d) => { + ArrayData2::UInt32(d) => { // SAFETY: u32 and i32 safe to cast to/from. This follows // upstream behavior. let data = unsafe { d.try_reintepret_cast::()? }; @@ -273,13 +273,13 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R _ => Err(RayexecError::new("expected i32/u32 data")), }, ColumnWriter::Int64ColumnWriter(writer) => match array.array_data() { - ArrayData::Int64(d) => { + ArrayData2::Int64(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write i64 data")?; Ok(()) } - ArrayData::UInt64(d) => { + ArrayData2::UInt64(d) => { // SAFETY: u64 and i64 safe to cast to/from. This follows // upstream behavior. let data = unsafe { d.try_reintepret_cast::()? }; @@ -291,7 +291,7 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R _ => Err(RayexecError::new("expected i64/u64 data")), }, ColumnWriter::FloatColumnWriter(writer) => match array.array_data() { - ArrayData::Float32(d) => { + ArrayData2::Float32(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write f32 data")?; @@ -300,7 +300,7 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R _ => Err(RayexecError::new("expected f32 data")), }, ColumnWriter::DoubleColumnWriter(writer) => match array.array_data() { - ArrayData::Float64(d) => { + ArrayData2::Float64(d) => { writer .write_batch(d.as_slice(), None, None) .context("failed to write f64 data")?; @@ -309,7 +309,7 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R _ => Err(RayexecError::new("expected f64 data")), }, ColumnWriter::ByteArrayColumnWriter(writer) => match array.array_data() { - ArrayData::Binary(_) => { + ArrayData2::Binary(_) => { // TODO: Try not to copy here. There's a hard requirement on the // physical type being `Bytes`, and so a conversion needs to // happen somewhere. From 583581a7a76fc6ed57f8019025242b9fd981f8aa Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 19:37:21 -0500 Subject: [PATCH 07/59] array --- .../src/arrays/array/array_data.rs | 1 + .../rayexec_execution/src/arrays/array/exp.rs | 123 ++++++++++++++++++ .../src/arrays/array/flat.rs | 52 ++++++++ .../rayexec_execution/src/arrays/array/mod.rs | 53 ++++---- .../src/arrays/array/selection.rs | 110 ++++++++++++++++ .../src/arrays/buffer/mod.rs | 41 +++++- .../src/arrays/buffer/physical_type.rs | 17 +++ .../src/arrays/compute/cast/array.rs | 2 +- .../rayexec_execution/src/arrays/datatype.rs | 39 +++++- .../src/arrays/executor/physical_type.rs | 6 +- .../src/arrays/executor/scalar/fill.rs | 82 ++++++------ .../src/arrays/executor/scalar/hash.rs | 78 +++++------ .../operators/hash_aggregate/chunk.rs | 4 +- .../operators/hash_aggregate/compare.rs | 40 +++--- .../src/execution/operators/unnest.rs | 42 +++--- .../src/functions/aggregate/builtin/first.rs | 42 +++--- .../src/functions/aggregate/builtin/minmax.rs | 82 ++++++------ .../functions/scalar/builtin/comparison.rs | 46 +++---- .../scalar/builtin/list/list_extract.rs | 36 ++--- .../functions/scalar/builtin/numeric/mod.rs | 8 +- .../src/functions/table/builtin/unnest.rs | 6 +- 21 files changed, 637 insertions(+), 273 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/array/exp.rs create mode 100644 crates/rayexec_execution/src/arrays/array/flat.rs create mode 100644 crates/rayexec_execution/src/arrays/array/selection.rs diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs 
b/crates/rayexec_execution/src/arrays/array/array_data.rs index fb67eda4e..7a3e60c8e 100644 --- a/crates/rayexec_execution/src/arrays/array/array_data.rs +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -90,6 +90,7 @@ where } } } + impl AsRef> for ArrayData where B: BufferManager, diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs new file mode 100644 index 000000000..79619cbba --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -0,0 +1,123 @@ +use rayexec_error::Result; + +use super::array_data::ArrayData; +use super::flat::FlatArrayView; +use super::validity::Validity; +use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::physical_type::{ + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUtf8, +}; +use crate::arrays::buffer::string_view::StringViewHeap; +use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; +use crate::arrays::datatype::DataType; + +#[derive(Debug)] +pub struct Array { + pub(crate) datatype: DataType, + pub(crate) validity: Validity, + pub(crate) data: ArrayData, +} + +impl Array { + /// Create a new array with the given capacity. + /// + /// This will take care of initalizing the primary and secondary data + /// buffers depending on the type. + pub fn new(datatype: DataType, capacity: usize) -> Result { + let manager = NopBufferManager; + + let buffer = match datatype.physical_type() { + PhysicalType::Int8 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Int16 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Int32 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? 
+ } + PhysicalType::Int64 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Int128 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::UInt8 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::UInt16 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::UInt32 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::UInt64 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::UInt128 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Float16 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Float32 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Float64 => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Interval => { + ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } + PhysicalType::Utf8 => { + let mut buffer = + ArrayBuffer::with_primary_capacity::(&manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + buffer + } + _ => unimplemented!(), + }; + + let validity = Validity::new_all_valid(capacity); + + Ok(Array { + datatype, + validity, + data: ArrayData::owned(buffer), + }) + } +} + +impl Array +where + B: BufferManager, +{ + pub fn capacity(&self) -> usize { + self.data.capacity() + } + + /// If this array is a dictionary array. + pub fn is_dictionary(&self) -> bool { + self.data.physical_type() == PhysicalType::Dictionary + } + + /// Return a flat array view for this array. 
+ pub fn flat_view(&self) -> Result> { + FlatArrayView::from_array(self) + } +} diff --git a/crates/rayexec_execution/src/arrays/array/flat.rs b/crates/rayexec_execution/src/arrays/array/flat.rs new file mode 100644 index 000000000..f5756de0a --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/flat.rs @@ -0,0 +1,52 @@ +use rayexec_error::{RayexecError, Result}; + +use super::exp::Array; +use super::selection::Selection; +use super::validity::Validity; +use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use crate::arrays::buffer::physical_type::PhysicalDictionary; +use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + +/// A view on top of normal arrays flattening some parts of the nested +/// structure. +#[derive(Debug)] +pub struct FlatArrayView<'a, B: BufferManager = NopBufferManager> { + pub(crate) validity: &'a Validity, + pub(crate) array_buffer: &'a ArrayBuffer, + pub(crate) selection: Selection<'a>, +} + +impl<'a, B> FlatArrayView<'a, B> +where + B: BufferManager, +{ + pub fn from_array(array: &'a Array) -> Result { + if array.is_dictionary() { + let selection = array.data.try_as_slice::()?; + let dict_buffer = match array.data.get_secondary() { + SecondaryBuffer::Dictionary(dict) => dict, + _ => { + return Err(RayexecError::new( + "Secondary buffer not a dictionary buffer", + )) + } + }; + + Ok(FlatArrayView { + validity: &dict_buffer.validity, + array_buffer: &dict_buffer.buffer, + selection: Selection::selection(selection), + }) + } else { + Ok(FlatArrayView { + validity: &array.validity, + array_buffer: &array.data, + selection: Selection::linear(array.capacity()), + }) + } + } + + pub fn logical_len(&self) -> usize { + self.selection.len() + } +} diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index f9ccb6d60..9c55b47d3 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -1,4 
+1,7 @@ pub mod array_data; +pub mod exp; +pub mod flat; +pub mod selection; pub mod validity; mod shared_or_owned; @@ -31,7 +34,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -102,7 +105,7 @@ impl Array2 { pub fn new_typed_null_array(datatype: DataType, len: usize) -> Result { // Create physical array data of length 1, and use a selection vector to // extend it out to the desired size. - let data = datatype.physical_type()?.zeroed_array_data(1); + let data = datatype.physical_type2()?.zeroed_array_data(1); let validity = Bitmap::new_with_all_false(1); let selection = SelectionVector::repeated(len, 0); @@ -254,11 +257,11 @@ impl Array2 { } /// Gets the physical type of the array. - pub fn physical_type(&self) -> PhysicalType { + pub fn physical_type(&self) -> PhysicalType2 { match self.data.physical_type() { - PhysicalType::Binary => match self.datatype { - DataType::Utf8 => PhysicalType::Utf8, - _ => PhysicalType::Binary, + PhysicalType2::Binary => match self.datatype { + DataType::Utf8 => PhysicalType2::Utf8, + _ => PhysicalType2::Binary, }, other => other, } @@ -916,26 +919,26 @@ pub enum ArrayData2 { } impl ArrayData2 { - pub fn physical_type(&self) -> PhysicalType { + pub fn physical_type(&self) -> PhysicalType2 { match self { - Self::UntypedNull(_) => PhysicalType::UntypedNull, - Self::Boolean(_) => PhysicalType::Boolean, - Self::Float16(_) => PhysicalType::Float16, - Self::Float32(_) => PhysicalType::Float32, - Self::Float64(_) => PhysicalType::Float64, - Self::Int8(_) => PhysicalType::Int8, - Self::Int16(_) => PhysicalType::Int16, - Self::Int32(_) => PhysicalType::Int32, - Self::Int64(_) => PhysicalType::Int64, - Self::Int128(_) => PhysicalType::Int128, - Self::UInt8(_) => PhysicalType::UInt8, - Self::UInt16(_) => PhysicalType::UInt16, - Self::UInt32(_) => PhysicalType::UInt32, - Self::UInt64(_) => PhysicalType::UInt64, - 
Self::UInt128(_) => PhysicalType::UInt128, - Self::Interval(_) => PhysicalType::Interval, - Self::Binary(_) => PhysicalType::Binary, - Self::List(_) => PhysicalType::List, + Self::UntypedNull(_) => PhysicalType2::UntypedNull, + Self::Boolean(_) => PhysicalType2::Boolean, + Self::Float16(_) => PhysicalType2::Float16, + Self::Float32(_) => PhysicalType2::Float32, + Self::Float64(_) => PhysicalType2::Float64, + Self::Int8(_) => PhysicalType2::Int8, + Self::Int16(_) => PhysicalType2::Int16, + Self::Int32(_) => PhysicalType2::Int32, + Self::Int64(_) => PhysicalType2::Int64, + Self::Int128(_) => PhysicalType2::Int128, + Self::UInt8(_) => PhysicalType2::UInt8, + Self::UInt16(_) => PhysicalType2::UInt16, + Self::UInt32(_) => PhysicalType2::UInt32, + Self::UInt64(_) => PhysicalType2::UInt64, + Self::UInt128(_) => PhysicalType2::UInt128, + Self::Interval(_) => PhysicalType2::Interval, + Self::Binary(_) => PhysicalType2::Binary, + Self::List(_) => PhysicalType2::List, } } diff --git a/crates/rayexec_execution/src/arrays/array/selection.rs b/crates/rayexec_execution/src/arrays/array/selection.rs new file mode 100644 index 000000000..2eade31da --- /dev/null +++ b/crates/rayexec_execution/src/arrays/array/selection.rs @@ -0,0 +1,110 @@ +#[derive(Debug, Clone, Copy)] +pub enum Selection<'a> { + /// Constant selection. + /// + /// All indices point to the same location. + Constant { len: usize, loc: usize }, + /// Represents a linear selection. + /// + /// '0..len' + Linear { len: usize }, + /// Represents the true location to use for some index. + Selection(&'a [usize]), +} + +impl<'a> Selection<'a> { + pub fn constant(len: usize, loc: usize) -> Self { + Self::Constant { len, loc } + } + + pub fn linear(len: usize) -> Self { + Self::Linear { len } + } + + pub fn selection(sel: &'a [usize]) -> Self { + Self::Selection(sel) + } + + pub fn is_linear(&self) -> bool { + matches!(self, Selection::Linear { .. 
}) + } + + pub fn iter(&self) -> FlatSelectionIter { + FlatSelectionIter { idx: 0, sel: *self } + } + + pub fn len(&self) -> usize { + match self { + Self::Constant { len, .. } => *len, + Self::Linear { len } => *len, + Self::Selection(sel) => sel.len(), + } + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + #[inline] + pub fn get(&self, idx: usize) -> Option { + match self { + Self::Constant { len, loc } => { + if idx >= *len { + None + } else { + Some(*loc) + } + } + Self::Linear { len } => { + if idx >= *len { + None + } else { + Some(idx) + } + } + Self::Selection(sel) => sel.get(idx).copied(), + } + } +} + +impl<'a> IntoIterator for Selection<'a> { + type Item = usize; + type IntoIter = FlatSelectionIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + FlatSelectionIter { idx: 0, sel: self } + } +} + +#[derive(Debug)] +pub struct FlatSelectionIter<'a> { + idx: usize, + sel: Selection<'a>, +} + +impl<'a> Iterator for FlatSelectionIter<'a> { + type Item = usize; + + fn next(&mut self) -> Option { + if self.idx >= self.sel.len() { + return None; + } + + let v = match self.sel { + Selection::Constant { loc, .. } => loc, + Selection::Linear { .. } => self.idx, + Selection::Selection(sel) => sel[self.idx], + }; + + self.idx += 1; + + Some(v) + } + + fn size_hint(&self) -> (usize, Option) { + let rem = self.sel.len() - self.idx; + (rem, Some(rem)) + } +} + +impl<'a> ExactSizeIterator for FlatSelectionIter<'a> {} diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index ce6915112..f3e73bea7 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -15,6 +15,9 @@ use string_view::{ StringViewMetadataUnion, }; +use super::array::array_data::ArrayData; +use super::array::validity::Validity; + #[derive(Debug)] pub struct ArrayBuffer { /// Physical type of the buffer. 
@@ -53,6 +56,14 @@ where self.secondary = Box::new(secondary) } + pub fn capacity(&self) -> usize { + self.primary.len + } + + pub fn physical_type(&self) -> PhysicalType { + self.physical_type + } + pub fn try_as_slice(&self) -> Result<&[S::PrimaryBufferType]> { self.check_type(S::PHYSICAL_TYPE)?; let slice = unsafe { self.primary.as_slice::() }; @@ -110,13 +121,6 @@ where } } -#[derive(Debug)] -pub enum SecondaryBuffer { - StringViewHeap(StringViewHeap), - Temp(B), - None, -} - impl Drop for ArrayBuffer { fn drop(&mut self) { let ptr = self.primary.ptr; @@ -130,3 +134,26 @@ impl Drop for ArrayBuffer { // self.primary.reservation.free() } } + +#[derive(Debug)] +pub enum SecondaryBuffer { + StringViewHeap(StringViewHeap), + Dictionary(DictionaryBuffer), + None, +} + +#[derive(Debug)] +pub struct DictionaryBuffer { + pub(crate) validity: Validity, + pub(crate) buffer: ArrayData, +} + +impl DictionaryBuffer +where + B: BufferManager, +{ + pub fn new(buffer: ArrayData, validity: Validity) -> Self { + debug_assert_eq!(buffer.capacity(), validity.len()); + DictionaryBuffer { buffer, validity } + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index ec1ce408f..c5b87884b 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -260,3 +260,20 @@ impl MutablePhysicalStorage for PhysicalUtf8 { buffer.try_as_string_view_addressable_mut() } } + +/// Dictionary arrays have the selection vector as the primary data buffer. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalDictionary; + +impl PhysicalStorage for PhysicalDictionary { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Dictionary; + + type PrimaryBufferType = usize; // The index into the dictionary. 
+ type StorageType = Self::PrimaryBufferType; + + type Addressable<'a> = &'a [usize]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index ec777366f..a96084493 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -88,7 +88,7 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res let arr = match arr.datatype() { DataType::Null => { // Can cast NULL to anything else. - let data = to.physical_type()?.zeroed_array_data(arr.logical_len()); + let data = to.physical_type2()?.zeroed_array_data(arr.logical_len()); let validity = Bitmap::new_with_all_false(arr.logical_len()); Array2::new_with_validity_and_array_data(to, validity, data) } diff --git a/crates/rayexec_execution/src/arrays/datatype.rs b/crates/rayexec_execution/src/arrays/datatype.rs index b6ba28fc3..a05cd0843 100644 --- a/crates/rayexec_execution/src/arrays/datatype.rs +++ b/crates/rayexec_execution/src/arrays/datatype.rs @@ -4,7 +4,8 @@ use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt} use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; -use crate::arrays::executor::physical_type::PhysicalType; +use super::buffer::physical_type::PhysicalType; +use crate::arrays::executor::physical_type::PhysicalType2; use crate::arrays::field::Field; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -445,8 +446,8 @@ impl DataType { } } - pub fn physical_type(&self) -> Result { - Ok(match self { + pub fn physical_type(&self) -> PhysicalType { + match self { DataType::Null => PhysicalType::UntypedNull, DataType::Boolean => PhysicalType::Boolean, DataType::Int8 => PhysicalType::Int8, @@ -470,8 +471,38 @@ impl DataType { DataType::Interval => PhysicalType::Interval, 
DataType::Utf8 => PhysicalType::Utf8, DataType::Binary => PhysicalType::Binary, - DataType::Struct(_) => not_implemented!("struct data type to physical type"), + DataType::Struct(_) => PhysicalType::Struct, DataType::List(_) => PhysicalType::List, + } + } + + pub fn physical_type2(&self) -> Result { + Ok(match self { + DataType::Null => PhysicalType2::UntypedNull, + DataType::Boolean => PhysicalType2::Boolean, + DataType::Int8 => PhysicalType2::Int8, + DataType::Int16 => PhysicalType2::Int16, + DataType::Int32 => PhysicalType2::Int32, + DataType::Int64 => PhysicalType2::Int64, + DataType::Int128 => PhysicalType2::Int128, + DataType::UInt8 => PhysicalType2::UInt8, + DataType::UInt16 => PhysicalType2::UInt16, + DataType::UInt32 => PhysicalType2::UInt32, + DataType::UInt64 => PhysicalType2::UInt64, + DataType::UInt128 => PhysicalType2::UInt128, + DataType::Float16 => PhysicalType2::Float16, + DataType::Float32 => PhysicalType2::Float32, + DataType::Float64 => PhysicalType2::Float64, + DataType::Decimal64(_) => PhysicalType2::Int64, + DataType::Decimal128(_) => PhysicalType2::Int128, + DataType::Timestamp(_) => PhysicalType2::Int64, + DataType::Date32 => PhysicalType2::Int32, + DataType::Date64 => PhysicalType2::Int64, + DataType::Interval => PhysicalType2::Interval, + DataType::Utf8 => PhysicalType2::Utf8, + DataType::Binary => PhysicalType2::Binary, + DataType::Struct(_) => not_implemented!("struct data type to physical type"), + DataType::List(_) => PhysicalType2::List, }) } diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index 159c9b01c..a286499c5 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -20,7 +20,7 @@ use crate::arrays::storage::{ }; #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum PhysicalType { +pub enum PhysicalType2 { UntypedNull, Boolean, Int8, @@ -42,7 +42,7 
@@ pub enum PhysicalType { List, } -impl PhysicalType { +impl PhysicalType2 { pub fn zeroed_array_data(&self, len: usize) -> ArrayData2 { match self { Self::UntypedNull => UntypedNullStorage(len).into(), @@ -72,7 +72,7 @@ impl PhysicalType { } } -impl ProtoConv for PhysicalType { +impl ProtoConv for PhysicalType2 { type ProtoType = rayexec_proto::generated::physical_type::PhysicalType; fn to_proto(&self) -> Result { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 175a0a72b..5fd18f9c2 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -26,7 +26,7 @@ use crate::arrays::executor::physical_type::{ PhysicalInterval, PhysicalList, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -152,133 +152,133 @@ pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) None => return Err(RayexecError::new("Cannot concat zero arrays")), }; - match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array2 { + match datatype.physical_type2()? 
{ + PhysicalType2::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(total_len).into(), }), - PhysicalType::Boolean => { + PhysicalType2::Boolean => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: BooleanBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { 
datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Interval => { + PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(total_len), }); concat_with_fill_state::(arrays, state) } - PhysicalType::List => concat_lists(datatype.clone(), arrays, total_len), + PhysicalType2::List => 
concat_lists(datatype.clone(), arrays, total_len), } } @@ -376,133 +376,133 @@ pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result return Err(RayexecError::new("Cannot interleave zero arrays")), }; - match datatype.physical_type()? { - PhysicalType::UntypedNull => Ok(Array2 { + match datatype.physical_type2()? { + PhysicalType2::UntypedNull => Ok(Array2 { datatype: datatype.clone(), selection: None, validity: None, data: UntypedNullStorage(indices.len()).into(), }), - PhysicalType::Boolean => { + PhysicalType2::Boolean => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: BooleanBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { 
datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Interval => { + PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); 
interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(indices.len()), }); interleave_with_fill_state::(arrays, indices, state) } - PhysicalType::List => { + PhysicalType2::List => { // TODO: Also doable Err(RayexecError::new( "interleaving list arrays not yet supported", diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 2b2c46994..5686e133e 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -17,7 +17,7 @@ use crate::arrays::executor::physical_type::{ PhysicalInterval, PhysicalList, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU16, PhysicalU32, PhysicalU64, @@ -40,61 +40,61 @@ impl HashExecutor { /// in `hashes`. pub fn hash_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Boolean => { + PhysicalType2::Boolean => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { Self::hash_one_inner::(array, hashes)? 
} - PhysicalType::Int128 => { + PhysicalType2::Int128 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Binary => { + PhysicalType2::Binary => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Interval => { + PhysicalType2::Interval => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::List => Self::hash_list::(array, hashes)?, + PhysicalType2::List => Self::hash_list::(array, hashes)?, } Ok(()) @@ -104,61 +104,61 @@ impl HashExecutor { /// existing values. pub fn hash_no_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Boolean => { + PhysicalType2::Boolean => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { Self::hash_one_inner::(array, hashes)? 
} - PhysicalType::Int64 => { + PhysicalType2::Int64 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Binary => { + PhysicalType2::Binary => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { Self::hash_one_inner::(array, hashes)? } - PhysicalType::Interval => { + PhysicalType2::Interval => { Self::hash_one_inner::(array, hashes)? 
} - PhysicalType::List => Self::hash_list::(array, hashes)?, + PhysicalType2::List => Self::hash_list::(array, hashes)?, } Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 6047cb241..c2ab28fe4 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use super::hash_table::GroupAddress; use super::AggregateStates; use crate::arrays::array::Array2; -use crate::arrays::executor::physical_type::PhysicalType; +use crate::arrays::executor::physical_type::PhysicalType2; use crate::arrays::executor::scalar::concat; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; use crate::functions::aggregate::ChunkGroupAddressIter; @@ -27,7 +27,7 @@ impl GroupChunk { pub fn can_append( &self, new_groups: usize, - group_vals: impl ExactSizeIterator, + group_vals: impl ExactSizeIterator, ) -> bool { if self.num_groups + new_groups > DEFAULT_TARGET_BATCH_SIZE { return false; diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index 3718db416..51a005fce 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -18,7 +18,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -96,69 +96,69 @@ where } match array1.physical_type() { - PhysicalType::UntypedNull => compare_rows_eq::( + PhysicalType2::UntypedNull => compare_rows_eq::( array1, array2, rows1, rows2, not_eq_rows, )?, - PhysicalType::Boolean => { + PhysicalType2::Boolean => { 
compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::Interval => compare_rows_eq::( + PhysicalType2::Interval => compare_rows_eq::( array1, array2, rows1, rows2, not_eq_rows, )?, - PhysicalType::Binary => { + PhysicalType2::Binary => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? 
} - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } - PhysicalType::List => { + PhysicalType2::List => { not_implemented!("Row compare list") } } diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 805b38c3e..4c92f611e 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -38,7 +38,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalList, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -206,7 +206,7 @@ impl ExecutableOperator for PhysicalUnnest { // We have input ready, get the longest list for the current row. let mut longest = 0; for input_idx in 0..state.unnest_inputs.len() { - if state.unnest_inputs[input_idx].physical_type() == PhysicalType::UntypedNull { + if state.unnest_inputs[input_idx].physical_type() == PhysicalType2::UntypedNull { // Just let other unnest expressions determine the number of // rows. continue; @@ -244,7 +244,7 @@ impl ExecutableOperator for PhysicalUnnest { let arr = &state.unnest_inputs[input_idx]; match arr.physical_type() { - PhysicalType::List => { + PhysicalType2::List => { let child = match arr.array_data() { ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), @@ -267,7 +267,7 @@ impl ExecutableOperator for PhysicalUnnest { } } } - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { // Just produce null array according to longest length. 
let out = Array2::new_untyped_null_array(longest as usize); outputs.push(out); @@ -308,113 +308,113 @@ pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) let datatype = child.datatype().clone(); match child.physical_type() { - PhysicalType::UntypedNull => Ok(Array2::new_untyped_null_array(longest_len)), - PhysicalType::Boolean => { + PhysicalType2::UntypedNull => Ok(Array2::new_untyped_null_array(longest_len)), + PhysicalType2::Boolean => { let builder = ArrayBuilder { datatype, buffer: BooleanBuffer::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let builder = ArrayBuilder { 
datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let builder = ArrayBuilder { datatype, buffer: GermanVarlenBuffer::::with_len(longest_len), }; unnest_inner::(builder, child, meta) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let builder = ArrayBuilder { datatype, buffer: GermanVarlenBuffer::<[u8]>::with_len(longest_len), diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index b9075bec1..23fe60270 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -21,7 +21,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -81,58 +81,58 @@ impl AggregateFunction for First { let datatype = 
inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type()? { - PhysicalType::UntypedNull => Box::new(FirstUntypedNullImpl), - PhysicalType::Boolean => Box::new(FirstBoolImpl), - PhysicalType::Float16 => Box::new(FirstPrimitiveImpl::::new( + let function_impl: Box = match datatype.physical_type2()? { + PhysicalType2::UntypedNull => Box::new(FirstUntypedNullImpl), + PhysicalType2::Boolean => Box::new(FirstBoolImpl), + PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Float32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Float64 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Int8 => { + PhysicalType2::Int8 => { Box::new(FirstPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int16 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Int32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Int64 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Int128 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { Box::new(FirstPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt16 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt64 => Box::new(FirstPrimitiveImpl::::new( + 
PhysicalType2::UInt64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt128 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Interval => Box::new( + PhysicalType2::Interval => Box::new( FirstPrimitiveImpl::::new(datatype.clone()), ), - PhysicalType::Binary => Box::new(FirstBinaryImpl { + PhysicalType2::Binary => Box::new(FirstBinaryImpl { datatype: datatype.clone(), }), - PhysicalType::Utf8 => Box::new(FirstBinaryImpl { + PhysicalType2::Utf8 => Box::new(FirstBinaryImpl { datatype: datatype.clone(), }), - PhysicalType::List => { + PhysicalType2::List => { // TODO: Easy, clone underlying array and select. not_implemented!("FIRST for list arrays") } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index e5bac18ad..bd4429f20 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -21,7 +21,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -81,54 +81,54 @@ impl AggregateFunction for Min { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type()? { - PhysicalType::UntypedNull => Box::new(MinMaxUntypedNull), - PhysicalType::Boolean => Box::new(MinBoolImpl::new()), - PhysicalType::Float16 => { + let function_impl: Box = match datatype.physical_type2()? 
{ + PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), + PhysicalType2::Boolean => Box::new(MinBoolImpl::new()), + PhysicalType2::Float16 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int128 => Box::new(MinPrimitiveImpl::::new( + PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt128 => Box::new(MinPrimitiveImpl::::new( + PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Interval => Box::new( + PhysicalType2::Interval => Box::new( MinPrimitiveImpl::::new(datatype.clone()), ), - PhysicalType::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), - PhysicalType::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())), - PhysicalType::List => { + PhysicalType2::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), + PhysicalType2::Utf8 => 
Box::new(MinBinaryImpl::new(datatype.clone())), + PhysicalType2::List => { not_implemented!("MIN for list arrays") } }; @@ -175,54 +175,54 @@ impl AggregateFunction for Max { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type()? { - PhysicalType::UntypedNull => Box::new(MinMaxUntypedNull), - PhysicalType::Boolean => Box::new(MaxBoolImpl::new()), - PhysicalType::Float16 => { + let function_impl: Box = match datatype.physical_type2()? { + PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), + PhysicalType2::Boolean => Box::new(MaxBoolImpl::new()), + PhysicalType2::Float16 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::Int128 => Box::new(MaxPrimitiveImpl::::new( + PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType::UInt128 => 
Box::new(MaxPrimitiveImpl::::new( + PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType::Interval => Box::new( + PhysicalType2::Interval => Box::new( MaxPrimitiveImpl::::new(datatype.clone()), ), - PhysicalType::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType::List => { + PhysicalType2::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), + PhysicalType2::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), + PhysicalType2::List => { not_implemented!("MAX for list arrays") } }; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 2e31dfba2..68db6f81a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -22,7 +22,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -585,7 +585,7 @@ fn new_comparison_impl( } (DataType::List(m1), DataType::List(m2)) if m1 == m2 => { // TODO: We'll want to figure out casting for lists. 
- Box::new(ListComparisonImpl::::new(m1.datatype.physical_type()?)) + Box::new(ListComparisonImpl::::new(m1.datatype.physical_type2()?)) } (a, b) => return Err(invalid_input_types_error(func, &[a, b])), }, @@ -643,12 +643,12 @@ where #[derive(Debug, Clone)] struct ListComparisonImpl { - inner_physical_type: PhysicalType, + inner_physical_type: PhysicalType2, _op: PhantomData, } impl ListComparisonImpl { - fn new(inner_physical_type: PhysicalType) -> Self { + fn new(inner_physical_type: PhysicalType2) -> Self { ListComparisonImpl { _op: PhantomData, inner_physical_type, @@ -670,97 +670,97 @@ where }; let array = match self.inner_physical_type { - PhysicalType::UntypedNull => FlexibleListExecutor::binary_reduce::< + PhysicalType2::UntypedNull => FlexibleListExecutor::binary_reduce::< PhysicalUntypedNull, _, ListComparisonReducer<_, O>, >(left, right, builder)?, - PhysicalType::Boolean => { + PhysicalType2::Boolean => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? 
} - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Float64 => { + PhysicalType2::Float64 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Interval => FlexibleListExecutor::binary_reduce::< + PhysicalType2::Interval => FlexibleListExecutor::binary_reduce::< PhysicalInterval, _, ListComparisonReducer<_, O>, >(left, right, builder)?, - PhysicalType::Binary => { + PhysicalType2::Binary => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { FlexibleListExecutor::binary_reduce::>( left, right, builder, )? 
} - PhysicalType::List => { + PhysicalType2::List => { return Err(RayexecError::new( "Comparison between nested lists not yet supported", )) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 2ec8b8907..d0cfe2569 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -27,7 +27,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalList, PhysicalStorage, - PhysicalType, + PhysicalType2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -133,113 +133,113 @@ fn extract(array: &Array2, idx: usize) -> Result { }; match data.inner_array().physical_type() { - PhysicalType::UntypedNull => not_implemented!("NULL list extract"), - PhysicalType::Boolean => { + PhysicalType2::UntypedNull => not_implemented!("NULL list extract"), + PhysicalType2::Boolean => { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Int8 => { + PhysicalType2::Int8 => { let builder = ArrayBuilder { datatype: DataType::Int8, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Int16 => { + PhysicalType2::Int16 => { let builder = ArrayBuilder { datatype: DataType::Int16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Int32 => { + PhysicalType2::Int32 => { let builder = ArrayBuilder { datatype: DataType::Int32, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Int64 => { + PhysicalType2::Int64 => { let builder = ArrayBuilder { datatype: DataType::Int64, buffer: 
PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Int128 => { + PhysicalType2::Int128 => { let builder = ArrayBuilder { datatype: DataType::Int128, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::UInt8 => { + PhysicalType2::UInt8 => { let builder = ArrayBuilder { datatype: DataType::UInt8, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::UInt16 => { + PhysicalType2::UInt16 => { let builder = ArrayBuilder { datatype: DataType::UInt16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::UInt32 => { + PhysicalType2::UInt32 => { let builder = ArrayBuilder { datatype: DataType::UInt32, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::UInt64 => { + PhysicalType2::UInt64 => { let builder = ArrayBuilder { datatype: DataType::UInt64, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::UInt128 => { + PhysicalType2::UInt128 => { let builder = ArrayBuilder { datatype: DataType::UInt128, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Float16 => { + PhysicalType2::Float16 => { let builder = ArrayBuilder { datatype: DataType::Float16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Float32 => { + PhysicalType2::Float32 => { let builder = ArrayBuilder { datatype: DataType::Float32, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - 
PhysicalType::Float64 => { + PhysicalType2::Float64 => { let builder = ArrayBuilder { datatype: DataType::Float64, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Utf8 => { + PhysicalType2::Utf8 => { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(array.logical_len()), }; extract_inner::(builder, array, data.inner_array(), idx) } - PhysicalType::Binary => { + PhysicalType2::Binary => { let builder = ArrayBuilder { datatype: DataType::Binary, buffer: GermanVarlenBuffer::<[u8]>::with_len(array.logical_len()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 9c44f0313..8b9291257 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -45,7 +45,7 @@ use crate::arrays::executor::physical_type::{ PhysicalF32, PhysicalF64, PhysicalStorage, - PhysicalType, + PhysicalType2, }; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -148,9 +148,9 @@ impl ScalarFunctionImpl for UnaryInputNumericScal fn execute(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; match input.physical_type() { - PhysicalType::Float16 => O::execute_float::(input, self.ret.clone()), - PhysicalType::Float32 => O::execute_float::(input, self.ret.clone()), - PhysicalType::Float64 => O::execute_float::(input, self.ret.clone()), + PhysicalType2::Float16 => O::execute_float::(input, self.ret.clone()), + PhysicalType2::Float32 => O::execute_float::(input, self.ret.clone()), + PhysicalType2::Float64 => O::execute_float::(input, self.ret.clone()), other => Err(RayexecError::new(format!( "Invalid physical type: {other:?}" ))), diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs 
b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index 0839b9272..c09301d81 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -6,7 +6,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType}; +use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType2}; use crate::arrays::executor::scalar::UnaryExecutor; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; @@ -207,7 +207,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { let input = self.input.as_ref().unwrap(); let output = match input.physical_type() { - PhysicalType::List => { + PhysicalType2::List => { let child = match input.array_data() { ArrayData2::List(list) => list.inner_array(), _other => return Err(RayexecError::new("Unexpected storage type")), @@ -224,7 +224,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { } } } - PhysicalType::UntypedNull => { + PhysicalType2::UntypedNull => { // Just produce null array of length 1. 
Array2::new_untyped_null_array(1) } From f4ddf6df3e59a45d144aa55066b18a0ee1fe2679 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 27 Dec 2024 20:01:55 -0500 Subject: [PATCH 08/59] copy rows --- Cargo.lock | 5 + crates/iterutil/Cargo.toml | 6 + crates/iterutil/src/lib.rs | 35 +++++ crates/rayexec_execution/Cargo.toml | 1 + .../rayexec_execution/src/arrays/array/exp.rs | 128 +++++++++++++++++- 5 files changed, 174 insertions(+), 1 deletion(-) create mode 100644 crates/iterutil/Cargo.toml create mode 100644 crates/iterutil/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 7cb18f144..818925d0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1390,6 +1390,10 @@ dependencies = [ "either", ] +[[package]] +name = "iterutil" +version = "0.0.93" + [[package]] name = "itoa" version = "1.0.10" @@ -2265,6 +2269,7 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", + "iterutil", "num", "num-traits", "num_cpus", diff --git a/crates/iterutil/Cargo.toml b/crates/iterutil/Cargo.toml new file mode 100644 index 000000000..39c919663 --- /dev/null +++ b/crates/iterutil/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "iterutil" +version.workspace = true +edition.workspace = true + +[dependencies] diff --git a/crates/iterutil/src/lib.rs b/crates/iterutil/src/lib.rs new file mode 100644 index 000000000..f77a6fda8 --- /dev/null +++ b/crates/iterutil/src/lib.rs @@ -0,0 +1,35 @@ +/// Similar to `IntoIterator`, but for an iterator with an exact size. +pub trait IntoExactSizeIterator { + type Item; + type IntoIter: ExactSizeIterator; + + /// Converts self into the `ExactSizeIteror`. + fn into_iter(self) -> Self::IntoIter; +} + +/// Auto-implement for any exact size iterator. +impl IntoExactSizeIterator for I +where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, +{ + type Item = I::Item; + type IntoIter = I::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.into_iter() + } +} + +pub trait FromExactSizeIterator: Sized { + /// Create Self from an exact size iterator. 
+ fn from_iter>(iter: T) -> Self; +} + +pub trait TryFromExactSizeIterator: Sized { + /// Error type that will be returned. + type Error; + + /// Try to create Self from an exact size iterator. + fn try_from_iter>(iter: T) -> Result; +} diff --git a/crates/rayexec_execution/Cargo.toml b/crates/rayexec_execution/Cargo.toml index cf3426671..f895cb44a 100644 --- a/crates/rayexec_execution/Cargo.toml +++ b/crates/rayexec_execution/Cargo.toml @@ -10,6 +10,7 @@ rayexec_parser = { path = "../rayexec_parser" } # rayexec_bullet = { path = "../rayexec_bullet" } rayexec_io = { path = "../rayexec_io" } fmtutil = { path = "../fmtutil" } +iterutil = { path = "../iterutil" } # stackutil = { path = "../stackutil" } TODO: psm hash issues when compiling to wasm on macos ahash = { workspace = true } diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 79619cbba..04f33383f 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -1,10 +1,16 @@ -use rayexec_error::Result; +use half::f16; +use iterutil::{IntoExactSizeIterator, TryFromExactSizeIterator}; +use rayexec_error::{RayexecError, Result}; use super::array_data::ArrayData; use super::flat::FlatArrayView; use super::validity::Validity; use crate::arrays::buffer::buffer_manager::{BufferManager, NopBufferManager}; use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalBool, PhysicalF16, PhysicalF32, PhysicalF64, @@ -25,6 +31,7 @@ use crate::arrays::buffer::physical_type::{ use crate::arrays::buffer::string_view::StringViewHeap; use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; use crate::arrays::datatype::DataType; +use crate::arrays::scalar::interval::Interval; #[derive(Debug)] pub struct Array { @@ -42,6 +49,9 @@ impl Array { let manager = NopBufferManager; let buffer = match datatype.physical_type() { + PhysicalType::Boolean => { + 
ArrayBuffer::with_primary_capacity::(&manager, capacity)? + } PhysicalType::Int8 => { ArrayBuffer::with_primary_capacity::(&manager, capacity)? } @@ -120,4 +130,120 @@ where pub fn flat_view(&self) -> Result> { FlatArrayView::from_array(self) } + + /// Copy rows from self to another array. + /// + /// `mapping` provides a mapping of source indices to destination indices in + /// (source, dest) pairs. + pub fn copy_rows( + &self, + mapping: impl IntoExactSizeIterator, + dest: &mut Self, + ) -> Result<()> { + match self.datatype.physical_type() { + PhysicalType::Boolean => copy_rows::(self, mapping, dest)?, + PhysicalType::Int8 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int16 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int32 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int64 => copy_rows::(self, mapping, dest)?, + PhysicalType::Int128 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt8 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt16 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt32 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt64 => copy_rows::(self, mapping, dest)?, + PhysicalType::UInt128 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float16 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float32 => copy_rows::(self, mapping, dest)?, + PhysicalType::Float64 => copy_rows::(self, mapping, dest)?, + PhysicalType::Interval => copy_rows::(self, mapping, dest)?, + PhysicalType::Utf8 => copy_rows::(self, mapping, dest)?, + _ => unimplemented!(), + } + + Ok(()) + } +} + +/// Helper for copying rows. 
+fn copy_rows( + from: &Array, + mapping: impl IntoExactSizeIterator, + to: &mut Array, +) -> Result<()> +where + S: MutablePhysicalStorage, + B: BufferManager, +{ + let from_flat = from.flat_view()?; + let from_storage = S::get_addressable(from_flat.array_buffer)?; + + let to_data = to.data.try_as_mut()?; + let mut to_storage = S::get_addressable_mut(to_data)?; + + if from_flat.validity.all_valid() && to.validity.all_valid() { + for (from_idx, to_idx) in mapping.into_iter() { + let from_idx = from_flat.selection.get(from_idx).unwrap(); + let v = from_storage.get(from_idx).unwrap(); + to_storage.put(to_idx, v); + } + } else { + for (from_idx, to_idx) in mapping.into_iter() { + let from_idx = from_flat.selection.get(from_idx).unwrap(); + if from_flat.validity.is_valid(from_idx) { + let v = from_storage.get(from_idx).unwrap(); + to_storage.put(to_idx, v); + } else { + to.validity.set_invalid(to_idx); + } + } + } + + Ok(()) +} + +/// Implements `try_from_iter` for primitive types. +/// +/// Note these create arrays using Nop buffer manager and so really only +/// suitable for tests right now. +macro_rules! 
impl_primitive_from_iter { + ($prim:ty, $phys:ty, $typ_variant:ident) => { + impl TryFromExactSizeIterator<$prim> for Array { + type Error = RayexecError; + + fn try_from_iter>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + + let mut array = Array::new(DataType::$typ_variant, iter.len())?; + let slice = array.data.try_as_mut()?.try_as_slice_mut::<$phys>()?; + + for (dest, v) in slice.iter_mut().zip(iter) { + *dest = v; + } + + Ok(array) + } + } + }; } + +impl_primitive_from_iter!(bool, PhysicalBool, Boolean); + +impl_primitive_from_iter!(i8, PhysicalI8, Int8); +impl_primitive_from_iter!(i16, PhysicalI16, Int16); +impl_primitive_from_iter!(i32, PhysicalI32, Int32); +impl_primitive_from_iter!(i64, PhysicalI64, Int64); +impl_primitive_from_iter!(i128, PhysicalI128, Int128); + +impl_primitive_from_iter!(u8, PhysicalU8, UInt8); +impl_primitive_from_iter!(u16, PhysicalU16, UInt16); +impl_primitive_from_iter!(u32, PhysicalU32, UInt32); +impl_primitive_from_iter!(u64, PhysicalU64, UInt64); +impl_primitive_from_iter!(u128, PhysicalU128, UInt128); + +impl_primitive_from_iter!(f16, PhysicalF16, Float16); +impl_primitive_from_iter!(f32, PhysicalF32, Float32); +impl_primitive_from_iter!(f64, PhysicalF64, Float64); + +impl_primitive_from_iter!(Interval, PhysicalInterval, Interval); From 762cbc7c2b7f670431cd02f23848172ecddbb554 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 09:22:25 -0500 Subject: [PATCH 09/59] start executors --- .../rayexec_execution/src/arrays/array/exp.rs | 35 ++ .../src/arrays/buffer/physical_type.rs | 12 + .../src/arrays/buffer/string_view.rs | 4 + .../src/arrays/executor_exp/aggregate/mod.rs | 1 + .../src/arrays/executor_exp/mod.rs | 47 +++ .../src/arrays/executor_exp/scalar/mod.rs | 1 + .../src/arrays/executor_exp/scalar/unary.rs | 393 ++++++++++++++++++ crates/rayexec_execution/src/arrays/mod.rs | 1 + 8 files changed, 494 insertions(+) create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs 
create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/mod.rs create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 04f33383f..1c5a65589 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -117,6 +117,14 @@ impl Array where B: BufferManager, { + pub fn data(&self) -> &ArrayData { + &self.data + } + + pub fn validity(&self) -> &Validity { + &self.validity + } + pub fn capacity(&self) -> usize { self.data.capacity() } @@ -247,3 +255,30 @@ impl_primitive_from_iter!(f32, PhysicalF32, Float32); impl_primitive_from_iter!(f64, PhysicalF64, Float64); impl_primitive_from_iter!(Interval, PhysicalInterval, Interval); + +impl<'a> TryFromExactSizeIterator<&'a str> for Array { + type Error = RayexecError; + + fn try_from_iter>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + let len = iter.len(); + + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, len)?; + buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut addressable = buffer.try_as_string_view_addressable_mut()?; + + for (idx, v) in iter.enumerate() { + addressable.put(idx, v); + } + + Ok(Array { + datatype: DataType::Utf8, + validity: Validity::new_all_valid(len), + data: ArrayData::owned(buffer), + }) + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index c5b87884b..8f366cf25 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -55,6 +55,8 @@ impl PhysicalType { Self::Float32 => PhysicalF32::PRIMARY_BUFFER_TYPE_SIZE, Self::Float64 => 
PhysicalF64::PRIMARY_BUFFER_TYPE_SIZE, Self::Interval => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + Self::Utf8 => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + Self::Dictionary => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, _ => unimplemented!(), } @@ -127,6 +129,12 @@ where pub trait AddressableMut: Debug { type T: Send + Debug + ?Sized; + fn len(&self) -> usize; + + fn is_empty(&self) -> bool { + self.len() == 0 + } + /// Get a mutable reference to a value at the given index. fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T>; @@ -142,6 +150,10 @@ where { type T = T; + fn len(&self) -> usize { + (**self).len() + } + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { (**self).get_mut(idx) } diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index aae74a6ec..31968b618 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -31,6 +31,10 @@ pub struct StringViewAddressableMut<'a> { impl<'a> AddressableMut for StringViewAddressableMut<'a> { type T = str; + fn len(&self) -> usize { + self.metadata.len() + } + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { let m = self.metadata.get_mut(idx)?; let bs = self.heap.get_mut(m)?; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs @@ -0,0 +1 @@ + diff --git a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs new file mode 100644 index 000000000..6da833cc8 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs @@ -0,0 +1,47 @@ +pub mod aggregate; +pub mod scalar; + +use super::array::validity::Validity; +use 
super::buffer::physical_type::AddressableMut; +use super::buffer::ArrayBuffer; + +/// Wrapper around an array buffer and validity buffer that will be used to +/// construct a full array. +#[derive(Debug)] +pub struct OutBuffer<'a> { + pub buffer: &'a mut ArrayBuffer, + pub validity: &'a mut Validity, +} + +/// Helper for assigning a value to a location in a buffer. +#[derive(Debug)] +pub struct PutBuffer<'a, M> +where + M: AddressableMut, +{ + idx: usize, + buffer: &'a mut M, + validity: &'a mut Validity, +} + +impl<'a, M> PutBuffer<'a, M> +where + M: AddressableMut, +{ + pub(crate) fn new(idx: usize, buffer: &'a mut M, validity: &'a mut Validity) -> Self { + debug_assert_eq!(buffer.len(), validity.len()); + PutBuffer { + idx, + buffer, + validity, + } + } + + pub fn put(self, val: &M::T) { + self.buffer.put(self.idx, val) + } + + pub fn put_null(self) { + self.validity.set_invalid(self.idx) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs new file mode 100644 index 000000000..8328abc7c --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -0,0 +1 @@ +pub mod unary; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs new file mode 100644 index 000000000..9aeba3dd0 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -0,0 +1,393 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalStorage, +}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct UnaryExecutor; + +impl UnaryExecutor { + /// Execute a unary operation on `array`, placing results in `out`. 
+ pub fn execute( + array: &Array, + selection: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&S::StorageType, PutBuffer>), + { + if array.is_dictionary() { + let view = array.flat_view()?; + return Self::execute_flat::(view, selection, out, op); + } + + let input = S::get_addressable(array.data())?; + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity = array.validity(); + + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + op( + input.get(input_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + if validity.is_valid(input_idx) { + op( + input.get(input_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat<'a, S, O, Op>( + array: FlatArrayView<'a>, + selection: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut(&S::StorageType, PutBuffer>), + { + let input = S::get_addressable(&array.array_buffer)?; + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity = array.validity; + + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + op( + input.get(selected_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + op( + input.get(selected_idx).unwrap(), + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + 
out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + /// Executes an operation in place. + /// + /// Note that changing the lengths for variable length data is not yet + /// supported, as the length change won't persist since the metadata isn't + /// being changed. + pub fn execute_in_place(array: &mut Array, mut op: Op) -> Result<()> + where + S: MutablePhysicalStorage, + Op: FnMut(&mut S::StorageType), + { + let validity = &array.validity; + let mut input = S::get_addressable_mut(array.data.try_as_mut()?)?; + + if validity.all_valid() { + for idx in 0..input.len() { + op(input.get_mut(idx).unwrap()); + } + } else { + for idx in 0..input.len() { + if validity.is_valid(idx) { + op(input.get_mut(idx).unwrap()); + } + } + } + + Ok(()) + } + + /// Iterate over all values in a flat array view, calling `op` for each row. + /// + /// Valid values are represented with Some, invalid values are represented + /// with None. + /// + /// Note this should really only be used for tests. 
+ pub fn for_each_flat<'a, S, Op>( + array: FlatArrayView<'a>, + selection: impl IntoExactSizeIterator, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + Op: FnMut(usize, Option<&S::StorageType>), + { + let input = S::get_addressable(&array.array_buffer)?; + let validity = array.validity; + + if validity.all_valid() { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + let v = input.get(selected_idx).unwrap(); + + op(output_idx, Some(v)) + } + } else { + for (output_idx, input_idx) in selection.into_iter().enumerate() { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + let v = input.get(selected_idx).unwrap(); + op(output_idx, Some(v)); + } else { + op(output_idx, None); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::validity::Validity; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::buffer::string_view::{StringViewAddressableMut, StringViewHeap}; + use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + + #[test] + fn int32_inc_by_2() { + let array = Array::try_from_iter([1, 2, 3]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + UnaryExecutor::execute::( + &array, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], out_slice); + } + + #[test] + fn int32_inc_by_2_using_flat_view() { + let array = Array::try_from_iter([1, 2, 3]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let 
mut validity = Validity::new_all_valid(3); + + let flat = FlatArrayView::from_array(&array).unwrap(); + + UnaryExecutor::execute_flat::( + flat, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], out_slice); + } + + #[test] + fn int32_inc_by_2_in_place() { + let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); + + UnaryExecutor::execute_in_place::(&mut array, |v| *v = *v + 2).unwrap(); + + let arr_slice = array.data().try_as_slice::().unwrap(); + assert_eq!(&[3, 4, 5], arr_slice); + } + + #[test] + fn string_double_named_func() { + // Example with defined function, and allocating a new string every time. + let array = Array::try_from_iter([ + "a", + "bb", + "ccc", + "dddd", + "heapafter", // Inlined, will be moved to heap after doubling. + "alongerstringdontinline", + ]) + .unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut validity = Validity::new_all_valid(6); + + fn my_string_double(s: &str, buf: PutBuffer) { + let mut double = s.to_string(); + double.push_str(s); + buf.put(&double); + } + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + my_string_double, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + + assert_eq!("aa", out.get(0).unwrap()); + assert_eq!("bbbb", out.get(1).unwrap()); + assert_eq!("cccccc", out.get(2).unwrap()); + assert_eq!("dddddddd", out.get(3).unwrap()); + assert_eq!("heapafterheapafter", out.get(4).unwrap()); + assert_eq!( + "alongerstringdontinlinealongerstringdontinline", + out.get(5).unwrap() + ); + } + + #[test] + fn string_double_closure_reused_buf() { + // Same thing, but with closure 
reusing a string buffer. + let array = Array::try_from_iter([ + "a", + "bb", + "ccc", + "dddd", + "heapafter", // Inlined, will be moved to heap after doubling. + "alongerstringdontinline", + ]) + .unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + + let mut validity = Validity::new_all_valid(6); + + let mut string_buf = String::new(); + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |s, buf| { + string_buf.clear(); + + string_buf.push_str(s); + string_buf.push_str(s); + + buf.put(&string_buf); + }, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + + assert_eq!("aa", out.get(0).unwrap()); + assert_eq!("bbbb", out.get(1).unwrap()); + assert_eq!("cccccc", out.get(2).unwrap()); + assert_eq!("dddddddd", out.get(3).unwrap()); + assert_eq!("heapafterheapafter", out.get(4).unwrap()); + assert_eq!( + "alongerstringdontinlinealongerstringdontinline", + out.get(5).unwrap() + ); + } + + #[test] + fn string_uppercase_in_place() { + let mut array = Array::try_from_iter(["a", "bb", "ccc"]).unwrap(); + + UnaryExecutor::execute_in_place::(&mut array, |v| { + v.make_ascii_uppercase() + }) + .unwrap(); + + let out = array.data().try_as_string_view_addressable().unwrap(); + + assert_eq!("A", out.get(0).unwrap()); + assert_eq!("BB", out.get(1).unwrap()); + assert_eq!("CCC", out.get(2).unwrap()); + } + + // #[test] + // fn int32_inc_by_2_with_dict() { + // let mut array = Array::new_with_buffer( + // DataType::Int32, + // Int32BufferBuilder::from_iter([1, 2, 3]).unwrap(), + // ); + // // [3, 3, 2, 1, 1, 3] + // array.select(&NopBufferManager, [2, 2, 1, 0, 0, 2]).unwrap(); + + // let mut out = ArrayBuffer::with_capacity::(&NopBufferManager, 6).unwrap(); + // let mut validity = Validity::new_all_valid(6); + + // 
UnaryExecutor::execute::( + // &array, + // 0..6, + // OutBuffer { + // buffer: &mut out, + // validity: &mut validity, + // }, + // |&v, buf| buf.put(&(v + 2)), + // ) + // .unwrap(); + // assert!(validity.all_valid()); + + // let out_slice = out.try_as_slice::().unwrap(); + // assert_eq!(&[5, 5, 4, 3, 3, 5], out_slice); + // } +} diff --git a/crates/rayexec_execution/src/arrays/mod.rs b/crates/rayexec_execution/src/arrays/mod.rs index 970a90e32..d7b9ca4ca 100644 --- a/crates/rayexec_execution/src/arrays/mod.rs +++ b/crates/rayexec_execution/src/arrays/mod.rs @@ -5,6 +5,7 @@ pub mod buffer; pub mod compute; pub mod datatype; pub mod executor; +pub mod executor_exp; pub mod field; pub mod format; pub mod row; From 39c6d165bed89d9cbca6018cb5b140ef823ddc58 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 11:07:45 -0500 Subject: [PATCH 10/59] binary --- .../rayexec_execution/src/arrays/array/exp.rs | 112 ++++++- .../src/arrays/executor_exp/scalar/binary.rs | 282 ++++++++++++++++++ .../src/arrays/executor_exp/scalar/mod.rs | 1 + .../src/arrays/executor_exp/scalar/unary.rs | 52 ++-- 4 files changed, 419 insertions(+), 28 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 1c5a65589..5f548067f 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -11,6 +11,7 @@ use crate::arrays::buffer::physical_type::{ AddressableMut, MutablePhysicalStorage, PhysicalBool, + PhysicalDictionary, PhysicalF16, PhysicalF32, PhysicalF64, @@ -29,7 +30,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalUtf8, }; use crate::arrays::buffer::string_view::StringViewHeap; -use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; +use crate::arrays::buffer::{ArrayBuffer, DictionaryBuffer, SecondaryBuffer}; use crate::arrays::datatype::DataType; use 
crate::arrays::scalar::interval::Interval; @@ -125,6 +126,16 @@ where &self.validity } + pub fn put_validity(&mut self, validity: Validity) -> Result<()> { + if validity.len() != self.data().capacity() { + return Err(RayexecError::new("Invalid validity length") + .with_field("got", validity.len()) + .with_field("want", self.data.capacity())); + } + self.validity = validity; + Ok(()) + } + pub fn capacity(&self) -> usize { self.data.capacity() } @@ -170,6 +181,72 @@ where Ok(()) } + + /// Selects indice from the array. + /// + /// This will convert the underlying array buffer into a dictionary buffer. + pub fn select( + &mut self, + manager: &B, + selection: impl IntoExactSizeIterator, + ) -> Result<()> { + if self.is_dictionary() { + // Already dictionary, select the selection. + let sel = selection.into_iter(); + let mut new_buf = + ArrayBuffer::with_primary_capacity::(manager, sel.len())?; + + let old_sel = self.data.try_as_slice::()?; + let new_sel = new_buf.try_as_slice_mut::()?; + + for (sel_idx, sel_buf) in sel.zip(new_sel) { + let idx = old_sel[sel_idx]; + *sel_buf = idx; + } + + // Now swap the secondary buffers, the dictionary buffer will now be + // on `new_buf`. + std::mem::swap( + self.data.try_as_mut()?.get_secondary_mut(), // TODO: Should just clone the pointer if managed. + new_buf.get_secondary_mut(), + ); + + // And set the new buf, old buf gets dropped. + self.data = ArrayData::owned(new_buf); + + return Ok(()); + } + + let sel = selection.into_iter(); + let mut new_buf = + ArrayBuffer::with_primary_capacity::(manager, sel.len())?; + + let new_buf_slice = new_buf.try_as_slice_mut::()?; + + // Set all selection indices in the new array buffer. + for (sel_idx, sel_buf) in sel.zip(new_buf_slice) { + *sel_buf = sel_idx + } + + // TODO: Probably verify selection all in bounds. + + // Now replace the original buffer, and put the original buffer in the + // secondary buffer. 
+ let orig_validity = std::mem::replace( + &mut self.validity, + Validity::new_all_valid(new_buf.capacity()), + ); + let orig_buffer = std::mem::replace(&mut self.data, ArrayData::owned(new_buf)); + // TODO: Should just clone the pointer if managed. + self.data + .try_as_mut()? + .put_secondary_buffer(SecondaryBuffer::Dictionary(DictionaryBuffer { + validity: orig_validity, + buffer: orig_buffer, + })); + + Ok(()) + } } /// Helper for copying rows. @@ -282,3 +359,36 @@ impl<'a> TryFromExactSizeIterator<&'a str> for Array { }) } } + +/// From iterator implementation that creates an array from optionally valid +/// values. Some is treated as valid, None as invalid. +impl TryFromExactSizeIterator> for Array +where + V: Default, + Array: TryFromExactSizeIterator, +{ + type Error = RayexecError; + + fn try_from_iter>>( + iter: T, + ) -> Result { + let iter = iter.into_iter(); + let len = iter.len(); + + let mut validity = Validity::new_all_valid(len); + + // New iterator that just uses the default value for missing values, and + // sets the validity as appropriate. 
+ let iter = iter.enumerate().map(|(idx, v)| { + if v.is_none() { + validity.set_invalid(idx); + } + v.unwrap_or_default() + }); + + let mut array = Self::try_from_iter(iter)?; + array.put_validity(validity)?; + + Ok(array) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs new file mode 100644 index 000000000..3764884f1 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs @@ -0,0 +1,282 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct BinaryExecutor; + +impl BinaryExecutor { + pub fn execute( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&S1::StorageType, &S2::StorageType, PutBuffer>), + { + if array1.is_dictionary() || array2.is_dictionary() { + let view1 = FlatArrayView::from_array(array1)?; + let view2 = FlatArrayView::from_array(array2)?; + + return Self::execute_flat::(view1, sel1, view2, sel2, out, op); + } + + // TODO: length validation + + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = 
input2.get(input2_idx).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + if validity1.is_valid(input1_idx) && validity2.is_valid(input2_idx) { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat<'a, S1, S2, O, Op>( + array1: FlatArrayView<'a>, + sel1: impl IntoExactSizeIterator, + array2: FlatArrayView<'a>, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut(&S1::StorageType, &S2::StorageType, PutBuffer>), + { + // TODO: length validation + + let input1 = S1::get_addressable(&array1.array_buffer)?; + let input2 = S2::get_addressable(&array2.array_buffer)?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = &array1.validity; + let validity2 = &array2.validity; + + if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + + if validity1.is_valid(sel1) && validity2.is_valid(sel2) { + let val1 = 
input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + + op( + val1, + val2, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::validity::Validity; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::buffer::string_view::StringViewHeap; + use crate::arrays::buffer::{ArrayBuffer, SecondaryBuffer}; + + #[test] + fn binary_simple_add() { + let left = Array::try_from_iter([1, 2, 3]).unwrap(); + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[5, 7, 9], out_slice); + } + + #[test] + fn binary_simple_add_with_selection() { + let mut left = Array::try_from_iter([2]).unwrap(); + // [2, 2, 2] + left.select(&NopBufferManager, [0, 0, 0]).unwrap(); + + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[6, 7, 8], out_slice); + } + + #[test] + fn binary_string_repeat() { + let left = Array::try_from_iter([1, 2, 
3]).unwrap(); + let right = Array::try_from_iter(["hello", "world", "goodbye!"]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + out.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + let mut validity = Validity::new_all_valid(3); + + let mut string_buf = String::new(); + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&repeat, s, buf| { + string_buf.clear(); + for _ in 0..repeat { + string_buf.push_str(s); + } + buf.put(&string_buf); + }, + ) + .unwrap(); + assert!(validity.all_valid()); + + let out = out.try_as_string_view_addressable().unwrap(); + assert_eq!("hello", out.get(0).unwrap()); + assert_eq!("worldworld", out.get(1).unwrap()); + assert_eq!("goodbye!goodbye!goodbye!", out.get(2).unwrap()); + } + + #[test] + fn binary_add_with_invalid() { + let left = Array::try_from_iter([Some(1), None, Some(3)]).unwrap(); + let right = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 3).unwrap(); + let mut validity = Validity::new_all_valid(3); + + BinaryExecutor::execute::( + &left, + 0..3, + &right, + 0..3, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&a, &b, buf| buf.put(&(a + b)), + ) + .unwrap(); + + let out_slice = out.try_as_slice::().unwrap(); + + assert!(validity.is_valid(0)); + assert_eq!(5, out_slice[0]); + + assert!(!validity.is_valid(1)); + + assert!(validity.is_valid(2)); + assert_eq!(9, out_slice[2]); + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs index 8328abc7c..7f1c4b5a8 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -1 +1,2 @@ +pub mod binary; pub mod unary; diff --git 
a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs index 9aeba3dd0..4e93b53cb 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -363,31 +363,29 @@ mod tests { assert_eq!("CCC", out.get(2).unwrap()); } - // #[test] - // fn int32_inc_by_2_with_dict() { - // let mut array = Array::new_with_buffer( - // DataType::Int32, - // Int32BufferBuilder::from_iter([1, 2, 3]).unwrap(), - // ); - // // [3, 3, 2, 1, 1, 3] - // array.select(&NopBufferManager, [2, 2, 1, 0, 0, 2]).unwrap(); - - // let mut out = ArrayBuffer::with_capacity::(&NopBufferManager, 6).unwrap(); - // let mut validity = Validity::new_all_valid(6); - - // UnaryExecutor::execute::( - // &array, - // 0..6, - // OutBuffer { - // buffer: &mut out, - // validity: &mut validity, - // }, - // |&v, buf| buf.put(&(v + 2)), - // ) - // .unwrap(); - // assert!(validity.all_valid()); - - // let out_slice = out.try_as_slice::().unwrap(); - // assert_eq!(&[5, 5, 4, 3, 3, 5], out_slice); - // } + #[test] + fn int32_inc_by_2_with_dict() { + let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); + // [3, 3, 2, 1, 1, 3] + array.select(&NopBufferManager, [2, 2, 1, 0, 0, 2]).unwrap(); + + let mut out = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 6).unwrap(); + let mut validity = Validity::new_all_valid(6); + + UnaryExecutor::execute::( + &array, + 0..6, + OutBuffer { + buffer: &mut out, + validity: &mut validity, + }, + |&v, buf| buf.put(&(v + 2)), + ) + .unwrap(); + assert!(validity.all_valid()); + + let out_slice = out.try_as_slice::().unwrap(); + assert_eq!(&[5, 5, 4, 3, 3, 5], out_slice); + } } From 459acf72637406a60fc8218d5020b617fd2ccfd0 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 11:33:45 -0500 Subject: [PATCH 11/59] temp rename --- crates/docgen/src/markdown_table.rs | 6 +-- 
crates/rayexec_csv/src/copy_to.rs | 6 +-- crates/rayexec_csv/src/datatable.rs | 4 +- crates/rayexec_csv/src/reader.rs | 10 ++-- crates/rayexec_csv/src/writer.rs | 4 +- crates/rayexec_debug/src/discard.rs | 4 +- crates/rayexec_debug/src/table_storage.rs | 16 +++---- crates/rayexec_delta/src/datatable.rs | 4 +- crates/rayexec_delta/src/protocol/table.rs | 4 +- crates/rayexec_execution/src/arrays/batch.rs | 24 +++++----- .../src/arrays/format/pretty/table.rs | 48 ++++++++++--------- .../src/arrays/format/ugly.rs | 6 +-- .../rayexec_execution/src/arrays/testutil.rs | 4 +- crates/rayexec_execution/src/engine/result.rs | 10 ++-- .../src/engine/server_state.rs | 4 +- .../src/execution/computed_batch.rs | 14 +++--- .../src/execution/executable/pipeline.rs | 6 +-- .../intermediate/planner/plan_describe.rs | 4 +- .../intermediate/planner/plan_explain.rs | 4 +- .../intermediate/planner/plan_scan.rs | 8 ++-- .../intermediate/planner/plan_show_var.rs | 4 +- .../src/execution/operators/analyze.rs | 4 +- .../src/execution/operators/batch_resizer.rs | 4 +- .../src/execution/operators/create_schema.rs | 4 +- .../src/execution/operators/create_table.rs | 4 +- .../src/execution/operators/create_view.rs | 4 +- .../src/execution/operators/drop.rs | 4 +- .../src/execution/operators/empty.rs | 6 +-- .../src/execution/operators/filter.rs | 4 +- .../operators/hash_aggregate/drain.rs | 8 ++-- .../execution/operators/hash_aggregate/mod.rs | 8 ++-- .../operators/hash_join/condition.rs | 6 +-- .../operators/hash_join/global_hash_table.rs | 12 ++--- .../src/execution/operators/hash_join/mod.rs | 6 +-- .../hash_join/partition_hash_table.rs | 6 +-- .../src/execution/operators/limit.rs | 6 +-- .../src/execution/operators/materialize.rs | 6 +-- .../src/execution/operators/mod.rs | 8 ++-- .../src/execution/operators/nl_join.rs | 20 ++++---- .../src/execution/operators/project.rs | 6 +-- .../src/execution/operators/round_robin.rs | 6 +-- .../src/execution/operators/scan.rs | 6 +-- 
.../src/execution/operators/simple.rs | 8 ++-- .../src/execution/operators/sink.rs | 10 ++-- .../execution/operators/sort/gather_sort.rs | 4 +- .../execution/operators/sort/scatter_sort.rs | 6 +-- .../src/execution/operators/sort/top_k.rs | 4 +- .../operators/sort/util/accumulator.rs | 10 ++-- .../execution/operators/sort/util/merger.rs | 8 ++-- .../operators/sort/util/sort_keys.rs | 6 +-- .../operators/sort/util/sorted_batch.rs | 10 ++-- .../src/execution/operators/source.rs | 8 ++-- .../src/execution/operators/table_function.rs | 6 +-- .../src/execution/operators/table_inout.rs | 8 ++-- .../src/execution/operators/test_util.rs | 12 ++--- .../operators/ungrouped_aggregate.rs | 8 ++-- .../src/execution/operators/union.rs | 8 ++-- .../src/execution/operators/unnest.rs | 6 +-- .../src/execution/operators/util/broadcast.rs | 12 ++--- .../operators/util/outer_join_tracker.rs | 26 +++++----- .../src/execution/operators/util/resizer.rs | 34 ++++++------- .../src/execution/operators/values.rs | 10 ++-- .../src/execution/operators/window/mod.rs | 4 +- .../src/expr/physical/case_expr.rs | 6 +-- .../src/expr/physical/cast_expr.rs | 4 +- .../src/expr/physical/column_expr.rs | 4 +- .../src/expr/physical/literal_expr.rs | 4 +- .../src/expr/physical/mod.rs | 10 ++-- .../src/expr/physical/scalar_function_expr.rs | 4 +- .../src/functions/table/builtin/series.rs | 8 ++-- .../src/functions/table/builtin/system.rs | 22 ++++----- .../src/functions/table/builtin/unnest.rs | 6 +-- .../src/functions/table/inout.rs | 6 +-- crates/rayexec_execution/src/hybrid/buffer.rs | 16 +++---- crates/rayexec_execution/src/hybrid/client.rs | 6 +-- crates/rayexec_execution/src/hybrid/stream.rs | 6 +-- .../src/optimizer/expr_rewrite/const_fold.rs | 4 +- .../rayexec_execution/src/storage/memory.rs | 12 ++--- .../src/storage/table_storage.rs | 10 ++-- crates/rayexec_iceberg/src/datatable.rs | 4 +- crates/rayexec_iceberg/src/table.rs | 4 +- crates/rayexec_parquet/src/copy_to.rs | 6 +-- 
.../src/functions/datatable.rs | 4 +- crates/rayexec_parquet/src/reader/mod.rs | 8 ++-- crates/rayexec_parquet/src/writer/mod.rs | 6 +-- crates/rayexec_postgres/src/lib.rs | 10 ++-- crates/rayexec_shell/src/result_table.rs | 10 ++-- crates/rayexec_unity_catalog/src/functions.rs | 14 +++--- crates/rayexec_wasm/src/session.rs | 8 ++-- test_bin/integration_slt_hybrid.rs | 6 +-- 90 files changed, 375 insertions(+), 373 deletions(-) diff --git a/crates/docgen/src/markdown_table.rs b/crates/docgen/src/markdown_table.rs index 591d0c548..51783e48b 100644 --- a/crates/docgen/src/markdown_table.rs +++ b/crates/docgen/src/markdown_table.rs @@ -1,7 +1,7 @@ use std::fmt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::{FormatOptions, Formatter}; @@ -13,7 +13,7 @@ const FORMATTER: Formatter = Formatter::new(FormatOptions { pub fn write_markdown_table<'a>( output: &mut dyn fmt::Write, schema: &Schema, - batches: impl IntoIterator, + batches: impl IntoIterator, ) -> Result<()> { // 'field1 | field2 | field3' let header = schema @@ -62,7 +62,7 @@ mod tests { #[test] fn simple() { - let batch = Batch::try_new([ + let batch = Batch2::try_new([ Array2::from_iter([1, 2, 3]), Array2::from_iter(["cat", "dog", "mouse"]), ]) diff --git a/crates/rayexec_csv/src/copy_to.rs b/crates/rayexec_csv/src/copy_to.rs index aef93f92e..130703e87 100644 --- a/crates/rayexec_csv/src/copy_to.rs +++ b/crates/rayexec_csv/src/copy_to.rs @@ -1,7 +1,7 @@ use futures::future::BoxFuture; use futures::FutureExt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use rayexec_execution::functions::copy::CopyToFunction; @@ -53,7 +53,7 @@ pub struct CsvCopyToSink { } impl CsvCopyToSink 
{ - async fn push_inner(&mut self, batch: Batch) -> Result<()> { + async fn push_inner(&mut self, batch: Batch2) -> Result<()> { let mut buf = Vec::with_capacity(1024); self.encoder.encode(&batch, &mut buf)?; self.sink.write_all(buf.into()).await?; @@ -68,7 +68,7 @@ impl CsvCopyToSink { } impl PartitionSink for CsvCopyToSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { self.push_inner(batch).boxed() } diff --git a/crates/rayexec_csv/src/datatable.rs b/crates/rayexec_csv/src/datatable.rs index 197822514..4bf1cda7b 100644 --- a/crates/rayexec_csv/src/datatable.rs +++ b/crates/rayexec_csv/src/datatable.rs @@ -2,7 +2,7 @@ use std::fmt::{self, Debug}; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::runtime::Runtime; use rayexec_execution::storage::table_storage::{ DataTable, @@ -60,7 +60,7 @@ pub struct CsvFileScan { } impl DataTableScan for CsvFileScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.reader.read_next().await }) } } diff --git a/crates/rayexec_csv/src/reader.rs b/crates/rayexec_csv/src/reader.rs index 2b0ff3721..f733344f1 100644 --- a/crates/rayexec_csv/src/reader.rs +++ b/crates/rayexec_csv/src/reader.rs @@ -24,7 +24,7 @@ use futures::stream::BoxStream; use futures::StreamExt; use rayexec_error::{RayexecError, Result}; use rayexec_execution::arrays::array::{Array2, ArrayData2}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::compute::cast::parse::{ BoolParser, @@ -342,7 +342,7 @@ impl AsyncCsvReader { AsyncCsvReader { stream } } - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { 
self.stream.next_batch().await } } @@ -387,7 +387,7 @@ struct AsyncCsvStream { } impl AsyncCsvStream { - async fn next_batch(&mut self) -> Result> { + async fn next_batch(&mut self) -> Result> { loop { let (buf, offset) = match self.buf.take() { Some(buf) => (buf, self.buf_offset), @@ -455,7 +455,7 @@ impl AsyncCsvStream { completed: CompletedRecords, schema: &Schema, skip_header: bool, - ) -> Result { + ) -> Result { let skip_records = if skip_header { 1 } else { 0 }; let mut arrs = Vec::with_capacity(schema.fields.len()); @@ -483,7 +483,7 @@ impl AsyncCsvStream { arrs.push(arr); } - Batch::try_new(arrs) + Batch2::try_new(arrs) } fn build_boolean( diff --git a/crates/rayexec_csv/src/writer.rs b/crates/rayexec_csv/src/writer.rs index d6034f225..42e049310 100644 --- a/crates/rayexec_csv/src/writer.rs +++ b/crates/rayexec_csv/src/writer.rs @@ -2,7 +2,7 @@ use std::io::Write as _; use csv::ByteRecord; use rayexec_error::{Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::{FormatOptions, Formatter}; @@ -38,7 +38,7 @@ impl CsvEncoder { } } - pub fn encode(&mut self, batch: &Batch, output_buf: &mut Vec) -> Result<()> { + pub fn encode(&mut self, batch: &Batch2, output_buf: &mut Vec) -> Result<()> { const FORMATTER: Formatter = Formatter::new(FormatOptions::new()); let mut csv_writer = csv::WriterBuilder::new() diff --git a/crates/rayexec_debug/src/discard.rs b/crates/rayexec_debug/src/discard.rs index 5ffab5eca..3a32bbc31 100644 --- a/crates/rayexec_debug/src/discard.rs +++ b/crates/rayexec_debug/src/discard.rs @@ -1,6 +1,6 @@ use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use 
rayexec_execution::functions::copy::CopyToFunction; @@ -33,7 +33,7 @@ impl CopyToFunction for DiscardCopyToFunction { struct DiscardCopyToSink; impl PartitionSink for DiscardCopyToSink { - fn push(&mut self, _batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, _batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { Ok(()) }) } diff --git a/crates/rayexec_debug/src/table_storage.rs b/crates/rayexec_debug/src/table_storage.rs index a3fb00e5c..3362e26b0 100644 --- a/crates/rayexec_debug/src/table_storage.rs +++ b/crates/rayexec_debug/src/table_storage.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Field; use rayexec_execution::database::catalog_entry::CatalogEntry; use rayexec_execution::execution::operators::sink::PartitionSink; @@ -28,7 +28,7 @@ pub struct TablePreload { pub schema: String, pub name: String, pub columns: Vec, - pub data: Batch, + pub data: Batch2, } #[derive(Debug, Default)] @@ -124,7 +124,7 @@ impl TableStorage for DebugTableStorage { #[derive(Debug, Clone, Default)] pub struct DebugDataTable { - data: Arc>>, + data: Arc>>, } impl DataTable for DebugDataTable { @@ -168,23 +168,23 @@ impl DataTable for DebugDataTable { #[derive(Debug)] pub struct DebugDataTableScan { - data: Vec, + data: Vec, } impl DataTableScan for DebugDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { Ok(self.data.pop()) }) } } #[derive(Debug)] pub struct DebugDataTableInsert { - collected: Vec, - data: Arc>>, + collected: Vec, + data: Arc>>, } impl PartitionSink for DebugDataTableInsert { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { 
self.collected.push(batch); Ok(()) diff --git a/crates/rayexec_delta/src/datatable.rs b/crates/rayexec_delta/src/datatable.rs index 003182e7c..4b004eb38 100644 --- a/crates/rayexec_delta/src/datatable.rs +++ b/crates/rayexec_delta/src/datatable.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; use crate::protocol::table::{Table, TableScan}; @@ -34,7 +34,7 @@ struct DeltaTableScan { } impl DataTableScan for DeltaTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.scan.read_next().await }) } } diff --git a/crates/rayexec_delta/src/protocol/table.rs b/crates/rayexec_delta/src/protocol/table.rs index 2e99e2d7b..379c2f723 100644 --- a/crates/rayexec_delta/src/protocol/table.rs +++ b/crates/rayexec_delta/src/protocol/table.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use futures::{StreamExt, TryStreamExt}; use rayexec_error::{not_implemented, RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit, TimestampTypeMeta}; use rayexec_execution::arrays::field::{Field, Schema}; use rayexec_execution::arrays::scalar::decimal::{Decimal128Type, DecimalType}; @@ -182,7 +182,7 @@ pub struct TableScan { impl TableScan { /// Read the next batch. 
- pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { loop { if self.current.is_none() { let path = match self.paths.pop_front() { diff --git a/crates/rayexec_execution/src/arrays/batch.rs b/crates/rayexec_execution/src/arrays/batch.rs index d61ea434b..413db2822 100644 --- a/crates/rayexec_execution/src/arrays/batch.rs +++ b/crates/rayexec_execution/src/arrays/batch.rs @@ -9,7 +9,7 @@ use crate::arrays::selection::SelectionVector; /// A batch of same-length arrays. #[derive(Debug, Clone, PartialEq)] -pub struct Batch { +pub struct Batch2 { /// Columns that make up this batch. cols: Vec, @@ -18,16 +18,16 @@ pub struct Batch { num_rows: usize, } -impl Batch { +impl Batch2 { pub const fn empty() -> Self { - Batch { + Batch2 { cols: Vec::new(), num_rows: 0, } } pub fn empty_with_num_rows(num_rows: usize) -> Self { - Batch { + Batch2 { cols: Vec::new(), num_rows, } @@ -36,7 +36,7 @@ impl Batch { /// Concat multiple batches into one. /// /// Batches are requried to have the same logical schemas. - pub fn concat(batches: &[Batch]) -> Result { + pub fn concat(batches: &[Batch2]) -> Result { let num_cols = match batches.first() { Some(batch) => batch.num_columns(), None => return Err(RayexecError::new("Cannot concat zero batches")), @@ -57,7 +57,7 @@ impl Batch { // Special case for zero col batches. The true number of rows wouldn't // be reflected if we just attempted to concat no array. if num_cols == 0 { - return Ok(Batch::empty_with_num_rows(num_rows)); + return Ok(Batch2::empty_with_num_rows(num_rows)); } let mut output_cols = Vec::with_capacity(num_cols); @@ -74,7 +74,7 @@ impl Batch { working_arrays.clear(); } - Batch::try_new(output_cols) + Batch2::try_new(output_cols) } /// Create a new batch from some number of arrays. 
@@ -96,7 +96,7 @@ impl Batch { } } - Ok(Batch { + Ok(Batch2 { cols, num_rows: len, }) @@ -106,7 +106,7 @@ impl Batch { pub fn project(&self, indices: &[usize]) -> Self { let cols = indices.iter().map(|idx| self.cols[*idx].clone()).collect(); - Batch { + Batch2 { cols, num_rows: self.num_rows, } @@ -114,7 +114,7 @@ impl Batch { pub fn slice(&self, offset: usize, count: usize) -> Self { let cols = self.cols.iter().map(|c| c.slice(offset, count)).collect(); - Batch { + Batch2 { cols, num_rows: count, } @@ -124,7 +124,7 @@ impl Batch { /// /// This accepts an Arc selection as it'll be cloned for each array in the /// batch. - pub fn select(&self, selection: Arc) -> Batch { + pub fn select(&self, selection: Arc) -> Batch2 { let cols = self .cols .iter() @@ -135,7 +135,7 @@ impl Batch { }) .collect(); - Batch { + Batch2 { cols, num_rows: selection.as_ref().num_rows(), } diff --git a/crates/rayexec_execution/src/arrays/format/pretty/table.rs b/crates/rayexec_execution/src/arrays/format/pretty/table.rs index 90dd98a03..24d8789cb 100644 --- a/crates/rayexec_execution/src/arrays/format/pretty/table.rs +++ b/crates/rayexec_execution/src/arrays/format/pretty/table.rs @@ -8,7 +8,7 @@ use textwrap::{fill_inplace, wrap}; use super::display::{table_width, Alignment, PrettyFooter, PrettyHeader, PrettyValues}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::field::Schema; use crate::arrays::format::{FormatOptions, Formatter}; @@ -21,7 +21,7 @@ const DEFAULT_MAX_ROWS: usize = 50; pub fn pretty_format_batches( schema: &Schema, - batches: &[Batch], + batches: &[Batch2], max_width: usize, max_rows: Option, ) -> Result { @@ -40,7 +40,7 @@ impl PrettyTable { /// Try to create a new pretty-formatted table. 
pub fn try_new( schema: &Schema, - batches: &[Batch], + batches: &[Batch2], max_width: usize, max_rows: Option, ) -> Result { @@ -248,7 +248,7 @@ impl PrettyTable { } fn column_values_for_batch( - batch: &Batch, + batch: &Batch2, format: &TableFormat, range: Range, ) -> Result<(Vec, usize)> { @@ -841,7 +841,7 @@ mod tests { Field::new("b", DataType::Int32, true), ]); - let batch = Batch::try_new(vec![ + let batch = Batch2::try_new(vec![ Array2::from_iter([Some("a"), Some("b"), None, Some("d")]), Array2::from_iter([Some(1), None, Some(10), Some(100)]), ]) @@ -873,7 +873,7 @@ mod tests { Field::new("c3", DataType::Utf8, true), ]); - let batch = Batch::try_new(vec![ + let batch = Batch2::try_new(vec![ Array2::from_iter([Some("a\nb"), Some("c"), Some("d")]), Array2::from_iter([Some(1), Some(10), Some(100)]), Array2::from_iter([Some("Mario"), Some("Yoshi"), Some("Luigi\nPeach")]), @@ -906,7 +906,7 @@ mod tests { Field::new("b", DataType::Int32, true), ]); - let batch = Batch::try_new(vec![ + let batch = Batch2::try_new(vec![ Array2::from_iter([Some("a")]), Array2::from_iter([Some(1)]), ]) @@ -940,7 +940,7 @@ mod tests { ]); let create_batch = |s, n| { - Batch::try_new([Array2::from_iter([Some(s)]), Array2::from_iter([Some(n)])]).unwrap() + Batch2::try_new([Array2::from_iter([Some(s)]), Array2::from_iter([Some(n)])]).unwrap() }; let batches = vec![ @@ -983,10 +983,11 @@ mod tests { let a_vals: Vec<_> = (0..10).map(|v| v.to_string()).collect(); let b_vals: Vec<_> = (0..10).map(Some).collect(); - let batches = - vec![ - Batch::try_new(vec![Array2::from_iter(a_vals), Array2::from_iter(b_vals)]).unwrap(), - ]; + let batches = vec![Batch2::try_new(vec![ + Array2::from_iter(a_vals), + Array2::from_iter(b_vals), + ]) + .unwrap()]; let table = pretty_format_batches(&schema, &batches, 80, Some(4)).unwrap(); @@ -1019,10 +1020,11 @@ mod tests { let a_vals: Vec<_> = (0..10).map(|v| Some(v.to_string())).collect(); let b_vals: Vec<_> = (0..10).map(Some).collect(); - let batches = - 
vec![ - Batch::try_new(vec![Array2::from_iter(a_vals), Array2::from_iter(b_vals)]).unwrap(), - ]; + let batches = vec![Batch2::try_new(vec![ + Array2::from_iter(a_vals), + Array2::from_iter(b_vals), + ]) + .unwrap()]; let table = pretty_format_batches(&schema, &batches, 80, Some(3)).unwrap(); @@ -1054,7 +1056,7 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ + Batch2::try_new(vec![ Array2::from_iter([Some(a)]), Array2::from_iter([Some(b)]), Array2::from_iter([Some(c)]), @@ -1104,7 +1106,7 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ + Batch2::try_new(vec![ Array2::from_iter([Some(a)]), Array2::from_iter([Some(b)]), Array2::from_iter([Some(c)]), @@ -1148,7 +1150,7 @@ mod tests { ]); let create_batch = |a, b, c| { - Batch::try_new(vec![ + Batch2::try_new(vec![ Array2::from_iter([Some(a)]), Array2::from_iter([Some(b)]), Array2::from_iter([Some(c)]), @@ -1191,7 +1193,7 @@ mod tests { ]); let create_batch = |a, b, c, d| { - Batch::try_new(vec![ + Batch2::try_new(vec![ Array2::from_iter([Some(a)]), Array2::from_iter([Some(b)]), Array2::from_iter([Some(c)]), @@ -1266,21 +1268,21 @@ mod tests { ]); // First record should be printed. - let first = Batch::try_new(vec![ + let first = Batch2::try_new(vec![ Array2::from_iter([Some("1"), Some("2")]), Array2::from_iter([Some(1), Some(2)]), ]) .unwrap(); // Nothing in this batch should be printed. - let middle = Batch::try_new(vec![ + let middle = Batch2::try_new(vec![ Array2::from_iter([Some("3"), Some("4")]), Array2::from_iter([Some(3), Some(4)]), ]) .unwrap(); // Last record should be printed. 
- let last = Batch::try_new(vec![ + let last = Batch2::try_new(vec![ Array2::from_iter([Some("5"), Some("6")]), Array2::from_iter([Some(5), Some(6)]), ]) diff --git a/crates/rayexec_execution/src/arrays/format/ugly.rs b/crates/rayexec_execution/src/arrays/format/ugly.rs index f52d5565a..097902afe 100644 --- a/crates/rayexec_execution/src/arrays/format/ugly.rs +++ b/crates/rayexec_execution/src/arrays/format/ugly.rs @@ -2,13 +2,13 @@ use std::fmt::Write as _; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::arrays::format::{FormatOptions, Formatter}; pub fn ugly_format_no_schema<'a, I>(batches: I) -> Result where - I: IntoIterator, + I: IntoIterator, { const OPTS: FormatOptions = FormatOptions::new(); let formatter = Formatter::new(OPTS); @@ -40,7 +40,7 @@ where pub fn ugly_format<'a, I>(schema: &Schema, batches: I) -> Result where - I: IntoIterator, + I: IntoIterator, { const OPTS: FormatOptions = FormatOptions::new(); let formatter = Formatter::new(OPTS); diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index 3d4749da7..b7ca194b5 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -6,7 +6,7 @@ //! Should not be used outside of tests. use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Asserts that two arrays are logically equal. pub fn assert_arrays_eq(a: &Array2, b: &Array2) { @@ -22,7 +22,7 @@ pub fn assert_arrays_eq(a: &Array2, b: &Array2) { } /// Asserts that two batches are logically equal. 
-pub fn assert_batches_eq(a: &Batch, b: &Batch) { +pub fn assert_batches_eq(a: &Batch2, b: &Batch2) { assert_eq!(a.num_rows(), b.num_rows(), "num rows differ"); assert_eq!(a.num_columns(), b.num_columns(), "num columns differ"); diff --git a/crates/rayexec_execution/src/engine/result.rs b/crates/rayexec_execution/src/engine/result.rs index 2b5fef6d7..45a0ab6b0 100644 --- a/crates/rayexec_execution/src/engine/result.rs +++ b/crates/rayexec_execution/src/engine/result.rs @@ -9,7 +9,7 @@ use rayexec_error::{RayexecError, Result}; use tracing::warn; use super::profiler::PlanningProfileData; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::database::DatabaseContext; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -52,7 +52,7 @@ pub struct ResultStream { } impl Stream for ResultStream { - type Item = Result; + type Item = Result; fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let mut inner = self.inner.lock(); @@ -117,7 +117,7 @@ pub struct ResultPartitionSink { } impl PartitionSink for ResultPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(PushFuture { batch: Some(batch), inner: self.inner.clone(), @@ -157,7 +157,7 @@ impl ErrorSink for ResultErrorSink { /// This lets us inject an error into the stream that arises outside of stream. 
#[derive(Debug)] struct InnerState { - batch: Option, + batch: Option, error: Option, finished: bool, push_waker: Option, @@ -165,7 +165,7 @@ struct InnerState { } struct PushFuture { - batch: Option, + batch: Option, inner: Arc>, } diff --git a/crates/rayexec_execution/src/engine/server_state.rs b/crates/rayexec_execution/src/engine/server_state.rs index b0b3d3228..c8c7c9bfc 100644 --- a/crates/rayexec_execution/src/engine/server_state.rs +++ b/crates/rayexec_execution/src/engine/server_state.rs @@ -4,7 +4,7 @@ use dashmap::DashMap; use rayexec_error::{not_implemented, RayexecError, Result}; use uuid::Uuid; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::{Field, Schema}; use crate::config::execution::{ExecutablePlanConfig, IntermediatePlanConfig}; use crate::config::session::SessionConfig; @@ -184,7 +184,7 @@ where Ok(()) } - pub fn push_batch_for_stream(&self, stream_id: StreamId, batch: Batch) -> Result<()> { + pub fn push_batch_for_stream(&self, stream_id: StreamId, batch: Batch2) -> Result<()> { self.buffers.push_batch_for_stream(&stream_id, batch) } diff --git a/crates/rayexec_execution/src/execution/computed_batch.rs b/crates/rayexec_execution/src/execution/computed_batch.rs index 2086c4800..4d60ad7e5 100644 --- a/crates/rayexec_execution/src/execution/computed_batch.rs +++ b/crates/rayexec_execution/src/execution/computed_batch.rs @@ -2,18 +2,18 @@ use std::collections::VecDeque; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Computed batch results from an operator. #[derive(Debug, PartialEq)] pub enum ComputedBatches { /// A single batch was computed. - Single(Batch), + Single(Batch2), /// Multiple batches were computed. /// /// These should be ordered by which batch should be pushed to next operator /// first. - Multi(VecDeque), + Multi(VecDeque), /// No batches computed. 
None, // TODO: Spill references @@ -25,7 +25,7 @@ impl ComputedBatches { /// This will filter out any batches that have no rows. pub fn new(batches: I) -> Self where - I: IntoIterator, + I: IntoIterator, I::IntoIter: ExactSizeIterator, { let mut iter = batches.into_iter(); @@ -83,7 +83,7 @@ impl ComputedBatches { /// Tries to get the next batch from this collection, returning None when no /// batches remain. - pub fn try_pop_front(&mut self) -> Result> { + pub fn try_pop_front(&mut self) -> Result> { match self { Self::Single(_) => { let orig = std::mem::replace(self, Self::None); @@ -100,8 +100,8 @@ impl ComputedBatches { } } -impl From for ComputedBatches { - fn from(value: Batch) -> Self { +impl From for ComputedBatches { + fn from(value: Batch2) -> Self { Self::Single(value) } } diff --git a/crates/rayexec_execution/src/execution/executable/pipeline.rs b/crates/rayexec_execution/src/execution/executable/pipeline.rs index de96dd8fd..df452e00a 100644 --- a/crates/rayexec_execution/src/execution/executable/pipeline.rs +++ b/crates/rayexec_execution/src/execution/executable/pipeline.rs @@ -6,7 +6,7 @@ use rayexec_error::{RayexecError, Result}; use tracing::trace; use super::profiler::OperatorProfileData; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::computed_batch::ComputedBatches; use crate::execution::operators::{ ExecutableOperator, @@ -274,7 +274,7 @@ pub enum PipelinePartitionState { operator_idx: usize, }, /// Need to push to an operator. - PushTo { batch: Batch, operator_idx: usize }, + PushTo { batch: Batch2, operator_idx: usize }, /// Need to finalize a push to an operator. FinalizePush { operator_idx: usize }, /// Pipeline is completed. @@ -450,7 +450,7 @@ impl ExecutablePartitionPipeline { operator_idx, } => { // To satisfy ownership. State will be updated anyways. 
- let batch = std::mem::replace(batch, Batch::empty()); + let batch = std::mem::replace(batch, Batch2::empty()); let operator = self .operators diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs index 9b5691ed3..9a03c18d0 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_describe.rs @@ -4,7 +4,7 @@ use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -26,7 +26,7 @@ impl IntermediatePipelineBuildState<'_> { let names = Array2::from_iter(describe.node.schema.iter().map(|f| f.name.as_str())); let datatypes = Array2::from_iter(describe.node.schema.iter().map(|f| f.datatype.to_string())); - let batch = Batch::try_new(vec![names, datatypes])?; + let batch = Batch2::try_new(vec![names, datatypes])?; let operator = IntermediateOperator { operator: Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![batch]))), diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs index fc0cac119..eebfbf13e 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_explain.rs @@ -5,7 +5,7 @@ use tracing::error; use super::{InProgressPipeline, IntermediatePipelineBuildState, Materializations, PipelineIdGen}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use 
crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -81,7 +81,7 @@ impl IntermediatePipelineBuildState<'_> { } let physical = Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ - Batch::try_new([ + Batch2::try_new([ Array2::from_iter(type_strings), Array2::from_iter(plan_strings), ])?, diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs index 0c19b4a78..a0ad5281a 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs @@ -4,7 +4,7 @@ use rayexec_error::{not_implemented, RayexecError, Result, ResultExt}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::scan::PhysicalScan; use crate::execution::operators::table_function::PhysicalTableFunction; @@ -77,7 +77,7 @@ impl IntermediatePipelineBuildState<'_> { &self, projections: Projections, rows: Vec>, - ) -> Result> { + ) -> Result> { if self.in_progress.is_some() { return Err(RayexecError::new("Expected in progress to be None")); } @@ -85,7 +85,7 @@ impl IntermediatePipelineBuildState<'_> { // TODO: This could probably be simplified. let mut row_arrs: Vec> = Vec::new(); // Row oriented. - let dummy_batch = Batch::empty_with_num_rows(1); + let dummy_batch = Batch2::empty_with_num_rows(1); // Convert expressions into arrays of one element each. 
for row_exprs in rows { @@ -106,7 +106,7 @@ impl IntermediatePipelineBuildState<'_> { let batches = row_arrs .into_iter() .map(|cols| { - let batch = Batch::try_new(cols)?; + let batch = Batch2::try_new(cols)?; // TODO: Got lazy, we can just avoid evaluating the expressions above. match &projections.column_indices { diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs index 7caedb8c2..3c348b4e1 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_show_var.rs @@ -4,7 +4,7 @@ use rayexec_error::{RayexecError, Result}; use super::{InProgressPipeline, IntermediatePipelineBuildState, PipelineIdGen}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::intermediate::pipeline::{IntermediateOperator, PipelineSource}; use crate::execution::operators::values::PhysicalValues; use crate::execution::operators::PhysicalOperator; @@ -26,7 +26,7 @@ impl IntermediatePipelineBuildState<'_> { let operator = IntermediateOperator { operator: Arc::new(PhysicalOperator::Values(PhysicalValues::new(vec![ - Batch::try_new([Array2::from_iter([show.value.to_string().as_str()])])?, + Batch2::try_new([Array2::from_iter([show.value.to_string().as_str()])])?, ]))), partitioning_requirement: Some(1), }; diff --git a/crates/rayexec_execution/src/execution/operators/analyze.rs b/crates/rayexec_execution/src/execution/operators/analyze.rs index ca4cb2deb..eb572feab 100644 --- a/crates/rayexec_execution/src/execution/operators/analyze.rs +++ b/crates/rayexec_execution/src/execution/operators/analyze.rs @@ -11,7 +11,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, 
ExplainEntry, Explainable}; @@ -33,7 +33,7 @@ impl ExecutableOperator for PhysicalAnalyze { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { unimplemented!() } diff --git a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs index 23c63d4d3..f43d94fc7 100644 --- a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs @@ -14,7 +14,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::computed_batch::ComputedBatches; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -65,7 +65,7 @@ impl ExecutableOperator for PhysicalBatchResizer { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, diff --git a/crates/rayexec_execution/src/execution/operators/create_schema.rs b/crates/rayexec_execution/src/execution/operators/create_schema.rs index 39bbf69ff..27305fe62 100644 --- a/crates/rayexec_execution/src/execution/operators/create_schema.rs +++ b/crates/rayexec_execution/src/execution/operators/create_schema.rs @@ -17,7 +17,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::create::CreateSchemaInfo; use crate::database::DatabaseContext; @@ -88,7 +88,7 @@ impl ExecutableOperator for PhysicalCreateSchema { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical create 
table")) } diff --git a/crates/rayexec_execution/src/execution/operators/create_table.rs b/crates/rayexec_execution/src/execution/operators/create_table.rs index e58aab9e7..3784096a1 100644 --- a/crates/rayexec_execution/src/execution/operators/create_table.rs +++ b/crates/rayexec_execution/src/execution/operators/create_table.rs @@ -6,7 +6,7 @@ use rayexec_proto::ProtoConv; use super::sink::{PartitionSink, SinkOperation, SinkOperator}; use super::util::barrier::PartitionBarrier; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::create::CreateTableInfo; use crate::database::DatabaseContext; @@ -119,7 +119,7 @@ struct CreateTablePartitionSink { } impl PartitionSink for CreateTablePartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { self.create_table_if_has_fut().await?; self.wait_for_sink_if_none().await; diff --git a/crates/rayexec_execution/src/execution/operators/create_view.rs b/crates/rayexec_execution/src/execution/operators/create_view.rs index 02a595433..dc2208304 100644 --- a/crates/rayexec_execution/src/execution/operators/create_view.rs +++ b/crates/rayexec_execution/src/execution/operators/create_view.rs @@ -16,7 +16,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::create::CreateViewInfo; use crate::database::DatabaseContext; @@ -86,7 +86,7 @@ impl ExecutableOperator for PhysicalCreateView { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical create view")) } diff --git a/crates/rayexec_execution/src/execution/operators/drop.rs b/crates/rayexec_execution/src/execution/operators/drop.rs index 
cf1d78fc8..13420f9a6 100644 --- a/crates/rayexec_execution/src/execution/operators/drop.rs +++ b/crates/rayexec_execution/src/execution/operators/drop.rs @@ -17,7 +17,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::drop::DropInfo; use crate::database::DatabaseContext; @@ -81,7 +81,7 @@ impl ExecutableOperator for PhysicalDrop { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } diff --git a/crates/rayexec_execution/src/execution/operators/empty.rs b/crates/rayexec_execution/src/execution/operators/empty.rs index 44ebb489e..ac812691c 100644 --- a/crates/rayexec_execution/src/execution/operators/empty.rs +++ b/crates/rayexec_execution/src/execution/operators/empty.rs @@ -12,7 +12,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::InputOutputStates; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -49,7 +49,7 @@ impl ExecutableOperator for PhysicalEmpty { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical empty")) } @@ -75,7 +75,7 @@ impl ExecutableOperator for PhysicalEmpty { Ok(PollPull::Exhausted) } else { state.finished = true; - Ok(PollPull::Computed(Batch::empty_with_num_rows(1).into())) + Ok(PollPull::Computed(Batch2::empty_with_num_rows(1).into())) } } other => panic!("inner join state is not building: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/filter.rs b/crates/rayexec_execution/src/execution/operators/filter.rs index 3315fcbf5..fa1da0533 100644 --- 
a/crates/rayexec_execution/src/execution/operators/filter.rs +++ b/crates/rayexec_execution/src/execution/operators/filter.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::{OptionExt, Result}; use super::simple::{SimpleOperator, StatelessOperation}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; @@ -23,7 +23,7 @@ impl FilterOperation { } impl StatelessOperation for FilterOperation { - fn execute(&self, batch: Batch) -> Result { + fn execute(&self, batch: Batch2) -> Result { let selection = self.predicate.select(&batch)?; let batch = batch.select(Arc::new(selection)); // TODO: Select mut diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs index af73e0466..3f3c55c5b 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::hash_table::HashTable; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Drains a hash table. /// @@ -15,7 +15,7 @@ pub struct HashTableDrain { } impl HashTableDrain { - fn next_inner(&mut self) -> Result> { + fn next_inner(&mut self) -> Result> { if self.drain_idx >= self.table.chunks.len() { return Ok(None); } @@ -31,14 +31,14 @@ impl HashTableDrain { .collect::>>()?; // Chunk arrays includes the GROUP ID column (last). 
- let batch = Batch::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; + let batch = Batch2::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; Ok(Some(batch)) } } impl Iterator for HashTableDrain { - type Item = Result; + type Item = Result; fn next(&mut self) -> Option { self.next_inner().transpose() diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index 4ed297739..c557e0b28 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -17,7 +17,7 @@ use rayexec_error::{RayexecError, Result}; use super::{ExecutionStates, InputOutputStates, PollFinalize}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; @@ -298,7 +298,7 @@ impl ExecutableOperator for PhysicalHashAggregate { _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, @@ -466,7 +466,7 @@ impl ExecutableOperator for PhysicalHashAggregate { arrays.push(array); } - let batch = Batch::try_new(arrays)?; + let batch = Batch2::try_new(arrays)?; Ok(PollPull::Computed(ComputedBatches::Single(batch))) } @@ -484,7 +484,7 @@ impl PhysicalHashAggregate { fn insert_batch_agg_hash_table( &self, state: &mut AggregatingPartitionState, - batch: Batch, + batch: Batch2, ) -> Result<()> { if batch.num_rows() == 0 { return Ok(()); diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs index 5fdcb25db..8da81d7f1 100644 --- 
a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; use crate::expr::physical::PhysicalScalarExpression; @@ -77,7 +77,7 @@ pub struct LeftPrecomputedJoinConditions { impl LeftPrecomputedJoinConditions { /// Compute the left side of the condition using the provided batch as /// input. - pub fn precompute_for_left_batch(&mut self, left: &Batch) -> Result<()> { + pub fn precompute_for_left_batch(&mut self, left: &Batch2) -> Result<()> { for condition in &mut self.conditions { let precomputed = condition.left.eval(left)?; condition.left_precomputed.push(precomputed.into_owned()) @@ -96,7 +96,7 @@ impl LeftPrecomputedJoinConditions { left_batch_idx: usize, left_row_sel: SelectionVector, right_row_sel: SelectionVector, - right: &Batch, + right: &Batch2, ) -> Result<(SelectionVector, SelectionVector)> { assert_eq!(left_row_sel.num_rows(), right_row_sel.num_rows()); diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs index a034c9964..6d0507f65 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/global_hash_table.rs @@ -11,7 +11,7 @@ use super::condition::{ LeftPrecomputedJoinConditions, }; use super::partition_hash_table::{PartitionHashTable, RowKey}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::selection::SelectionVector; use crate::execution::operators::util::outer_join_tracker::{ @@ -26,7 
+26,7 @@ use crate::execution::operators::util::outer_join_tracker::{ /// side. pub struct GlobalHashTable { /// All collected batches. - batches: Vec, + batches: Vec, /// Conditions we're joining on. conditions: LeftPrecomputedJoinConditions, /// Hash table pointing to a row. @@ -119,17 +119,17 @@ impl GlobalHashTable { } } - pub fn collected_batches(&self) -> &[Batch] { + pub fn collected_batches(&self) -> &[Batch2] { &self.batches } /// Probe the table. pub fn probe( &self, - right: &Batch, + right: &Batch2, hashes: &[u64], mut left_outer_tracker: Option<&mut LeftOuterJoinTracker>, - ) -> Result> { + ) -> Result> { // Track per-batch row indices that match the input columns. // // The value is a vec of (left_idx, right_idx) pairs pointing to rows in @@ -228,7 +228,7 @@ impl GlobalHashTable { let right_cols = right.select(Arc::new(right_row_sel)).into_arrays(); // Create final batch. - let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; batches.push(batch); } diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs index f9cf3d145..717e307be 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs @@ -24,7 +24,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; use crate::arrays::executor::scalar::HashExecutor; use crate::database::DatabaseContext; @@ -251,7 +251,7 @@ impl ExecutableOperator for PhysicalHashJoin { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { match partition_state { PartitionState::HashJoinBuild(state) => { @@ -605,7 +605,7 @@ impl PhysicalHashJoin { fn insert_into_local_table( &self, state: 
&mut HashJoinBuildPartitionState, - batch: Batch, + batch: Batch2, ) -> Result<()> { // Compute left hashes on equality conditions. diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs index 22b42f751..a6cfd1d50 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/partition_hash_table.rs @@ -4,7 +4,7 @@ use hashbrown::raw::RawTable; use rayexec_error::Result; use super::condition::{HashJoinCondition, LeftPrecomputedJoinConditions}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; /// Points to a row in the hash table. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -17,7 +17,7 @@ pub struct RowKey { pub struct PartitionHashTable { /// All collected batches. - pub batches: Vec, + pub batches: Vec, /// Conditions we're joining on. pub conditions: LeftPrecomputedJoinConditions, /// Hash table pointing to a row. @@ -39,7 +39,7 @@ impl PartitionHashTable { /// /// `hash_indices` indicates which columns in the batch was used to compute /// the hashes. 
- pub fn insert_batch(&mut self, batch: Batch, hashes: &[u64]) -> Result<()> { + pub fn insert_batch(&mut self, batch: Batch2, hashes: &[u64]) -> Result<()> { assert_eq!(batch.num_rows(), hashes.len()); self.conditions.precompute_for_left_batch(&batch)?; diff --git a/crates/rayexec_execution/src/execution/operators/limit.rs b/crates/rayexec_execution/src/execution/operators/limit.rs index ee0fca6ff..0335bb6b2 100644 --- a/crates/rayexec_execution/src/execution/operators/limit.rs +++ b/crates/rayexec_execution/src/execution/operators/limit.rs @@ -13,7 +13,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -29,7 +29,7 @@ pub struct LimitPartitionState { remaining_count: usize, /// A buffered batch. - buffer: Option, + buffer: Option, /// Waker on pull side if no batch is ready. pull_waker: Option, @@ -93,7 +93,7 @@ impl ExecutableOperator for PhysicalLimit { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, diff --git a/crates/rayexec_execution/src/execution/operators/materialize.rs b/crates/rayexec_execution/src/execution/operators/materialize.rs index d4e7510f1..0c8f04d0b 100644 --- a/crates/rayexec_execution/src/execution/operators/materialize.rs +++ b/crates/rayexec_execution/src/execution/operators/materialize.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result}; use super::sink::{PartitionSink, SinkOperation}; use super::source::{PartitionSource, SourceOperation}; use super::util::broadcast::{BroadcastChannel, BroadcastReceiver}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, 
ExplainEntry, Explainable}; use crate::logical::binder::bind_context::MaterializationRef; @@ -131,7 +131,7 @@ pub struct MaterializedDataPartitionSource { } impl PartitionSource for MaterializedDataPartitionSource { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { let fut = self.recv.recv(); Box::pin(async move { Ok(fut.await) }) } @@ -143,7 +143,7 @@ pub struct MaterializedDataPartitionSink { } impl PartitionSink for MaterializedDataPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { self.sender.send(batch); Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 1ceb06bcb..481822914 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -101,7 +101,7 @@ use self::sort::gather_sort::{ use self::sort::scatter_sort::ScatterSortPartitionState; use self::values::ValuesPartitionState; use super::computed_batch::ComputedBatches; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::engine::result::ResultSink; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -170,7 +170,7 @@ pub enum PollPush { /// /// A waker will be registered for a later wakeup. This same batch should be /// pushed at that time. - Pending(Batch), + Pending(Batch2), /// This operator requires no more input. /// @@ -289,7 +289,7 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result; /// Finalize pushing to partition. 
@@ -392,7 +392,7 @@ impl ExecutableOperator for PhysicalOperator { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { match self { Self::HashAggregate(op) => op.poll_push(cx, partition_state, operator_state, batch), diff --git a/crates/rayexec_execution/src/execution/operators/nl_join.rs b/crates/rayexec_execution/src/execution/operators/nl_join.rs index aeb4af246..4a4457540 100644 --- a/crates/rayexec_execution/src/execution/operators/nl_join.rs +++ b/crates/rayexec_execution/src/execution/operators/nl_join.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use super::util::outer_join_tracker::LeftOuterJoinTracker; use super::ComputedBatches; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; use crate::execution::operators::{ @@ -30,7 +30,7 @@ pub struct NestedLoopJoinBuildPartitionState { /// All batches on the build side for a single partition. /// /// For hash joins, this would be a partition-local hash map. - batches: Vec, + batches: Vec, } /// Partition-local state on the probe side. @@ -47,7 +47,7 @@ pub struct NestedLoopJoinProbePartitionState { /// All batches from all partitions received on the build side. /// /// Store in the probe side local state to avoid needing to lock. - all_batches: Arc>, + all_batches: Arc>, /// Bool for determining if `all_batches` has been populated from the global /// operator state. @@ -125,7 +125,7 @@ enum SharedOperatorState { Building { /// Build sides partitions write their batches here once they're done /// building. - batches: Vec, + batches: Vec, /// Number of partitions we're still waiting to complete on the build /// side. @@ -143,7 +143,7 @@ enum SharedOperatorState { /// Build is complete, we're now in the probing phase. Probing { /// All batches from all partitions. 
- batches: Arc>, + batches: Arc>, /// Union of all bitmaps across all partitions. /// @@ -247,7 +247,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { match partition_state { PartitionState::NestedLoopJoinBuild(state) => { @@ -421,12 +421,12 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { /// result. fn cross_join( left_batch_idx: usize, - left: &Batch, - right: &Batch, + left: &Batch2, + right: &Batch2, filter_expr: Option<&PhysicalScalarExpression>, mut left_outer_tracker: Option<&mut LeftOuterJoinTracker>, _right_join: bool, -) -> Result> { +) -> Result> { let mut batches = Vec::with_capacity(left.num_rows() * right.num_rows()); // For each row in the left batch, join the entirety of right. @@ -439,7 +439,7 @@ fn cross_join( // Columns from the right, all rows. let right_columns = right.clone().into_arrays(); - let mut output = Batch::try_new(left_columns.into_iter().chain(right_columns))?; + let mut output = Batch2::try_new(left_columns.into_iter().chain(right_columns))?; // If we have a filter, apply it to the output batch. 
if let Some(filter_expr) = &filter_expr { diff --git a/crates/rayexec_execution/src/execution/operators/project.rs b/crates/rayexec_execution/src/execution/operators/project.rs index 6bd1bc271..0eab9d78f 100644 --- a/crates/rayexec_execution/src/execution/operators/project.rs +++ b/crates/rayexec_execution/src/execution/operators/project.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use super::simple::{SimpleOperator, StatelessOperation}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; @@ -21,7 +21,7 @@ impl ProjectOperation { } impl StatelessOperation for ProjectOperation { - fn execute(&self, batch: Batch) -> Result { + fn execute(&self, batch: Batch2) -> Result { let arrs = self .exprs .iter() @@ -31,7 +31,7 @@ impl StatelessOperation for ProjectOperation { }) .collect::>>()?; - Batch::try_new(arrs) + Batch2::try_new(arrs) } } diff --git a/crates/rayexec_execution/src/execution/operators/round_robin.rs b/crates/rayexec_execution/src/execution/operators/round_robin.rs index a3fb83271..6852fb988 100644 --- a/crates/rayexec_execution/src/execution/operators/round_robin.rs +++ b/crates/rayexec_execution/src/execution/operators/round_robin.rs @@ -8,7 +8,7 @@ use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; use super::{ExecutionStates, InputOutputStates, PollFinalize}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, @@ -128,7 +128,7 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::RoundRobinPush(state) => state, @@ -247,7 +247,7 @@ struct BatchBufferInner 
{ /// Batches buffer. /// /// Should be bounded to some capacity. - batches: VecDeque, + batches: VecDeque, /// Waker on the receiving side of the buffer. recv_waker: Option, diff --git a/crates/rayexec_execution/src/execution/operators/scan.rs b/crates/rayexec_execution/src/execution/operators/scan.rs index b58989bc9..500827472 100644 --- a/crates/rayexec_execution/src/execution/operators/scan.rs +++ b/crates/rayexec_execution/src/execution/operators/scan.rs @@ -17,7 +17,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; use crate::database::catalog_entry::CatalogEntry; use crate::database::DatabaseContext; @@ -28,7 +28,7 @@ use crate::storage::table_storage::{DataTableScan, Projections}; pub struct ScanPartitionState { scan: Box, /// In progress pull we're working on. - future: Option>>>, + future: Option>>>, } impl fmt::Debug for ScanPartitionState { @@ -99,7 +99,7 @@ impl ExecutableOperator for PhysicalScan { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } diff --git a/crates/rayexec_execution/src/execution/operators/simple.rs b/crates/rayexec_execution/src/execution/operators/simple.rs index 1a34ba2b9..75a145f76 100644 --- a/crates/rayexec_execution/src/execution/operators/simple.rs +++ b/crates/rayexec_execution/src/execution/operators/simple.rs @@ -14,14 +14,14 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; #[derive(Debug)] pub struct SimplePartitionState { /// A batch that's waiting to be pulled. - buffered: Option, + buffered: Option, /// Waker on the pull side. 
/// @@ -58,7 +58,7 @@ impl SimplePartitionState { /// A stateless operation on a batch. pub trait StatelessOperation: Sync + Send + Debug + Explainable { - fn execute(&self, batch: Batch) -> Result; + fn execute(&self, batch: Batch2) -> Result; } /// A simple operator is an operator that wraps a function that requires no @@ -97,7 +97,7 @@ impl ExecutableOperator for SimpleOperator { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, diff --git a/crates/rayexec_execution/src/execution/operators/sink.rs b/crates/rayexec_execution/src/execution/operators/sink.rs index 545bd0602..41ad3d3aa 100644 --- a/crates/rayexec_execution/src/execution/operators/sink.rs +++ b/crates/rayexec_execution/src/execution/operators/sink.rs @@ -19,7 +19,7 @@ use super::{ PollPush, }; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -64,7 +64,7 @@ pub trait PartitionSink: Debug + Send { /// Push a batch to the sink. /// /// Batches are pushed in the order they're received in. - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>>; + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>>; /// Finalize the sink. /// @@ -183,7 +183,7 @@ impl ExecutableOperator for SinkOperator { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { match partition_state { PartitionState::Sink(state) => match state { @@ -233,7 +233,7 @@ impl ExecutableOperator for SinkOperator { // // I think we'll want to do a similar thing for inserts so that // we can implement them as "just" async functions. 
- Ok(PollPush::Pending(Batch::empty())) + Ok(PollPush::Pending(Batch2::empty())) } } } @@ -370,7 +370,7 @@ impl ExecutableOperator for SinkOperator { let row_count = shared.global_row_count as u64; - let row_count_batch = Batch::try_new([Array2::from_iter([row_count])])?; + let row_count_batch = Batch2::try_new([Array2::from_iter([row_count])])?; return Ok(PollPull::Computed(row_count_batch.into())); } diff --git a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs index e1968400c..391941599 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs @@ -7,7 +7,7 @@ use rayexec_error::Result; use super::util::merger::{KWayMerger, MergeResult}; use super::util::sort_keys::SortKeysExtractor; use super::util::sorted_batch::{PhysicallySortedBatch, SortedKeysIter}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::sort::util::merger::IterState; use crate::execution::operators::{ @@ -244,7 +244,7 @@ impl ExecutableOperator for PhysicalGatherSort { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::GatherSortPush(state) => state, diff --git a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs index 3428ec7ec..5a90d44f7 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use super::util::merger::{IterState, KWayMerger, MergeResult}; use super::util::sort_keys::SortKeysExtractor; use super::util::sorted_batch::{IndexSortedBatch, 
SortedIndicesIter}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; use crate::execution::operators::{ @@ -96,7 +96,7 @@ impl ExecutableOperator for PhysicalScatterSort { _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::ScatterSort(state) => state, @@ -207,7 +207,7 @@ impl PhysicalScatterSort { fn insert_batch_for_comparison( &self, state: &mut ConsumingPartitionState, - batch: Batch, + batch: Batch2, ) -> Result<()> { let keys = state.extractor.sort_keys(&batch)?; diff --git a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs index 69e05dab8..b1d853710 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs @@ -2,7 +2,7 @@ use std::task::Context; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, @@ -38,7 +38,7 @@ impl ExecutableOperator for PhysicalTopK { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { unimplemented!() } diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs b/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs index d28aa3917..a467a6e5e 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/accumulator.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use 
crate::arrays::executor::scalar::interleave; /// Tracks the state per input into the merge. @@ -15,7 +15,7 @@ struct InputState { #[derive(Debug)] pub struct IndicesAccumulator { /// Batches we're using for the build. - batches: Vec<(usize, Batch)>, + batches: Vec<(usize, Batch2)>, /// States for each input we're reading from. states: Vec, /// Interleave indices referencing the stored batches. @@ -37,7 +37,7 @@ impl IndicesAccumulator { /// /// The inputs's state will be updated to point to the beginning of this /// batch (making any previous batches pushed for this input unreachable). - pub fn push_input_batch(&mut self, input: usize, batch: Batch) { + pub fn push_input_batch(&mut self, input: usize, batch: Batch2) { let idx = self.batches.len(); self.batches.push((input, batch)); self.states[input] = InputState { batch_idx: idx }; @@ -57,7 +57,7 @@ impl IndicesAccumulator { /// Build a batch from the accumulated interleave indices. /// /// Internally drops batches that will no longer be part of the output. - pub fn build(&mut self) -> Result> { + pub fn build(&mut self) -> Result> { if self.indices.is_empty() { return Ok(None); } @@ -78,7 +78,7 @@ impl IndicesAccumulator { .collect::>>()?; self.indices.clear(); - let batch = Batch::try_new(merged)?; + let batch = Batch2::try_new(merged)?; // Drops batches that are no longer reachable (won't be contributing to // the output). 
diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs b/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs index b327100a5..dff52f606 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/merger.rs @@ -5,14 +5,14 @@ use rayexec_error::{RayexecError, Result}; use super::accumulator::IndicesAccumulator; use super::sorted_batch::RowReference; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; #[derive(Debug)] pub enum MergeResult { /// We have a merged batch. /// /// Nothing else needed before the next call to `try_merge`. - Batch(Batch), + Batch(Batch2), /// Need to push a new batch for the input at the given index. /// @@ -73,7 +73,7 @@ where /// The initial heap will be created from the first element of each /// iterator. If an input is never expected to produce references, its iter /// state should be Finished and the batch should be None. - pub fn try_new(inputs: Vec<(Option, IterState)>) -> Result { + pub fn try_new(inputs: Vec<(Option, IterState)>) -> Result { let mut heap = BinaryHeap::new(); let mut iters = Vec::with_capacity(inputs.len()); let mut acc = IndicesAccumulator::new(inputs.len()); @@ -128,7 +128,7 @@ where } /// Push a batch and iterator for an input. 
- pub fn push_batch_for_input(&mut self, input: usize, batch: Batch, mut iter: I) -> Result<()> { + pub fn push_batch_for_input(&mut self, input: usize, batch: Batch2, mut iter: I) -> Result<()> { assert!(self.needs_input); self.needs_input = false; diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs index cdb7a8f61..66bf2c419 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/sort_keys.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::row::encoding::{ComparableColumn, ComparableRowEncoder, ComparableRows}; use crate::expr::physical::PhysicalSortExpression; @@ -31,14 +31,14 @@ impl SortKeysExtractor { } /// Get the sort keys for the batch as rows. - pub fn sort_keys(&self, batch: &Batch) -> Result { + pub fn sort_keys(&self, batch: &Batch2) -> Result { let cols = self.sort_columns(batch)?; let rows = self.encoder.encode(&cols)?; Ok(rows) } /// Get the columns that make up the sort keys. 
- pub fn sort_columns<'a>(&self, batch: &'a Batch) -> Result> { + pub fn sort_columns<'a>(&self, batch: &'a Batch2) -> Result> { let sort_cols = self .order_by .iter() diff --git a/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs b/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs index 6853283a7..95bf8b395 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/util/sorted_batch.rs @@ -2,7 +2,7 @@ use std::cmp::Ordering; use std::fmt; use std::sync::Arc; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::row::encoding::{ComparableRow, ComparableRows}; /// A batch that's been physically sorted. @@ -12,14 +12,14 @@ use crate::arrays::row::encoding::{ComparableRow, ComparableRows}; #[derive(Debug)] pub struct PhysicallySortedBatch { /// The sorted batch. - pub batch: Batch, + pub batch: Batch2, /// The sorted keys. pub keys: ComparableRows, } impl PhysicallySortedBatch { - pub fn into_batch_and_iter(self) -> (Batch, SortedKeysIter) { + pub fn into_batch_and_iter(self) -> (Batch2, SortedKeysIter) { let iter = SortedKeysIter { row_idx: 0, keys: Arc::new(self.keys), @@ -71,11 +71,11 @@ pub struct IndexSortedBatch { /// Unsorted keys for the batch. pub keys: ComparableRows, /// The original unsorted batch. 
- pub batch: Batch, + pub batch: Batch2, } impl IndexSortedBatch { - pub fn into_batch_and_iter(self) -> (Batch, SortedIndicesIter) { + pub fn into_batch_and_iter(self) -> (Batch2, SortedIndicesIter) { let iter = SortedIndicesIter { indices: self.sort_indices, idx: 0, diff --git a/crates/rayexec_execution/src/execution/operators/source.rs b/crates/rayexec_execution/src/execution/operators/source.rs index 5328111bd..cf899a0ba 100644 --- a/crates/rayexec_execution/src/execution/operators/source.rs +++ b/crates/rayexec_execution/src/execution/operators/source.rs @@ -18,7 +18,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -55,13 +55,13 @@ pub trait PartitionSource: Debug + Send { /// Pull the enxt batch from the source. /// /// Returns None when there's no batches remaining in the source. - fn pull(&mut self) -> BoxFuture<'_, Result>>; + fn pull(&mut self) -> BoxFuture<'_, Result>>; } pub struct SourcePartitionState { source: Box, /// In progress pull we're working on. 
- future: Option>>>, + future: Option>>>, } impl fmt::Debug for SourcePartitionState { @@ -115,7 +115,7 @@ impl ExecutableOperator for SourceOperator { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } diff --git a/crates/rayexec_execution/src/execution/operators/table_function.rs b/crates/rayexec_execution/src/execution/operators/table_function.rs index 17f4a3eb1..e0880fcbd 100644 --- a/crates/rayexec_execution/src/execution/operators/table_function.rs +++ b/crates/rayexec_execution/src/execution/operators/table_function.rs @@ -17,7 +17,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::functions::table::{PlannedTableFunction, TableFunctionImpl}; @@ -27,7 +27,7 @@ use crate::storage::table_storage::{DataTableScan, Projections}; pub struct TableFunctionPartitionState { scan_state: Box, /// In progress pull we're working on. - future: Option>>>, + future: Option>>>, } impl fmt::Debug for TableFunctionPartitionState { @@ -94,7 +94,7 @@ impl ExecutableOperator for PhysicalTableFunction { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { // Could UNNEST be implemented as a table function? 
Err(RayexecError::new("Cannot push to physical table function")) diff --git a/crates/rayexec_execution/src/execution/operators/table_inout.rs b/crates/rayexec_execution/src/execution/operators/table_inout.rs index 728ae632b..85355dbee 100644 --- a/crates/rayexec_execution/src/execution/operators/table_inout.rs +++ b/crates/rayexec_execution/src/execution/operators/table_inout.rs @@ -14,7 +14,7 @@ use super::{ PollPush, }; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -79,7 +79,7 @@ impl ExecutableOperator for PhysicalTableInOut { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, @@ -98,7 +98,7 @@ impl ExecutableOperator for PhysicalTableInOut { }) .collect::>>()?; - let inputs = Batch::try_new(inputs)?; + let inputs = Batch2::try_new(inputs)?; // Try to push first to avoid overwriting any buffered additional // outputs. 
@@ -175,7 +175,7 @@ impl ExecutableOperator for PhysicalTableInOut { arrays.push(additional); } - let new_batch = Batch::try_new(arrays)?; + let new_batch = Batch2::try_new(arrays)?; Ok(PollPull::Computed(new_batch.into())) } diff --git a/crates/rayexec_execution/src/execution/operators/test_util.rs b/crates/rayexec_execution/src/execution/operators/test_util.rs index 14883e2d2..32f034030 100644 --- a/crates/rayexec_execution/src/execution/operators/test_util.rs +++ b/crates/rayexec_execution/src/execution/operators/test_util.rs @@ -14,7 +14,7 @@ use super::{ PollPush, }; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::scalar::ScalarValue; use crate::database::system::new_system_catalog; use crate::database::DatabaseContext; @@ -72,7 +72,7 @@ impl TestWakerContext { operator: impl AsRef, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: impl Into, + batch: impl Into, ) -> Result { operator.as_ref().poll_push( &mut self.context(), @@ -101,18 +101,18 @@ impl Wake for TestWakerInner { } /// Unwraps a batch from the PollPull::Batch variant. -pub fn unwrap_poll_pull_batch(poll: PollPull) -> Batch { +pub fn unwrap_poll_pull_batch(poll: PollPull) -> Batch2 { match poll { PollPull::Computed(ComputedBatches::Single(batch)) => batch, other => panic!("unexpected poll pull: {other:?}"), } } -pub fn logical_value(batch: &Batch, column: usize, row: usize) -> ScalarValue { +pub fn logical_value(batch: &Batch2, column: usize, row: usize) -> ScalarValue { batch.column(column).unwrap().logical_value(row).unwrap() } /// Makes a batch with a single column i32 values provided by the iterator. 
-pub fn make_i32_batch(iter: impl IntoIterator) -> Batch { - Batch::try_new(vec![Array2::from_iter(iter.into_iter())]).unwrap() +pub fn make_i32_batch(iter: impl IntoIterator) -> Batch2 { + Batch2::try_new(vec![Array2::from_iter(iter.into_iter())]).unwrap() } diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 34efcad03..48aba3a75 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -16,7 +16,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::InputOutputStates; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -46,7 +46,7 @@ pub enum UngroupedAggregatePartitionState { /// /// Currently only one partition will actually produce output. The rest /// will be empty. 
- batches: Vec, + batches: Vec, }, } @@ -140,7 +140,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, @@ -237,7 +237,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { .map(|s| s.finalize()) .collect::>>()?; - let batch = Batch::try_new(arrays)?; + let batch = Batch2::try_new(arrays)?; *state = UngroupedAggregatePartitionState::Producing { partition_idx: *partition_idx, diff --git a/crates/rayexec_execution/src/execution/operators/union.rs b/crates/rayexec_execution/src/execution/operators/union.rs index c5bc79a7f..ad7732e37 100644 --- a/crates/rayexec_execution/src/execution/operators/union.rs +++ b/crates/rayexec_execution/src/execution/operators/union.rs @@ -14,7 +14,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -22,7 +22,7 @@ use crate::proto::DatabaseProtoConv; #[derive(Debug)] pub struct UnionTopPartitionState { partition_idx: usize, - batch: Option, + batch: Option, finished: bool, push_waker: Option, pull_waker: Option, @@ -40,7 +40,7 @@ pub struct UnionOperatorState { #[derive(Debug)] struct SharedPartitionState { - batch: Option, + batch: Option, finished: bool, push_waker: Option, pull_waker: Option, @@ -120,7 +120,7 @@ impl ExecutableOperator for PhysicalUnion { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { match partition_state { PartitionState::UnionTop(state) => { diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 
4c92f611e..2449900ab 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -16,7 +16,7 @@ use super::{ PollPush, }; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ ArrayBuilder, @@ -122,7 +122,7 @@ impl ExecutableOperator for PhysicalUnnest { cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - batch: Batch, + batch: Batch2, ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, @@ -290,7 +290,7 @@ impl ExecutableOperator for PhysicalUnnest { } } - let batch = Batch::try_new(outputs)?; + let batch = Batch2::try_new(outputs)?; Ok(PollPull::Computed(batch.into())) } diff --git a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs index 828b544da..94f041c83 100644 --- a/crates/rayexec_execution/src/execution/operators/util/broadcast.rs +++ b/crates/rayexec_execution/src/execution/operators/util/broadcast.rs @@ -5,7 +5,7 @@ use std::task::{Context, Poll, Waker}; use parking_lot::Mutex; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; #[derive(Debug)] pub struct BroadcastChannel { @@ -34,7 +34,7 @@ impl BroadcastChannel { (ch, recvs) } - pub fn send(&self, batch: Batch) { + pub fn send(&self, batch: Batch2) { let mut state = self.state.lock(); let idx = state.batches.len(); @@ -102,7 +102,7 @@ struct BroadcastState { #[derive(Debug)] struct BatchState { remaining_recv: usize, - batch: Option, + batch: Option, } #[derive(Debug)] @@ -113,7 +113,7 @@ pub struct RecvFut { } impl Future for RecvFut { - type Output = Option; + type Output = Option; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut state = self.state.lock(); @@ -170,9 +170,9 @@ mod tests { } 
/// Create a batch with a single int64 value. - fn test_batch(n: i64) -> Batch { + fn test_batch(n: i64) -> Batch2 { let col = Array2::from_iter([n]); - Batch::try_new([col]).unwrap() + Batch2::try_new([col]).unwrap() } #[test] diff --git a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs index 1bcbdc8c8..153e34ea2 100644 --- a/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs +++ b/crates/rayexec_execution/src/execution/operators/util/outer_join_tracker.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use rayexec_error::Result; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::selection::SelectionVector; @@ -23,7 +23,7 @@ pub struct LeftOuterJoinTracker { } impl LeftOuterJoinTracker { - pub fn new_for_batches(batches: &[Batch]) -> Self { + pub fn new_for_batches(batches: &[Batch2]) -> Self { let bitmaps = batches .iter() .map(|b| Bitmap::new_with_all_false(b.num_rows())) @@ -57,7 +57,7 @@ impl LeftOuterJoinTracker { pub struct LeftOuterJoinDrainState { tracker: LeftOuterJoinTracker, /// All batches from the left side. - batches: Vec, + batches: Vec, /// Types for the right side of the join. Used to create the (typed) null /// columns for left rows that weren't visited. right_types: Vec, @@ -76,7 +76,7 @@ impl LeftOuterJoinDrainState { start_idx: usize, skip: usize, tracker: LeftOuterJoinTracker, - batches: Vec, + batches: Vec, right_types: Vec, ) -> Self { LeftOuterJoinDrainState { @@ -90,7 +90,7 @@ impl LeftOuterJoinDrainState { /// Drains the next batch from the left, and appends a boolean column /// representing which rows were visited. 
- pub fn drain_mark_next(&mut self) -> Result> { + pub fn drain_mark_next(&mut self) -> Result> { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, None => return Ok(None), @@ -111,7 +111,7 @@ impl LeftOuterJoinDrainState { ArrayData2::Boolean(Arc::new(bitmap.clone().into())), )]); - let batch = Batch::try_new(cols)?; + let batch = Batch2::try_new(cols)?; Ok(Some(batch)) } @@ -120,7 +120,7 @@ impl LeftOuterJoinDrainState { /// /// This will filter out rows that have been visited, and join the remaining /// rows will null columns on the right. - pub fn drain_next(&mut self) -> Result> { + pub fn drain_next(&mut self) -> Result> { loop { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, @@ -155,13 +155,13 @@ impl LeftOuterJoinDrainState { .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; return Ok(Some(batch)); } } - pub fn drain_semi_next(&mut self) -> Result> { + pub fn drain_semi_next(&mut self) -> Result> { loop { let batch = match self.batches.get(self.batch_idx) { Some(batch) => batch, @@ -190,7 +190,7 @@ impl LeftOuterJoinDrainState { .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_cols.into_iter().chain(right_cols))?; return Ok(Some(batch)); } @@ -209,7 +209,7 @@ pub struct RightOuterJoinTracker { impl RightOuterJoinTracker { /// Create a new tracker for the provided batch. - pub fn new_for_batch(batch: &Batch) -> Self { + pub fn new_for_batch(batch: &Batch2) -> Self { RightOuterJoinTracker { unvisited: Bitmap::new_with_all_true(batch.num_rows()), } @@ -230,7 +230,7 @@ impl RightOuterJoinTracker { /// the batch. /// /// Returns None if all row on the right were visited. 
- pub fn into_unvisited(self, left_types: &[DataType], right: &Batch) -> Result> { + pub fn into_unvisited(self, left_types: &[DataType], right: &Batch2) -> Result> { let selection = SelectionVector::from_iter(self.unvisited.index_iter()); let num_rows = selection.num_rows(); if num_rows == 0 { @@ -244,7 +244,7 @@ impl RightOuterJoinTracker { .map(|datatype| Array2::new_typed_null_array(datatype.clone(), num_rows)) .collect::>>()?; - let batch = Batch::try_new(left_null_cols.into_iter().chain(right_cols))?; + let batch = Batch2::try_new(left_null_cols.into_iter().chain(right_cols))?; Ok(Some(batch)) } diff --git a/crates/rayexec_execution/src/execution/operators/util/resizer.rs b/crates/rayexec_execution/src/execution/operators/util/resizer.rs index cda83ea42..c6146cef9 100644 --- a/crates/rayexec_execution/src/execution/operators/util/resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/util/resizer.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::execution::computed_batch::ComputedBatches; @@ -16,7 +16,7 @@ pub struct BatchResizer { /// Target batch size. target: usize, /// Pending input batches. - pending: Vec, + pending: Vec, /// Current total row count for all batches. pending_row_count: usize, } @@ -36,7 +36,7 @@ impl BatchResizer { /// Typically this will return either no batches or a single batch. However /// there is a case where this can return multiple batches if 'len(input) + /// pending_row_count > target * 2' (aka very large input batch). 
- pub fn try_push(&mut self, batch: Batch) -> Result { + pub fn try_push(&mut self, batch: Batch2) -> Result { if batch.num_rows() == 0 { return Ok(ComputedBatches::None); } @@ -44,7 +44,7 @@ impl BatchResizer { if self.pending_row_count + batch.num_rows() == self.target { self.pending.push(batch); - let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; @@ -67,7 +67,7 @@ impl BatchResizer { self.pending.push(batch_a); // Concat current pending + batch a. - let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; @@ -107,7 +107,7 @@ impl BatchResizer { return Ok(ComputedBatches::None); } - let out = Batch::concat(&self.pending)?; + let out = Batch2::concat(&self.pending)?; self.pending.clear(); self.pending_row_count = 0; Ok(ComputedBatches::Single(out)) @@ -122,13 +122,13 @@ mod tests { #[test] fn push_within_target() { - let batch1 = Batch::try_new([ + let batch1 = Batch2::try_new([ Array2::from_iter([1, 2, 3]), Array2::from_iter(["a", "b", "c"]), ]) .unwrap(); - let batch2 = Batch::try_new([ + let batch2 = Batch2::try_new([ Array2::from_iter([4, 5, 6]), Array2::from_iter(["d", "e", "f"]), ]) @@ -145,7 +145,7 @@ mod tests { other => panic!("unexpected out: {other:?}"), }; - let expected = Batch::try_new([ + let expected = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4]), Array2::from_iter(["a", "b", "c", "d"]), ]) @@ -154,7 +154,7 @@ mod tests { assert_batches_eq(&expected, &got); let expected_rem = - Batch::try_new([Array2::from_iter([5, 6]), Array2::from_iter(["e", "f"])]).unwrap(); + Batch2::try_new([Array2::from_iter([5, 6]), Array2::from_iter(["e", "f"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, @@ -168,7 +168,7 @@ mod tests { fn push_large_batch() { // len(batch) > target && len(batch) < target * 2 - let batch = Batch::try_new([ + 
let batch = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4, 5]), Array2::from_iter(["a", "b", "c", "d", "e"]), ]) @@ -180,7 +180,7 @@ mod tests { other => panic!("unexpected out: {other:?}"), }; - let expected = Batch::try_new([ + let expected = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4]), Array2::from_iter(["a", "b", "c", "d"]), ]) @@ -189,7 +189,7 @@ mod tests { assert_batches_eq(&expected, &got); let expected_rem = - Batch::try_new([Array2::from_iter([5]), Array2::from_iter(["e"])]).unwrap(); + Batch2::try_new([Array2::from_iter([5]), Array2::from_iter(["e"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, @@ -203,7 +203,7 @@ mod tests { fn push_very_large_batch() { // len(batch) > target * 2 - let batch = Batch::try_new([ + let batch = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), Array2::from_iter(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), ]) @@ -217,14 +217,14 @@ mod tests { assert_eq!(2, gots.len()); - let expected1 = Batch::try_new([ + let expected1 = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4]), Array2::from_iter(["a", "b", "c", "d"]), ]) .unwrap(); assert_batches_eq(&expected1, &gots[0]); - let expected2 = Batch::try_new([ + let expected2 = Batch2::try_new([ Array2::from_iter([5, 6, 7, 8]), Array2::from_iter(["e", "f", "g", "h"]), ]) @@ -232,7 +232,7 @@ mod tests { assert_batches_eq(&expected2, &gots[1]); let expected_rem = - Batch::try_new([Array2::from_iter([9, 10]), Array2::from_iter(["i", "j"])]).unwrap(); + Batch2::try_new([Array2::from_iter([9, 10]), Array2::from_iter(["i", "j"])]).unwrap(); let remaining = match resizer.flush_remaining().unwrap() { ComputedBatches::Single(batch) => batch, diff --git a/crates/rayexec_execution/src/execution/operators/values.rs b/crates/rayexec_execution/src/execution/operators/values.rs index 4fdfa7555..c96f7b3d4 100644 --- a/crates/rayexec_execution/src/execution/operators/values.rs +++ 
b/crates/rayexec_execution/src/execution/operators/values.rs @@ -13,23 +13,23 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; #[derive(Debug)] pub struct ValuesPartitionState { - batches: Vec, + batches: Vec, } #[derive(Debug)] pub struct PhysicalValues { - batches: Vec, + batches: Vec, } impl PhysicalValues { - pub fn new(batches: Vec) -> Self { + pub fn new(batches: Vec) -> Self { PhysicalValues { batches } } } @@ -65,7 +65,7 @@ impl ExecutableOperator for PhysicalValues { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { Err(RayexecError::new("Cannot push to Values operator")) } diff --git a/crates/rayexec_execution/src/execution/operators/window/mod.rs b/crates/rayexec_execution/src/execution/operators/window/mod.rs index 36547a198..b8da47f84 100644 --- a/crates/rayexec_execution/src/execution/operators/window/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/window/mod.rs @@ -11,7 +11,7 @@ use super::{ PollPull, PollPush, }; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -32,7 +32,7 @@ impl ExecutableOperator for PhysicalWindow { _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - _batch: Batch, + _batch: Batch2, ) -> Result { unimplemented!() } diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index 511560153..9f81d5a6d 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use 
super::PhysicalScalarExpression; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::scalar::{interleave, SelectExecutor}; use crate::arrays::selection::SelectionVector; @@ -30,7 +30,7 @@ pub struct PhysicalCaseExpr { } impl PhysicalCaseExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { let mut arrays = Vec::new(); let mut indices: Vec<(usize, usize)> = (0..batch.num_rows()).map(|_| (0, 0)).collect(); @@ -128,7 +128,7 @@ mod tests { #[test] fn case_simple() { - let batch = Batch::try_new([ + let batch = Batch2::try_new([ Array2::from_iter([1, 2, 3, 4]), Array2::from_iter([12, 13, 14, 15]), ]) diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 33aeb91ab..59f6067a3 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -6,7 +6,7 @@ use rayexec_proto::ProtoConv; use super::PhysicalScalarExpression; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::compute::cast::array::cast_array; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::DataType; @@ -20,7 +20,7 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { let input = self.expr.eval(batch)?; let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; Ok(Cow::Owned(out)) diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 889563a05..42aaff2c8 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ 
b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -4,7 +4,7 @@ use std::fmt; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -14,7 +14,7 @@ pub struct PhysicalColumnExpr { } impl PhysicalColumnExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { let col = batch.column(self.idx).ok_or_else(|| { RayexecError::new(format!( "Tried to get column at index {} in a batch with {} columns", diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index af66526b0..dfaee9484 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -5,7 +5,7 @@ use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::scalar::OwnedScalarValue; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -16,7 +16,7 @@ pub struct PhysicalLiteralExpr { } impl PhysicalLiteralExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { let arr = self.literal.as_array(batch.num_rows())?; Ok(Cow::Owned(arr)) } diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 646ea0831..4d5ae80b8 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -17,7 +17,7 @@ use rayexec_error::{not_implemented, OptionExt, Result}; use scalar_function_expr::PhysicalScalarFunctionExpr; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use 
crate::arrays::batch::Batch2; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -34,7 +34,7 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { match self { Self::Case(e) => e.eval(batch), Self::Cast(e) => e.eval(batch), @@ -48,7 +48,7 @@ impl PhysicalScalarExpression { /// /// The selection vector will include row indices where the expression /// evaluates to true. - pub fn select(&self, batch: &Batch) -> Result { + pub fn select(&self, batch: &Batch2) -> Result { let selected = self.eval(batch)?; let mut selection = SelectionVector::with_capacity(selected.logical_len()); @@ -198,7 +198,7 @@ mod tests { #[test] fn select_some() { - let batch = Batch::try_new([ + let batch = Batch2::try_new([ Array2::from_iter([1, 4, 6, 9, 12]), Array2::from_iter([2, 3, 8, 9, 10]), ]) @@ -225,7 +225,7 @@ mod tests { #[test] fn select_none() { - let batch = Batch::try_new([ + let batch = Batch2::try_new([ Array2::from_iter([1, 2, 6, 9, 9]), Array2::from_iter([2, 3, 8, 9, 10]), ]) diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 9ee56a08b..6c59c1dd0 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use super::PhysicalScalarExpression; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::functions::scalar::PlannedScalarFunction; use crate::proto::DatabaseProtoConv; @@ -18,7 +18,7 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { - pub fn eval<'a>(&self, batch: &'a Batch) -> Result> { + pub 
fn eval<'a>(&self, batch: &'a Batch2) -> Result> { let inputs = self .inputs .iter() diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index 7e6414d3f..8b36b8fa0 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -4,7 +4,7 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::PhysicalI64; use crate::arrays::executor::scalar::UnaryExecutor; @@ -203,7 +203,7 @@ impl SeriesParams { pub struct GenerateSeriesInOutPartitionState { batch_size: usize, /// Batch we're working on. - batch: Option, + batch: Option, /// Current row number next_row_idx: usize, /// If we're finished. @@ -215,7 +215,7 @@ pub struct GenerateSeriesInOutPartitionState { } impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, batch: Batch) -> Result { + fn poll_push(&mut self, cx: &mut Context, batch: Batch2) -> Result { if self.batch.is_some() { // Still processing current batch, come back later. 
self.push_waker = Some(cx.waker().clone()); @@ -308,7 +308,7 @@ impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { } let out = self.params.generate_next(self.batch_size); - let batch = Batch::try_new([out])?; + let batch = Batch2::try_new([out])?; let row_nums = vec![self.params.current_row_idx; batch.num_rows()]; diff --git a/crates/rayexec_execution/src/functions/table/builtin/system.rs b/crates/rayexec_execution/src/functions/table/builtin/system.rs index 6acf05a58..afa9afd66 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/system.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/system.rs @@ -8,7 +8,7 @@ use parking_lot::Mutex; use rayexec_error::{OptionExt, RayexecError, Result}; use crate::arrays::array::Array2; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; use crate::arrays::executor::builder::{ArrayDataBuffer, GermanVarlenBuffer}; @@ -42,7 +42,7 @@ pub trait SystemFunctionImpl: Debug + Sync + Send + Copy + 'static { fn schema() -> Schema; fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result; + ) -> Result; } pub type ListDatabases = SystemFunction; @@ -62,7 +62,7 @@ impl SystemFunctionImpl for ListDatabasesImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let len = databases.len(); let mut database_names = GermanVarlenBuffer::::with_len(len); @@ -78,7 +78,7 @@ impl SystemFunctionImpl for ListDatabasesImpl { ); } - Batch::try_new([ + Batch2::try_new([ Array2::new_with_array_data(DataType::Utf8, database_names.into_data()), Array2::new_with_array_data(DataType::Utf8, datasources.into_data()), ]) @@ -118,7 +118,7 @@ impl SystemFunctionImpl for ListFunctionsImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; 
let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -231,7 +231,7 @@ impl SystemFunctionImpl for ListFunctionsImpl { Ok(()) })?; - Batch::try_new([ + Batch2::try_new([ Array2::new_with_array_data(DataType::Utf8, database_names), Array2::new_with_array_data(DataType::Utf8, schema_names), Array2::new_with_array_data(DataType::Utf8, function_names), @@ -284,7 +284,7 @@ impl SystemFunctionImpl for ListTablesImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -308,7 +308,7 @@ impl SystemFunctionImpl for ListTablesImpl { Ok(()) })?; - Batch::try_new([ + Batch2::try_new([ Array2::new_with_array_data(DataType::Utf8, database_names), Array2::new_with_array_data(DataType::Utf8, schema_names), Array2::new_with_array_data(DataType::Utf8, table_names), @@ -333,7 +333,7 @@ impl SystemFunctionImpl for ListSchemasImpl { fn new_batch( databases: &mut VecDeque<(String, Arc, Option)>, - ) -> Result { + ) -> Result { let database = databases.pop_front().required("database")?; let mut database_names = GermanVarlenStorage::with_metadata_capacity(0); @@ -348,7 +348,7 @@ impl SystemFunctionImpl for ListSchemasImpl { Ok(()) })?; - Batch::try_new([ + Batch2::try_new([ Array2::new_with_array_data(DataType::Utf8, database_names), Array2::new_with_array_data(DataType::Utf8, schema_names), ]) @@ -469,7 +469,7 @@ struct SystemDataTableScan { } impl DataTableScan for SystemDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { if self.databases.is_empty() { return Ok(None); diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index c09301d81..353633694 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs 
+++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -4,7 +4,7 @@ use std::task::{Context, Waker}; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType2}; use crate::arrays::executor::scalar::UnaryExecutor; @@ -152,7 +152,7 @@ pub struct UnnestInOutPartitionState { } impl TableInOutPartitionState for UnnestInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch) -> Result { + fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result { if self.current_row < self.input_num_rows { // Still processing inputs, come back later. self.push_waker = Some(cx.waker().clone()); @@ -247,7 +247,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { } } - let batch = Batch::try_new([output])?; + let batch = Batch2::try_new([output])?; Ok(InOutPollPull::Batch { batch, row_nums }) } diff --git a/crates/rayexec_execution/src/functions/table/inout.rs b/crates/rayexec_execution/src/functions/table/inout.rs index f4b1229af..5108c6577 100644 --- a/crates/rayexec_execution/src/functions/table/inout.rs +++ b/crates/rayexec_execution/src/functions/table/inout.rs @@ -4,7 +4,7 @@ use std::task::Context; use dyn_clone::DynClone; use rayexec_error::Result; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::execution::operators::{PollFinalize, PollPush}; pub trait TableInOutFunction: Debug + Sync + Send + DynClone { @@ -16,13 +16,13 @@ pub trait TableInOutFunction: Debug + Sync + Send + DynClone { #[derive(Debug)] pub enum InOutPollPull { - Batch { batch: Batch, row_nums: Vec }, + Batch { batch: Batch2, row_nums: Vec }, Pending, Exhausted, } pub trait TableInOutPartitionState: Debug + Sync + Send { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch) -> Result; + fn 
poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result; fn poll_finalize_push(&mut self, cx: &mut Context) -> Result; fn poll_pull(&mut self, cx: &mut Context) -> Result; } diff --git a/crates/rayexec_execution/src/hybrid/buffer.rs b/crates/rayexec_execution/src/hybrid/buffer.rs index ffba013b8..900af7014 100644 --- a/crates/rayexec_execution/src/hybrid/buffer.rs +++ b/crates/rayexec_execution/src/hybrid/buffer.rs @@ -12,7 +12,7 @@ use tracing::debug; use uuid::Uuid; use super::client::{IpcBatch, PullStatus}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::StreamId; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -95,7 +95,7 @@ impl ServerStreamBuffers { Ok(error_sink.value().clone()) } - pub fn push_batch_for_stream(&self, stream_id: &StreamId, batch: Batch) -> Result<()> { + pub fn push_batch_for_stream(&self, stream_id: &StreamId, batch: Batch2) -> Result<()> { let incoming = self.incoming.get(stream_id).ok_or_else(|| { RayexecError::new(format!("Missing incoming stream with id: {stream_id:?}")) })?; @@ -192,7 +192,7 @@ pub struct OutgoingPartitionStream { } impl PartitionSink for OutgoingPartitionStream { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(OutgoingPushFuture { batch: Some(batch), state: self.state.clone(), @@ -209,13 +209,13 @@ impl PartitionSink for OutgoingPartitionStream { #[derive(Debug)] struct OutgoingStreamState { finished: bool, - batch: Option, + batch: Option, push_waker: Option, error_sink: Arc, } struct OutgoingPushFuture { - batch: Option, + batch: Option, state: Arc>, } @@ -282,7 +282,7 @@ pub struct IncomingPartitionStream { } impl PartitionSource for IncomingPartitionStream { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(IncomingPullFuture { 
state: self.state.clone(), }) @@ -292,7 +292,7 @@ impl PartitionSource for IncomingPartitionStream { #[derive(Debug)] struct IncomingStreamState { finished: bool, - batches: VecDeque, + batches: VecDeque, pull_waker: Option, } @@ -301,7 +301,7 @@ struct IncomingPullFuture { } impl Future for IncomingPullFuture { - type Output = Result>; + type Output = Result>; fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { let mut state = self.state.lock(); diff --git a/crates/rayexec_execution/src/hybrid/client.rs b/crates/rayexec_execution/src/hybrid/client.rs index a9d430a4f..b3d436d63 100644 --- a/crates/rayexec_execution/src/hybrid/client.rs +++ b/crates/rayexec_execution/src/hybrid/client.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use url::{Host, Url}; use uuid::Uuid; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::arrays::field::Schema; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::{IntermediatePipelineGroup, StreamId}; @@ -310,7 +310,7 @@ impl ProtoConv for PullStatus { /// Wrapper around a batch that implements IPC encoding/decoding when converting /// to protobuf. #[derive(Debug)] -pub struct IpcBatch(pub Batch); +pub struct IpcBatch(pub Batch2); // TODO: Don't allocate vectors in this. 
impl ProtoConv for IpcBatch { @@ -478,7 +478,7 @@ impl HybridClient { Ok(()) } - pub async fn push(&self, stream_id: StreamId, partition: usize, batch: Batch) -> Result<()> { + pub async fn push(&self, stream_id: StreamId, partition: usize, batch: Batch2) -> Result<()> { let url = self .url .join(REMOTE_ENDPOINTS.rpc_hybrid_push) diff --git a/crates/rayexec_execution/src/hybrid/stream.rs b/crates/rayexec_execution/src/hybrid/stream.rs index c997eb591..ef8ec0ce4 100644 --- a/crates/rayexec_execution/src/hybrid/stream.rs +++ b/crates/rayexec_execution/src/hybrid/stream.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use rayexec_io::http::HttpClient; use super::client::{HybridClient, PullStatus}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::intermediate::pipeline::StreamId; use crate::execution::operators::sink::{PartitionSink, SinkOperation}; @@ -64,7 +64,7 @@ pub struct ClientToServerPartitionSink { } impl PartitionSink for ClientToServerPartitionSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { // TODO: Figure out backpressure Box::pin(async { self.client.push(self.stream_id, 0, batch).await }) } @@ -116,7 +116,7 @@ pub struct ServerToClientPartitionSource { } impl PartitionSource for ServerToClientPartitionSource { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { // TODO: Backoff + hint somehow loop { diff --git a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs index ebf145831..c614b4acc 100644 --- a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs +++ b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs @@ -1,7 +1,7 @@ use rayexec_error::{RayexecError, Result}; use super::ExpressionRewriteRule; -use 
crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::expr::literal_expr::LiteralExpr; use crate::expr::physical::planner::PhysicalExpressionPlanner; use crate::expr::Expression; @@ -26,7 +26,7 @@ fn maybe_fold(table_list: &TableList, expr: &mut Expression) -> Result<()> { if expr.is_const_foldable() { let planner = PhysicalExpressionPlanner::new(table_list); let phys_expr = planner.plan_scalar(&[], expr)?; - let dummy = Batch::empty_with_num_rows(1); + let dummy = Batch2::empty_with_num_rows(1); let val = phys_expr.eval(&dummy)?; if val.logical_len() != 1 { diff --git a/crates/rayexec_execution/src/storage/memory.rs b/crates/rayexec_execution/src/storage/memory.rs index d0d1efa7b..fc9e34509 100644 --- a/crates/rayexec_execution/src/storage/memory.rs +++ b/crates/rayexec_execution/src/storage/memory.rs @@ -5,7 +5,7 @@ use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; use super::table_storage::{DataTable, DataTableScan, ProjectedScan, Projections, TableStorage}; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog_entry::CatalogEntry; use crate::execution::computed_batch::ComputedBatches; use crate::execution::operators::sink::PartitionSink; @@ -84,7 +84,7 @@ impl TableStorage for MemoryTableStorage { #[derive(Debug, Clone, Default)] pub struct MemoryDataTable { - data: Arc>>, + data: Arc>>, } impl DataTable for MemoryDataTable { @@ -129,11 +129,11 @@ impl DataTable for MemoryDataTable { #[derive(Debug)] pub struct MemoryDataTableScan { - data: Vec, + data: Vec, } impl DataTableScan for MemoryDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { Ok(self.data.pop()) }) } } @@ -142,11 +142,11 @@ impl DataTableScan for MemoryDataTableScan { pub struct MemoryDataTableInsert { resizer: BatchResizer, // TODO: Need to replace. 
collected: Vec, - data: Arc>>, + data: Arc>>, } impl PartitionSink for MemoryDataTableInsert { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { Box::pin(async { let batches = self.resizer.try_push(batch)?; if batches.is_empty() { diff --git a/crates/rayexec_execution/src/storage/table_storage.rs b/crates/rayexec_execution/src/storage/table_storage.rs index 258f69cff..6e884e576 100644 --- a/crates/rayexec_execution/src/storage/table_storage.rs +++ b/crates/rayexec_execution/src/storage/table_storage.rs @@ -4,7 +4,7 @@ use futures::future::BoxFuture; use rayexec_error::{RayexecError, Result}; use rayexec_proto::ProtoConv; -use crate::arrays::batch::Batch; +use crate::arrays::batch::Batch2; use crate::database::catalog_entry::CatalogEntry; use crate::execution::operators::sink::PartitionSink; @@ -99,7 +99,7 @@ pub trait DataTableScan: Debug + Send { /// Pull the next batch in the scan. /// /// Returns None if the scan is exhausted. - fn pull(&mut self) -> BoxFuture<'_, Result>>; + fn pull(&mut self) -> BoxFuture<'_, Result>>; } /// Helper for wrapping an unprojected scan with a projections list to produce @@ -118,7 +118,7 @@ impl ProjectedScan { ProjectedScan { projections, scan } } - async fn pull_inner(&mut self) -> Result> { + async fn pull_inner(&mut self) -> Result> { let batch = match self.scan.pull().await? 
{ Some(batch) => batch, None => return Ok(None), @@ -135,7 +135,7 @@ impl ProjectedScan { } impl DataTableScan for ProjectedScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.pull_inner().await }) } } @@ -145,7 +145,7 @@ impl DataTableScan for ProjectedScan { pub struct EmptyTableScan; impl DataTableScan for EmptyTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async move { Ok(None) }) } } diff --git a/crates/rayexec_iceberg/src/datatable.rs b/crates/rayexec_iceberg/src/datatable.rs index 1be505567..6931c5e38 100644 --- a/crates/rayexec_iceberg/src/datatable.rs +++ b/crates/rayexec_iceberg/src/datatable.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; use crate::table::{Table, TableScan}; @@ -34,7 +34,7 @@ struct IcebergTableScan { } impl DataTableScan for IcebergTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.scan.read_next().await }) } } diff --git a/crates/rayexec_iceberg/src/table.rs b/crates/rayexec_iceberg/src/table.rs index f5a08175a..d0fcac93e 100644 --- a/crates/rayexec_iceberg/src/table.rs +++ b/crates/rayexec_iceberg/src/table.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use futures::StreamExt; use rayexec_error::{RayexecError, Result, ResultExt}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::storage::table_storage::Projections; use rayexec_io::location::{AccessConfig, FileLocation}; @@ -276,7 +276,7 @@ pub struct TableScan { } impl TableScan { - pub async fn read_next(&mut self) -> Result> { + 
pub async fn read_next(&mut self) -> Result> { loop { if self.current.is_none() { let file = match self.files.pop_front() { diff --git a/crates/rayexec_parquet/src/copy_to.rs b/crates/rayexec_parquet/src/copy_to.rs index 1bf3fbb42..0c20dff30 100644 --- a/crates/rayexec_parquet/src/copy_to.rs +++ b/crates/rayexec_parquet/src/copy_to.rs @@ -3,7 +3,7 @@ use std::fmt; use futures::future::BoxFuture; use futures::FutureExt; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::execution::operators::sink::PartitionSink; use rayexec_execution::functions::copy::CopyToFunction; @@ -47,7 +47,7 @@ pub struct ParquetCopyToSink { } impl ParquetCopyToSink { - async fn push_inner(&mut self, batch: Batch) -> Result<()> { + async fn push_inner(&mut self, batch: Batch2) -> Result<()> { self.writer.write(&batch).await?; Ok(()) } @@ -59,7 +59,7 @@ impl ParquetCopyToSink { } impl PartitionSink for ParquetCopyToSink { - fn push(&mut self, batch: Batch) -> BoxFuture<'_, Result<()>> { + fn push(&mut self, batch: Batch2) -> BoxFuture<'_, Result<()>> { self.push_inner(batch).boxed() } diff --git a/crates/rayexec_parquet/src/functions/datatable.rs b/crates/rayexec_parquet/src/functions/datatable.rs index bf00983bd..1859526a7 100644 --- a/crates/rayexec_parquet/src/functions/datatable.rs +++ b/crates/rayexec_parquet/src/functions/datatable.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use futures::future::BoxFuture; use rayexec_error::Result; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::runtime::Runtime; use rayexec_execution::storage::table_storage::{DataTable, DataTableScan, Projections}; @@ -71,7 +71,7 @@ struct RowGroupsScan { } impl DataTableScan for RowGroupsScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> 
BoxFuture<'_, Result>> { Box::pin(async { self.reader.read_next().await }) } } diff --git a/crates/rayexec_parquet/src/reader/mod.rs b/crates/rayexec_parquet/src/reader/mod.rs index fdb28694e..1a592824a 100644 --- a/crates/rayexec_parquet/src/reader/mod.rs +++ b/crates/rayexec_parquet/src/reader/mod.rs @@ -20,7 +20,7 @@ use parquet::schema::types::ColumnDescPtr; use primitive::PrimitiveArrayReader; use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_execution::arrays::array::{Array2, ArrayData2}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::bitmap::Bitmap; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Schema; @@ -232,7 +232,7 @@ impl AsyncBatchReader { }) } - pub async fn read_next(&mut self) -> Result> { + pub async fn read_next(&mut self) -> Result> { if self.current_row_group.is_none() { match self.row_groups.pop_front() { Some(group) => { @@ -266,7 +266,7 @@ impl AsyncBatchReader { /// Try to read the next batch from the array builders. /// /// Returns Ok(None) when there's nothing left to read. 
- fn maybe_read_batch(&mut self) -> Result> { + fn maybe_read_batch(&mut self) -> Result> { for state in self.column_states.iter_mut() { state.builder.read_rows(self.batch_size)?; } @@ -276,7 +276,7 @@ impl AsyncBatchReader { .map(|state| state.builder.build()) .collect::>>()?; - let batch = Batch::try_new(arrays)?; + let batch = Batch2::try_new(arrays)?; if batch.num_rows() == 0 { Ok(None) diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index 28c18e0a5..8cd703e39 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -13,7 +13,7 @@ use parquet::format::FileMetaData; use parquet::schema::types::SchemaDescriptor; use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt}; use rayexec_execution::arrays::array::{Array2, ArrayData2}; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage}; use rayexec_execution::arrays::field::Schema; @@ -59,7 +59,7 @@ impl AsyncBatchWriter { } /// Encode and write a batch to the underlying file sink. 
- pub async fn write(&mut self, batch: &Batch) -> Result<()> { + pub async fn write(&mut self, batch: &Batch2) -> Result<()> { if batch.num_rows() == 0 { return Ok(()); } @@ -178,7 +178,7 @@ impl RowGroupWriter { }) } - fn write(&mut self, batch: &Batch) -> Result<()> { + fn write(&mut self, batch: &Batch2) -> Result<()> { for (writer, col) in self.column_writers.iter_mut().zip(batch.columns()) { if col.has_selection() { let unselected_array = col.unselect()?; diff --git a/crates/rayexec_postgres/src/lib.rs b/crates/rayexec_postgres/src/lib.rs index e4fdd795b..a91fe8698 100644 --- a/crates/rayexec_postgres/src/lib.rs +++ b/crates/rayexec_postgres/src/lib.rs @@ -12,7 +12,7 @@ use futures::stream::BoxStream; use futures::{StreamExt, TryFutureExt}; use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_execution::arrays::array::Array2; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::{DataType, DecimalTypeMeta}; use rayexec_execution::arrays::field::Field; use rayexec_execution::arrays::scalar::OwnedScalarValue; @@ -236,11 +236,11 @@ impl DataTable for PostgresDataTable { } pub struct PostgresDataTableScan { - stream: BoxStream<'static, Result>, + stream: BoxStream<'static, Result>, } impl DataTableScan for PostgresDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { Box::pin(async { self.stream.next().await.transpose() }) } } @@ -390,7 +390,7 @@ impl PostgresClient { Ok(fields) } - fn binary_rows_to_batch(typs: &[DataType], rows: Vec) -> Result { + fn binary_rows_to_batch(typs: &[DataType], rows: Vec) -> Result { fn row_iter<'a, T: FromSql<'a>>( rows: &'a [BinaryCopyOutRow], idx: usize, @@ -439,6 +439,6 @@ impl PostgresClient { arrays.push(arr); } - Batch::try_new(arrays) + Batch2::try_new(arrays) } } diff --git a/crates/rayexec_shell/src/result_table.rs b/crates/rayexec_shell/src/result_table.rs index 
b55a953e3..519a7a6ec 100644 --- a/crates/rayexec_shell/src/result_table.rs +++ b/crates/rayexec_shell/src/result_table.rs @@ -6,7 +6,7 @@ use futures::stream::Stream; use futures::{StreamExt, TryStreamExt}; use rayexec_error::{RayexecError, Result}; use rayexec_execution::arrays::array::Array2; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::format::pretty::table::PrettyTable; use rayexec_execution::arrays::row::ScalarRow; @@ -61,7 +61,7 @@ impl StreamingTable { } impl Stream for StreamingTable { - type Item = Result; + type Item = Result; fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { self.result.stream.poll_next_unpin(cx) @@ -71,7 +71,7 @@ impl Stream for StreamingTable { #[derive(Debug, Clone, PartialEq)] pub struct MaterializedResultTable { pub(crate) schema: Schema, - pub(crate) batches: Vec, + pub(crate) batches: Vec, pub(crate) planning_profile: Option, pub(crate) execution_profile: Option, } @@ -80,7 +80,7 @@ impl MaterializedResultTable { /// Create a new materialized result table. /// /// Mostly for testing. 
- pub fn try_new(schema: Schema, batches: impl IntoIterator) -> Result { + pub fn try_new(schema: Schema, batches: impl IntoIterator) -> Result { let batches: Vec<_> = batches.into_iter().collect(); for batch in &batches { if batch.columns().len() != schema.fields.len() { @@ -126,7 +126,7 @@ impl MaterializedResultTable { PrettyTable::try_new(&self.schema, &self.batches, width, max_rows) } - pub fn iter_batches(&self) -> impl Iterator { + pub fn iter_batches(&self) -> impl Iterator { self.batches.iter() } diff --git a/crates/rayexec_unity_catalog/src/functions.rs b/crates/rayexec_unity_catalog/src/functions.rs index 30bd17f08..b8daad5b2 100644 --- a/crates/rayexec_unity_catalog/src/functions.rs +++ b/crates/rayexec_unity_catalog/src/functions.rs @@ -8,7 +8,7 @@ use futures::stream::BoxStream; use futures::{FutureExt, TryStreamExt}; use rayexec_error::Result; use rayexec_execution::arrays::array::Array2; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::{DataType, DataTypeId}; use rayexec_execution::arrays::field::{Field, Schema}; use rayexec_execution::arrays::scalar::OwnedScalarValue; @@ -66,7 +66,7 @@ pub trait UnityObjectsOperation: /// Read the next batch from the stream. /// /// Returns Ok(None) when stream is finished. 
- fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>>; + fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>>; } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -128,7 +128,7 @@ impl UnityObjectsOperation for ListSchemasOperation { Ok(ListSchemasStreamState { stream }) } - fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>> { + fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>> { Box::pin(async { let resp = state.stream.try_next().await?; match resp { @@ -139,7 +139,7 @@ impl UnityObjectsOperation for ListSchemasOperation { let comments = Array2::from_iter(resp.schemas.iter().map(|s| s.comment.as_deref())); - let batch = Batch::try_new([names, catalog_names, comments])?; + let batch = Batch2::try_new([names, catalog_names, comments])?; Ok(Some(batch)) } None => Ok(None), @@ -216,7 +216,7 @@ impl UnityObjectsOperation for ListTablesOperation { Ok(ListTablesStreamState { stream }) } - fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>> { + fn next_batch(state: &mut Self::StreamState) -> BoxFuture<'_, Result>> { Box::pin(async { let resp = state.stream.try_next().await?; match resp { @@ -236,7 +236,7 @@ impl UnityObjectsOperation for ListTablesOperation { let comments = Array2::from_iter(resp.tables.iter().map(|s| s.comment.as_deref())); - let batch = Batch::try_new([ + let batch = Batch2::try_new([ names, catalog_names, schema_names, @@ -353,7 +353,7 @@ pub struct UnityObjectsDataTableScan> { } impl> DataTableScan for UnityObjectsDataTableScan { - fn pull(&mut self) -> BoxFuture<'_, Result>> { + fn pull(&mut self) -> BoxFuture<'_, Result>> { O::next_batch(&mut self.stream) } } diff --git a/crates/rayexec_wasm/src/session.rs b/crates/rayexec_wasm/src/session.rs index 1f05eba92..eb39d2873 100644 --- a/crates/rayexec_wasm/src/session.rs +++ b/crates/rayexec_wasm/src/session.rs @@ -183,7 +183,7 @@ impl WasmMaterializedColumn { #[cfg(test)] mod tests { use 
rayexec_execution::arrays::array::Array2; - use rayexec_execution::arrays::batch::Batch; + use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::{Field, Schema}; @@ -194,9 +194,9 @@ mod tests { let table = MaterializedResultTable::try_new( Schema::new([Field::new("c1", DataType::Int32, true)]), [ - Batch::try_new([Array2::from_iter([0, 1, 2, 3])]).unwrap(), - Batch::try_new([Array2::from_iter([4, 5])]).unwrap(), - Batch::try_new([Array2::from_iter([6, 7, 8, 9, 10])]).unwrap(), + Batch2::try_new([Array2::from_iter([0, 1, 2, 3])]).unwrap(), + Batch2::try_new([Array2::from_iter([4, 5])]).unwrap(), + Batch2::try_new([Array2::from_iter([6, 7, 8, 9, 10])]).unwrap(), ], ) .unwrap(); diff --git a/test_bin/integration_slt_hybrid.rs b/test_bin/integration_slt_hybrid.rs index c8439ea16..b5a89aed6 100644 --- a/test_bin/integration_slt_hybrid.rs +++ b/test_bin/integration_slt_hybrid.rs @@ -6,7 +6,7 @@ use rayexec_debug::table_storage::TablePreload; use rayexec_debug::{DebugDataSource, DebugDataSourceOptions}; use rayexec_error::Result; use rayexec_execution::arrays::array::Array2; -use rayexec_execution::arrays::batch::Batch; +use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::field::Field; use rayexec_execution::datasource::DataSourceRegistry; @@ -36,7 +36,7 @@ pub fn main() -> Result<()> { Field::new("c1", DataType::Int64, false), Field::new("c2", DataType::Utf8, false), ], - data: Batch::try_new([ + data: Batch2::try_new([ Array2::from_iter([1_i64, 2_i64]), Array2::from_iter(["a", "b"]), ])?, @@ -50,7 +50,7 @@ pub fn main() -> Result<()> { Field::new("c1", DataType::Int64, false), Field::new("c2", DataType::Utf8, false), ], - data: Batch::try_new([ + data: Batch2::try_new([ Array2::from_iter([1_i64, 2_i64]), Array2::from_iter(["a", "b"]), ])?, From f6821da273c5d74b667cad3eb3c2a12cc3e2e320 Mon Sep 17 00:00:00 2001 From: 
Sean Smith Date: Sat, 28 Dec 2024 17:51:40 -0500 Subject: [PATCH 12/59] agg --- .../rayexec_execution/src/arrays/array/exp.rs | 50 ++-- .../rayexec_execution/src/arrays/batch_exp.rs | 77 ++++++ .../arrays/executor_exp/aggregate/binary.rs | 106 ++++++++ .../src/arrays/executor_exp/aggregate/mod.rs | 26 ++ .../arrays/executor_exp/aggregate/unary.rs | 239 ++++++++++++++++++ crates/rayexec_execution/src/arrays/mod.rs | 1 + 6 files changed, 473 insertions(+), 26 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/batch_exp.rs create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 5f548067f..020cf3569 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -41,63 +41,64 @@ pub struct Array { pub(crate) data: ArrayData, } -impl Array { +impl Array +where + B: BufferManager, +{ /// Create a new array with the given capacity. /// /// This will take care of initalizing the primary and secondary data /// buffers depending on the type. - pub fn new(datatype: DataType, capacity: usize) -> Result { - let manager = NopBufferManager; - + pub fn new(manager: &B, datatype: DataType, capacity: usize) -> Result { let buffer = match datatype.physical_type() { PhysicalType::Boolean => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Int8 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Int16 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Int32 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? 
+ ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Int64 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Int128 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::UInt8 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::UInt16 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::UInt32 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::UInt64 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::UInt128 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Float16 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Float32 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Float64 => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? } PhysicalType::Interval => { - ArrayBuffer::with_primary_capacity::(&manager, capacity)? + ArrayBuffer::with_primary_capacity::(manager, capacity)? 
} PhysicalType::Utf8 => { let mut buffer = - ArrayBuffer::with_primary_capacity::(&manager, capacity)?; + ArrayBuffer::with_primary_capacity::(manager, capacity)?; buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); buffer } @@ -112,12 +113,7 @@ impl Array { data: ArrayData::owned(buffer), }) } -} -impl Array -where - B: BufferManager, -{ pub fn data(&self) -> &ArrayData { &self.data } @@ -300,7 +296,9 @@ macro_rules! impl_primitive_from_iter { ) -> Result { let iter = iter.into_iter(); - let mut array = Array::new(DataType::$typ_variant, iter.len())?; + let manager = NopBufferManager; + + let mut array = Array::new(&manager, DataType::$typ_variant, iter.len())?; let slice = array.data.try_as_mut()?.try_as_slice_mut::<$phys>()?; for (dest, v) in slice.iter_mut().zip(iter) { diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs new file mode 100644 index 000000000..6962e5e78 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -0,0 +1,77 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use super::array::exp::Array; +use super::buffer::buffer_manager::{BufferManager, NopBufferManager}; +use super::datatype::DataType; + +#[derive(Debug)] +pub struct Batch { + /// Arrays making up the batch. + /// + /// All arrays must have the same capacity (underlying length). + pub(crate) arrays: Vec>, + /// Number of logical rows in the batch. + /// + /// Equal to or less than capacity when batch contains at least one array. + /// If the batch contains no arrays, number of rows can be arbitarily set. + /// + /// This allows "resizing" batches without needed to resize the underlying + /// arrays, allowing for buffer reuse. + pub(crate) num_rows: usize, + /// Capacity (in number of rows) of the batch. + /// + /// This should match the capacity of the arrays. If there are zero arrays + /// in the batch, this should be zero. 
+ pub(crate) capacity: usize, +} + +impl Batch +where + B: BufferManager, +{ + /// Create an empty batch with zero rows. + pub const fn empty() -> Self { + Self::empty_with_num_rows(0) + } + + /// Create an empty batch with some number of rows. + pub const fn empty_with_num_rows(num_rows: usize) -> Self { + Batch { + arrays: Vec::new(), + num_rows, + capacity: 0, + } + } + + /// Create a batch by initializing arrays for the given datatypes. + /// + /// Each array will be initialized to hold `capacity` rows. + pub fn new( + manager: &B, + datatypes: impl IntoExactSizeIterator, + capacity: usize, + ) -> Result { + let datatypes = datatypes.into_iter(); + let mut arrays = Vec::with_capacity(datatypes.len()); + + for datatype in datatypes { + let array = Array::new(manager, datatype, capacity)?; + arrays.push(array) + } + + Ok(Batch { + arrays, + num_rows: 0, + capacity, + }) + } + + pub fn arrays(&self) -> &[Array] { + &self.arrays + } + + pub fn arrays_mut(&mut self) -> &mut [Array] { + &mut self.arrays + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs new file mode 100644 index 000000000..ade1f1ec5 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs @@ -0,0 +1,106 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use super::AggregateState; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{Addressable, PhysicalStorage}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct BinaryNonNullUpdater; + +impl BinaryNonNullUpdater { + pub fn update( + array1: &Array, + array2: &Array, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + Output: ?Sized, + for<'a> State: AggregateState<(&'a S1::StorageType, &'a S2::StorageType), Output>, + { + // 
TODO: Dictionary + + // TODO: Length check. + + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + if validity1.all_valid() && validity2.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let val1 = input1.get(input_idx).unwrap(); + let val2 = input2.get(input_idx).unwrap(); + + let state = &mut states[state_idx]; + + state.update((val1, val2))?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + if !validity1.is_valid(input_idx) || !validity2.is_valid(input_idx) { + continue; + } + + let val1 = input1.get(input_idx).unwrap(); + let val2 = input2.get(input_idx).unwrap(); + + let state = &mut states[state_idx]; + + state.update((val1, val2))?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::arrays::buffer::physical_type::AddressableMut; + use crate::arrays::executor_exp::PutBuffer; + + // SUM(col) + PRODUCT(col) + #[derive(Debug)] + struct TestAddSumAndProductState { + sum: i32, + product: i32, + } + + impl Default for TestAddSumAndProductState { + fn default() -> Self { + TestAddSumAndProductState { sum: 0, product: 1 } + } + } + + impl AggregateState<(&i32, &i32), i32> for TestAddSumAndProductState { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.sum += other.sum; + self.product *= other.product; + Ok(()) + } + + fn update(&mut self, (&i1, &i2): (&i32, &i32)) -> Result<()> { + self.sum += i1; + self.product *= i2; + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&(self.sum + self.product)); + Ok(()) + } + } + + #[test] + fn test_name() {} +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs index 8b1378917..76356e6e3 100644 --- 
a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs @@ -1 +1,27 @@ +pub mod binary; +pub mod unary; +use std::fmt::Debug; + +use rayexec_error::Result; + +use super::PutBuffer; +use crate::arrays::buffer::physical_type::AddressableMut; + +/// State for a single group's aggregate. +/// +/// An example state for SUM would be a struct that takes a running sum from +/// values provided in `update`. +pub trait AggregateState: Debug { + /// Merge other state into this state. + fn merge(&mut self, other: &mut Self) -> Result<()>; + + /// Update this state with some input. + fn update(&mut self, input: Input) -> Result<()>; + + /// Produce a single value from the state, along with a bool indicating if + /// the value is valid. + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut; +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs new file mode 100644 index 000000000..2532e9d65 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs @@ -0,0 +1,239 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use super::AggregateState; +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, PhysicalStorage}; + +#[derive(Debug, Clone, Copy)] +pub struct UnaryNonNullUpdater; + +impl UnaryNonNullUpdater { + pub fn update( + array: &Array, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S: PhysicalStorage, + Output: ?Sized, + for<'a> State: AggregateState<&'a S::StorageType, Output>, + { + if array.is_dictionary() { + let flat = array.flat_view()?; + return Self::update_flat::(flat, selection, mapping, states); + } + + // TODO: Length check. 
+ + let input = S::get_addressable(array.data())?; + let validity = array.validity(); + + if validity.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let val = input.get(input_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + if !validity.is_valid(input_idx) { + continue; + } + + let val = input.get(input_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } + + Ok(()) + } + + pub fn update_flat<'a, S, State, Output>( + array: FlatArrayView<'a>, + selection: impl IntoExactSizeIterator, + mapping: impl IntoExactSizeIterator, + states: &mut [State], + ) -> Result<()> + where + S: PhysicalStorage, + Output: ?Sized, + for<'b> State: AggregateState<&'b S::StorageType, Output>, + { + let input = S::get_addressable(array.array_buffer)?; + let validity = &array.validity; + + if validity.all_valid() { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let selected_idx = array.selection.get(input_idx).unwrap(); + + let val = input.get(selected_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } else { + for (input_idx, state_idx) in selection.into_iter().zip(mapping.into_iter()) { + let selected_idx = array.selection.get(input_idx).unwrap(); + + if !validity.is_valid(selected_idx) { + continue; + } + + let val = input.get(selected_idx).unwrap(); + let state = &mut states[state_idx]; + state.update(val)?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor_exp::PutBuffer; + + #[derive(Debug, Default)] + struct TestSumState { + val: i32, + } + + impl AggregateState<&i32, i32> 
for TestSumState { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.val += other.val; + Ok(()) + } + + fn update(&mut self, &input: &i32) -> Result<()> { + self.val += input; + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.val); + Ok(()) + } + } + + #[test] + fn unary_primitive_single_state() { + let mut states = [TestSumState::default()]; + let array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], + [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(11, states[0].val); + } + + #[test] + fn unary_primitive_single_state_dictionary() { + let mut states = [TestSumState::default()]; + let mut array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + // '[1, 5, 5, 5, 5, 2, 2]' + array + .select(&NopBufferManager, [0, 4, 4, 4, 4, 1, 1]) + .unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], // Select from the resulting dictionary. 
+ [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(16, states[0].val); + } + + #[test] + fn unary_primitive_single_state_skip_null() { + let mut states = [TestSumState::default()]; + let array = Array::try_from_iter([None, Some(2), Some(3), Some(4), Some(5)]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4], + [0, 0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(10, states[0].val); + } + + #[test] + fn unary_primitive_multiple_states() { + let mut states = [TestSumState::default(), TestSumState::default()]; + let array = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2, 4, 0, 3, 3], + [0, 0, 0, 0, 1, 1, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(15, states[0].val); + assert_eq!(5, states[1].val); + } + + #[derive(Debug, Default)] + struct TestStringAgg { + val: String, + } + + impl AggregateState<&str, str> for TestStringAgg { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.val.push_str(&other.val); + Ok(()) + } + + fn update(&mut self, input: &str) -> Result<()> { + self.val.push_str(input); + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.val); + Ok(()) + } + } + + #[test] + fn unary_string_single_state() { + // Test just checks to ensure working with varlen is sane. 
+ let mut states = [TestStringAgg::default()]; + let array = Array::try_from_iter(["aa", "bbb", "cccc"]).unwrap(); + + UnaryNonNullUpdater::update::( + &array, + [0, 1, 2], + [0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!("aabbbcccc", &states[0].val); + } +} diff --git a/crates/rayexec_execution/src/arrays/mod.rs b/crates/rayexec_execution/src/arrays/mod.rs index d7b9ca4ca..2d84ca46e 100644 --- a/crates/rayexec_execution/src/arrays/mod.rs +++ b/crates/rayexec_execution/src/arrays/mod.rs @@ -1,5 +1,6 @@ pub mod array; pub mod batch; +pub mod batch_exp; pub mod bitmap; pub mod buffer; pub mod compute; From e7d8cff88f183f8fc80b76d429b94729e02fb48c Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 17:58:29 -0500 Subject: [PATCH 13/59] temp renames --- .../rayexec_execution/src/arrays/array/mod.rs | 85 +++++++++---------- .../src/arrays/compute/cast/array.rs | 42 ++++----- .../src/arrays/compute/date.rs | 8 +- .../src/arrays/executor/aggregate/binary.rs | 6 +- .../src/arrays/executor/aggregate/unary.rs | 4 +- .../src/arrays/executor/physical_type.rs | 42 ++++----- .../src/arrays/executor/scalar/binary.rs | 16 ++-- .../src/arrays/executor/scalar/fill.rs | 12 +-- .../src/arrays/executor/scalar/hash.rs | 4 +- .../src/arrays/executor/scalar/list.rs | 6 +- .../src/arrays/executor/scalar/select.rs | 2 +- .../src/arrays/executor/scalar/ternary.rs | 8 +- .../src/arrays/executor/scalar/unary.rs | 25 +++--- .../src/arrays/executor/scalar/uniform.rs | 4 +- .../src/arrays/row/encoding.rs | 12 +-- .../src/arrays/scalar/decimal.rs | 6 +- .../operators/hash_aggregate/compare.rs | 4 +- .../execution/operators/hash_aggregate/mod.rs | 4 +- .../operators/hash_join/condition.rs | 4 +- .../src/execution/operators/unnest.rs | 10 +-- .../src/expr/physical/scalar_function_expr.rs | 2 +- .../src/functions/aggregate/builtin/first.rs | 4 +- .../src/functions/aggregate/builtin/minmax.rs | 4 +- .../src/functions/aggregate/states.rs | 16 ++-- 
.../src/functions/scalar/builtin/arith/add.rs | 12 +-- .../src/functions/scalar/builtin/arith/div.rs | 16 ++-- .../src/functions/scalar/builtin/arith/mul.rs | 22 ++--- .../src/functions/scalar/builtin/arith/rem.rs | 12 +-- .../src/functions/scalar/builtin/arith/sub.rs | 12 +-- .../src/functions/scalar/builtin/boolean.rs | 16 ++-- .../functions/scalar/builtin/comparison.rs | 38 +++++---- .../scalar/builtin/datetime/date_part.rs | 2 +- .../scalar/builtin/datetime/date_trunc.rs | 6 +- .../scalar/builtin/datetime/epoch.rs | 6 +- .../src/functions/scalar/builtin/is.rs | 10 +-- .../scalar/builtin/list/list_extract.rs | 12 +-- .../scalar/builtin/list/list_values.rs | 2 +- .../src/functions/scalar/builtin/negate.rs | 14 +-- .../functions/scalar/builtin/numeric/abs.rs | 8 +- .../functions/scalar/builtin/numeric/acos.rs | 8 +- .../functions/scalar/builtin/numeric/asin.rs | 8 +- .../functions/scalar/builtin/numeric/atan.rs | 8 +- .../functions/scalar/builtin/numeric/cbrt.rs | 8 +- .../functions/scalar/builtin/numeric/ceil.rs | 8 +- .../functions/scalar/builtin/numeric/cos.rs | 8 +- .../scalar/builtin/numeric/degrees.rs | 8 +- .../functions/scalar/builtin/numeric/exp.rs | 8 +- .../functions/scalar/builtin/numeric/floor.rs | 8 +- .../functions/scalar/builtin/numeric/isnan.rs | 14 +-- .../functions/scalar/builtin/numeric/ln.rs | 8 +- .../functions/scalar/builtin/numeric/log.rs | 12 +-- .../functions/scalar/builtin/numeric/mod.rs | 6 +- .../scalar/builtin/numeric/radians.rs | 8 +- .../functions/scalar/builtin/numeric/sin.rs | 8 +- .../functions/scalar/builtin/numeric/sqrt.rs | 8 +- .../functions/scalar/builtin/numeric/tan.rs | 8 +- .../src/functions/scalar/builtin/random.rs | 2 +- .../scalar/builtin/similarity/l2_distance.rs | 10 +-- .../functions/scalar/builtin/string/ascii.rs | 6 +- .../functions/scalar/builtin/string/case.rs | 8 +- .../functions/scalar/builtin/string/concat.rs | 6 +- .../scalar/builtin/string/contains.rs | 10 +-- .../scalar/builtin/string/ends_with.rs | 10 +-- 
.../functions/scalar/builtin/string/length.rs | 14 +-- .../functions/scalar/builtin/string/like.rs | 10 +-- .../functions/scalar/builtin/string/pad.rs | 10 +-- .../scalar/builtin/string/regexp_replace.rs | 10 +-- .../functions/scalar/builtin/string/repeat.rs | 6 +- .../scalar/builtin/string/starts_with.rs | 8 +- .../scalar/builtin/string/substring.rs | 8 +- .../functions/scalar/builtin/string/trim.rs | 10 +-- .../src/functions/scalar/mod.rs | 15 +++- .../src/functions/table/builtin/series.rs | 8 +- .../src/functions/table/builtin/unnest.rs | 4 +- crates/rayexec_parquet/src/writer/mod.rs | 2 +- 75 files changed, 417 insertions(+), 402 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index 9c55b47d3..d16f531a1 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -42,7 +42,7 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Scalar, Decimal64Scalar}; use crate::arrays::scalar::interval::Interval; use crate::arrays::scalar::timestamp::TimestampScalar; @@ -309,7 +309,7 @@ impl Array2 { validity: None, data: UntypedNullStorage(self.logical_len()).into(), }), - ArrayData2::Boolean(_) => UnaryExecutor::execute::( + ArrayData2::Boolean(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -317,7 +317,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int8(_) => UnaryExecutor::execute::( + ArrayData2::Int8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -325,7 +325,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int16(_) => UnaryExecutor::execute::( + ArrayData2::Int16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ 
-333,7 +333,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int32(_) => UnaryExecutor::execute::( + ArrayData2::Int32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -341,7 +341,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int64(_) => UnaryExecutor::execute::( + ArrayData2::Int64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -349,7 +349,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int128(_) => UnaryExecutor::execute::( + ArrayData2::Int128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -357,7 +357,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt8(_) => UnaryExecutor::execute::( + ArrayData2::UInt8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -365,7 +365,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt16(_) => UnaryExecutor::execute::( + ArrayData2::UInt16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -373,7 +373,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt32(_) => UnaryExecutor::execute::( + ArrayData2::UInt32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -381,7 +381,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt64(_) => UnaryExecutor::execute::( + ArrayData2::UInt64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -389,7 +389,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt128(_) => UnaryExecutor::execute::( + ArrayData2::UInt128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -397,7 +397,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float16(_) => UnaryExecutor::execute::( + ArrayData2::Float16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: 
self.datatype.clone(), @@ -405,7 +405,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float32(_) => UnaryExecutor::execute::( + ArrayData2::Float32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -413,7 +413,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float64(_) => UnaryExecutor::execute::( + ArrayData2::Float64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -421,7 +421,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Interval(_) => UnaryExecutor::execute::( + ArrayData2::Interval(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -437,7 +437,7 @@ impl Array2 { // data while just selecting the appropriate metadata. Instead // this will just copy everything. if self.datatype().is_utf8() { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -446,7 +446,7 @@ impl Array2 { |v, buf| buf.put(v), ) } else { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -619,118 +619,117 @@ impl Array2 { match scalar { ScalarValue::Null => { - UnaryExecutor::value_at::(self, row).map(|arr_val| arr_val.is_none()) + UnaryExecutor2::value_at::(self, row).map(|arr_val| arr_val.is_none()) } // None == NULL ScalarValue::Boolean(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int16(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { 
Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt16(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Float32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == 
*v, None => false, }) } ScalarValue::Float64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Date32(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Date64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } - ScalarValue::Interval(v) => UnaryExecutor::value_at::(self, row).map( - |arr_val| match arr_val { + ScalarValue::Interval(v) => UnaryExecutor2::value_at::(self, row) + .map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, - }, - ), + }), ScalarValue::Utf8(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Binary(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Timestamp(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes time unit is the same match arr_val { Some(arr_val) => arr_val == v.value, @@ -739,7 +738,7 @@ impl Array2 { }) } ScalarValue::Decimal64(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. 
match arr_val { Some(arr_val) => arr_val == v.value, @@ -748,7 +747,7 @@ impl Array2 { }) } ScalarValue::Decimal128(v) => { - UnaryExecutor::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. match arr_val { Some(arr_val) => arr_val == v.value, diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index a96084493..23e0e6645 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -67,7 +67,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, @@ -75,7 +75,7 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; @@ -233,7 +233,7 @@ fn decimal_rescale_helper<'a, S>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: PrimInt, { match to { @@ -249,7 +249,7 @@ pub fn decimal_rescale<'a, S, D>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, D: DecimalType, S::Type<'a>: PrimInt, ArrayData2: From>, @@ -263,7 +263,7 @@ where .expect("to be in range"); let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: to, @@ -302,7 +302,7 @@ fn cast_float_to_decimal_helper<'a, S>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float, { match to { @@ -318,7 +318,7 @@ fn cast_float_to_decimal<'a, S, 
D>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, D: DecimalType, S::Type<'a>: Float, ArrayData2: From>, @@ -333,7 +333,7 @@ where .ok_or_else(|| RayexecError::new(format!("Failed to cast scale {scale} to float")))?; let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: to, @@ -366,9 +366,9 @@ pub fn cast_decimal_to_float<'a, S, F>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, F: Float + Default + Copy, - <::Storage<'a> as AddressableStorage>::T: ToPrimitive, + <::Storage<'a> as AddressableStorage>::T: ToPrimitive, ArrayData2: From>, { let decimal_meta = arr.datatype().try_get_decimal_type_meta()?; @@ -387,7 +387,7 @@ where let mut fail_state = behavior.new_state_for_array(arr); let output = - UnaryExecutor::execute::(arr, builder, |v, buf| match ::from(v) { + UnaryExecutor2::execute::(arr, builder, |v, buf| match ::from(v) { Some(v) => { let scaled = v.div(scale); buf.put(&scaled); @@ -404,7 +404,7 @@ fn cast_int_to_decimal_helper<'a, S>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: PrimInt, { match to { @@ -420,7 +420,7 @@ fn cast_int_to_decimal<'a, S, D>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, D: DecimalType, S::Type<'a>: PrimInt, ArrayData2: From>, @@ -433,7 +433,7 @@ where .expect("to be in range"); let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: to, @@ -486,7 +486,7 @@ fn cast_primitive_numeric_helper<'a, S>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ToPrimitive, { match to { @@ -513,13 +513,13 @@ pub fn cast_primitive_numeric<'a, S, T>( behavior: CastFailBehavior, ) 
-> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ToPrimitive, T: NumCast + Default + Copy, ArrayData2: From>, { let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype, @@ -657,13 +657,13 @@ fn cast_format<'a, S, F>( behavior: CastFailBehavior, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, F: Formatter>, { let mut fail_state = behavior.new_state_for_array(arr); let mut string_buf = String::new(); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Utf8, @@ -683,7 +683,7 @@ where fn cast_parse_bool(arr: &Array2, behavior: CastFailBehavior) -> Result { let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Boolean, @@ -710,7 +710,7 @@ where ArrayData2: From>, { let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: datatype.clone(), diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index dee45b5bc..3770ecd02 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -5,7 +5,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalI64}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; pub const EPOCH_NAIVE_DATE: NaiveDate = match NaiveDate::from_ymd_opt(1970, 1, 1) { @@ 
-148,7 +148,7 @@ where B: Fn(i64) -> DateTime, F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { @@ -168,7 +168,7 @@ fn date32_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { @@ -189,7 +189,7 @@ fn date64_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { - UnaryExecutor::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index 0b29d1bd4..c3ab64ebd 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -2,7 +2,7 @@ use rayexec_error::{RayexecError, Result}; use super::{AggregateState, RowToStateMapping}; use crate::arrays::array::Array2; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::check_validity; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; @@ -19,8 +19,8 @@ impl BinaryNonNullUpdater { states: &mut [State], ) -> Result<()> where - S1: PhysicalStorage, - S2: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, I: IntoIterator, State: AggregateState<(S1::Type<'a>, S2::Type<'a>), Output>, { diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index 0281f3403..423aae97e 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -2,7 +2,7 @@ use 
rayexec_error::Result; use super::{AggregateState, RowToStateMapping}; use crate::arrays::array::Array2; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; @@ -17,7 +17,7 @@ impl UnaryNonNullUpdater { states: &mut [State], ) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, I: IntoIterator, State: AggregateState, Output>, { diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index a286499c5..ad8223086 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -177,7 +177,7 @@ impl VarlenType for [u8] { /// /// Contains a lifetime to enable tying the returned storage to the provided /// array data. -pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { +pub trait PhysicalStorage2: Debug + Sync + Send + Clone + Copy + 'static { // /// The type that's stored in the primary buffer. // /// // /// This should be small and fixed sized. 
@@ -200,7 +200,7 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { #[derive(Debug, Clone, Copy)] pub struct PhysicalAny; -impl PhysicalStorage for PhysicalAny { +impl PhysicalStorage2 for PhysicalAny { type Type<'a> = (); type Storage<'a> = UnitStorage; @@ -234,7 +234,7 @@ impl AddressableStorage for UnitStorage { #[derive(Debug, Clone, Copy)] pub struct PhysicalUntypedNull; -impl PhysicalStorage for PhysicalUntypedNull { +impl PhysicalStorage2 for PhysicalUntypedNull { type Type<'a> = UntypedNull; type Storage<'a> = UntypedNullStorage; @@ -249,7 +249,7 @@ impl PhysicalStorage for PhysicalUntypedNull { #[derive(Debug, Clone, Copy)] pub struct PhysicalBool; -impl PhysicalStorage for PhysicalBool { +impl PhysicalStorage2 for PhysicalBool { type Type<'a> = bool; type Storage<'a> = BooleanStorageRef<'a>; @@ -264,7 +264,7 @@ impl PhysicalStorage for PhysicalBool { #[derive(Debug, Clone, Copy)] pub struct PhysicalI8; -impl PhysicalStorage for PhysicalI8 { +impl PhysicalStorage2 for PhysicalI8 { type Type<'a> = i8; type Storage<'a> = PrimitiveStorageSlice<'a, i8>; @@ -279,7 +279,7 @@ impl PhysicalStorage for PhysicalI8 { #[derive(Debug, Clone, Copy)] pub struct PhysicalI16; -impl PhysicalStorage for PhysicalI16 { +impl PhysicalStorage2 for PhysicalI16 { type Type<'a> = i16; type Storage<'a> = PrimitiveStorageSlice<'a, i16>; @@ -294,7 +294,7 @@ impl PhysicalStorage for PhysicalI16 { #[derive(Debug, Clone, Copy)] pub struct PhysicalI32; -impl PhysicalStorage for PhysicalI32 { +impl PhysicalStorage2 for PhysicalI32 { type Type<'a> = i32; type Storage<'a> = PrimitiveStorageSlice<'a, i32>; @@ -309,7 +309,7 @@ impl PhysicalStorage for PhysicalI32 { #[derive(Debug, Clone, Copy)] pub struct PhysicalI64; -impl PhysicalStorage for PhysicalI64 { +impl PhysicalStorage2 for PhysicalI64 { type Type<'a> = i64; type Storage<'a> = PrimitiveStorageSlice<'a, i64>; @@ -324,7 +324,7 @@ impl PhysicalStorage for PhysicalI64 { #[derive(Debug, Clone, Copy)] pub struct 
PhysicalI128; -impl PhysicalStorage for PhysicalI128 { +impl PhysicalStorage2 for PhysicalI128 { type Type<'a> = i128; type Storage<'a> = PrimitiveStorageSlice<'a, i128>; @@ -339,7 +339,7 @@ impl PhysicalStorage for PhysicalI128 { #[derive(Debug, Clone, Copy)] pub struct PhysicalU8; -impl PhysicalStorage for PhysicalU8 { +impl PhysicalStorage2 for PhysicalU8 { type Type<'a> = u8; type Storage<'a> = PrimitiveStorageSlice<'a, u8>; @@ -354,7 +354,7 @@ impl PhysicalStorage for PhysicalU8 { #[derive(Debug, Clone, Copy)] pub struct PhysicalU16; -impl PhysicalStorage for PhysicalU16 { +impl PhysicalStorage2 for PhysicalU16 { type Type<'a> = u16; type Storage<'a> = PrimitiveStorageSlice<'a, u16>; @@ -369,7 +369,7 @@ impl PhysicalStorage for PhysicalU16 { #[derive(Debug, Clone, Copy)] pub struct PhysicalU32; -impl PhysicalStorage for PhysicalU32 { +impl PhysicalStorage2 for PhysicalU32 { type Type<'a> = u32; type Storage<'a> = PrimitiveStorageSlice<'a, u32>; @@ -384,7 +384,7 @@ impl PhysicalStorage for PhysicalU32 { #[derive(Debug, Clone, Copy)] pub struct PhysicalU64; -impl PhysicalStorage for PhysicalU64 { +impl PhysicalStorage2 for PhysicalU64 { type Type<'a> = u64; type Storage<'a> = PrimitiveStorageSlice<'a, u64>; @@ -399,7 +399,7 @@ impl PhysicalStorage for PhysicalU64 { #[derive(Debug, Clone, Copy)] pub struct PhysicalU128; -impl PhysicalStorage for PhysicalU128 { +impl PhysicalStorage2 for PhysicalU128 { type Type<'a> = u128; type Storage<'a> = PrimitiveStorageSlice<'a, u128>; @@ -414,7 +414,7 @@ impl PhysicalStorage for PhysicalU128 { #[derive(Debug, Clone, Copy)] pub struct PhysicalF16; -impl PhysicalStorage for PhysicalF16 { +impl PhysicalStorage2 for PhysicalF16 { type Type<'a> = f16; type Storage<'a> = PrimitiveStorageSlice<'a, f16>; @@ -429,7 +429,7 @@ impl PhysicalStorage for PhysicalF16 { #[derive(Debug, Clone, Copy)] pub struct PhysicalF32; -impl PhysicalStorage for PhysicalF32 { +impl PhysicalStorage2 for PhysicalF32 { type Type<'a> = f32; type Storage<'a> 
= PrimitiveStorageSlice<'a, f32>; @@ -444,7 +444,7 @@ impl PhysicalStorage for PhysicalF32 { #[derive(Debug, Clone, Copy)] pub struct PhysicalF64; -impl PhysicalStorage for PhysicalF64 { +impl PhysicalStorage2 for PhysicalF64 { type Type<'a> = f64; type Storage<'a> = PrimitiveStorageSlice<'a, f64>; @@ -459,7 +459,7 @@ impl PhysicalStorage for PhysicalF64 { #[derive(Debug, Clone, Copy)] pub struct PhysicalInterval; -impl PhysicalStorage for PhysicalInterval { +impl PhysicalStorage2 for PhysicalInterval { type Type<'a> = Interval; type Storage<'a> = PrimitiveStorageSlice<'a, Interval>; @@ -474,7 +474,7 @@ impl PhysicalStorage for PhysicalInterval { #[derive(Debug, Clone, Copy)] pub struct PhysicalBinary; -impl PhysicalStorage for PhysicalBinary { +impl PhysicalStorage2 for PhysicalBinary { type Type<'a> = &'a [u8]; type Storage<'a> = BinaryDataStorage<'a>; @@ -497,7 +497,7 @@ impl PhysicalStorage for PhysicalBinary { #[derive(Debug, Clone, Copy)] pub struct PhysicalUtf8; -impl PhysicalStorage for PhysicalUtf8 { +impl PhysicalStorage2 for PhysicalUtf8 { type Type<'a> = &'a str; type Storage<'a> = StrDataStorage<'a>; @@ -591,7 +591,7 @@ impl<'a> From> for StrDataStorage<'a> { #[derive(Debug, Clone, Copy)] pub struct PhysicalList; -impl PhysicalStorage for PhysicalList { +impl PhysicalStorage2 for PhysicalList { type Type<'a> = ListItemMetadata; type Storage<'a> = PrimitiveStorageSlice<'a, ListItemMetadata>; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs index eef30999f..e9b0d6939 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs @@ -4,15 +4,15 @@ use super::check_validity; use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; 
+use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct BinaryExecutor; +pub struct BinaryExecutor2; -impl BinaryExecutor { +impl BinaryExecutor2 { pub fn execute<'a, S1, S2, B, Op>( array1: &'a Array2, array2: &'a Array2, @@ -21,8 +21,8 @@ impl BinaryExecutor { ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, &mut OutputBuffer), - S1: PhysicalStorage, - S2: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array1)?; @@ -110,7 +110,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }; - let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -134,7 +134,7 @@ mod tests { }; let mut string_buf = String::new(); - let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -168,7 +168,7 @@ mod tests { let right = Array2::from_iter([2, 3, 4]); - let got = BinaryExecutor::execute::( + let got = BinaryExecutor2::execute::( &left, &right, ArrayBuilder { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 5fd18f9c2..346268ea3 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -25,7 +25,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalList, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -34,7 +34,7 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection; use crate::arrays::storage::{ 
AddressableStorage, @@ -83,7 +83,7 @@ where /// where they should be placed in the buffer. pub fn fill<'a, S, I>(&mut self, array: &'a Array2, fill_map: I) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, I: IntoIterator, S::Type<'a>: Borrow<::Type>, { @@ -308,7 +308,7 @@ fn concat_lists(datatype: DataType, arrays: &[&Array2], total_len: usize) -> Res let mut acc_rows = 0; for (array, child_array) in arrays.iter().zip(inner_arrays) { - UnaryExecutor::for_each::(array, |_row_num, metadata| match metadata { + UnaryExecutor2::for_each::(array, |_row_num, metadata| match metadata { Some(metadata) => { metadatas.push(ListItemMetadata { offset: metadata.offset + acc_rows, @@ -342,7 +342,7 @@ fn concat_with_fill_state<'a, S, B>( mut fill_state: FillState, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow<::Type>, { @@ -517,7 +517,7 @@ fn interleave_with_fill_state<'a, S, B>( mut fill_state: FillState, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow<::Type>, { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 5686e133e..e828cec02 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -16,7 +16,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalList, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU16, PhysicalU32, @@ -180,7 +180,7 @@ impl HashExecutor { fn hash_one_inner<'a, 'b, S, H>(array: &'a Array2, hashes: &'b mut [u64]) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: HashValue, H: SetHash, { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index d5cd0d537..a16bbabaf 100644 --- 
a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -3,7 +3,7 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage}; +use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage2}; use crate::arrays::executor::scalar::{ can_skip_validity_check, check_validity, @@ -42,7 +42,7 @@ impl ) -> Result where R: BinaryListReducer, B::Type>, - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, ::Type: Sized, { @@ -170,7 +170,7 @@ impl /// contain NULLs. fn get_inner_array_storage(array: &Array2) -> Result<(S::Storage<'_>, Option<&Bitmap>)> where - S: PhysicalStorage, + S: PhysicalStorage2, { match array.array_data() { ArrayData2::List(d) => { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs index 74ac06747..a3f5f1b75 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; -use crate::arrays::executor::physical_type::{PhysicalBool, PhysicalStorage}; +use crate::arrays::executor::physical_type::{PhysicalBool, PhysicalStorage2}; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs index b3b91eae9..9e2fd283c 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs @@ -6,7 +6,7 @@ use super::check_validity; use 
crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; @@ -24,9 +24,9 @@ impl TernaryExecutor { ) -> Result where Op: FnMut(S1::Type<'a>, S2::Type<'a>, S3::Type<'a>, &mut OutputBuffer), - S1: PhysicalStorage, - S2: PhysicalStorage, - S3: PhysicalStorage, + S1: PhysicalStorage2, + S2: PhysicalStorage2, + S3: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array1)?; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs index 00c73368a..867e5c084 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs @@ -4,14 +4,14 @@ use super::validate_logical_len; use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone)] -pub struct UnaryExecutor; +pub struct UnaryExecutor2; -impl UnaryExecutor { +impl UnaryExecutor2 { /// Executes `op` on every non-null input. 
pub fn execute<'a, S, B, Op>( array: &'a Array2, @@ -20,7 +20,7 @@ impl UnaryExecutor { ) -> Result where Op: FnMut(S::Type<'a>, &mut OutputBuffer), - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, { let len = validate_logical_len(&builder.buffer, array)?; @@ -83,7 +83,7 @@ impl UnaryExecutor { pub fn for_each<'a, S, Op>(array: &'a Array2, mut op: Op) -> Result<()> where Op: FnMut(usize, Option>), - S: PhysicalStorage, + S: PhysicalStorage2, { let selection = array.selection_vector(); let len = array.logical_len(); @@ -121,7 +121,7 @@ impl UnaryExecutor { /// Returns Some if the value is valid, None otherwise. pub fn value_at(array: &Array2, idx: usize) -> Result>> where - S: PhysicalStorage, + S: PhysicalStorage2, { let selection = array.selection_vector(); @@ -165,7 +165,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }; - let got = UnaryExecutor::execute::(&array, builder, |v, buf| { + let got = UnaryExecutor2::execute::(&array, builder, |v, buf| { buf.put(&(v + 2)) }) .unwrap(); @@ -194,7 +194,7 @@ mod tests { buf.put(&double) } - let got = UnaryExecutor::execute::(&array, builder, my_string_double) + let got = UnaryExecutor2::execute::(&array, builder, my_string_double) .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); @@ -227,7 +227,7 @@ mod tests { buf.put(buffer.as_str()) }; - let got = UnaryExecutor::execute::(&array, builder, my_string_double) + let got = UnaryExecutor2::execute::(&array, builder, my_string_double) .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); @@ -254,8 +254,9 @@ mod tests { buf.put(s.get(0..len).unwrap_or("")) }; - let got = UnaryExecutor::execute::(&array, builder, my_string_truncate) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_truncate) + .unwrap(); assert_eq!(ScalarValue::from("a"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bb"), got.physical_scalar(1).unwrap()); @@ -288,7 +289,7 @@ mod 
tests { }; let got = - UnaryExecutor::execute::(&array, builder, my_string_uppercase) + UnaryExecutor2::execute::(&array, builder, my_string_uppercase) .unwrap(); assert_eq!(ScalarValue::from("DDDD"), got.physical_scalar(0).unwrap()); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index 9b7bb8180..de02c60c8 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -4,7 +4,7 @@ use super::check_validity; use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, OutputBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::validate_logical_len; use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; @@ -20,7 +20,7 @@ impl UniformExecutor { ) -> Result where Op: FnMut(&[S::Type<'a>], &mut OutputBuffer), - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, { let len = match arrays.first() { diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index 1c9bb9e60..2e20b942e 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -15,14 +15,14 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::interval::Interval; /// Binary-encoded rows suitable for comparisons. 
@@ -307,13 +307,13 @@ impl ComparableRowEncoder { start: usize, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ComparableEncode + AsBytes, { let null_byte = col.null_byte(); let valid_byte = col.valid_byte(); - match UnaryExecutor::value_at::(arr, row)? { + match UnaryExecutor2::value_at::(arr, row)? { Some(val) => { buf[start] = valid_byte; let end = start + 1 + val.as_bytes().len(); @@ -348,13 +348,13 @@ impl ComparableRowEncoder { start: usize, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: ComparableEncode, { let null_byte = col.null_byte(); let valid_byte = col.valid_byte(); - match UnaryExecutor::value_at::(arr, row)? { + match UnaryExecutor2::value_at::(arr, row)? { Some(val) => { buf[start] = valid_byte; let end = start + 1 + std::mem::size_of::>(); diff --git a/crates/rayexec_execution/src/arrays/scalar/decimal.rs b/crates/rayexec_execution/src/arrays/scalar/decimal.rs index 041a7aca5..dcbca9243 100644 --- a/crates/rayexec_execution/src/arrays/scalar/decimal.rs +++ b/crates/rayexec_execution/src/arrays/scalar/decimal.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; -use crate::arrays::executor::physical_type::{PhysicalI128, PhysicalI64, PhysicalStorage}; +use crate::arrays::executor::physical_type::{PhysicalI128, PhysicalI64, PhysicalStorage2}; pub trait DecimalPrimitive: PrimInt + FromPrimitive + Signed + Default + Debug + Display + Sync + Send @@ -30,11 +30,11 @@ impl DecimalPrimitive for i128 { pub trait DecimalType: Debug + Sync + Send + Copy + 'static where - for<'a> Self::Storage: PhysicalStorage = Self::Primitive>, + for<'a> Self::Storage: PhysicalStorage2 = Self::Primitive>, { /// The underlying primitive type storing the decimal's value. type Primitive: DecimalPrimitive; - type Storage: PhysicalStorage; + type Storage: PhysicalStorage2; /// Max precision for this decimal type. 
const MAX_PRECISION: u8; diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index 51a005fce..fd4bcc78c 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -17,7 +17,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -180,7 +180,7 @@ fn compare_rows_eq<'a, S, I1, I2>( not_eq_rows: &mut BTreeSet, ) -> Result<()> where - S: PhysicalStorage, + S: PhysicalStorage2, as AddressableStorage>::T: PartialEq, I1: Iterator, I2: Iterator, diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index c557e0b28..221091f77 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -22,7 +22,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalU64; -use crate::arrays::executor::scalar::{HashExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{HashExecutor, UnaryExecutor2}; use crate::arrays::scalar::ScalarValue; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -440,7 +440,7 @@ impl ExecutableOperator for PhysicalHashAggregate { buffer: PrimitiveBuffer::with_len(group_ids.logical_len()), }; - let array = UnaryExecutor::execute::( + let array = UnaryExecutor2::execute::( &group_ids, builder, |id, buf| { diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs 
b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs index 8da81d7f1..1c40a69ae 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs @@ -127,14 +127,14 @@ impl LeftPrecomputedJoinConditions { let result = condition .function .function_impl - .execute(&[&left_precomputed, right_arr.as_ref()])?; + .execute2(&[&left_precomputed, right_arr.as_ref()])?; results.push(result); } // AND the results. let refs: Vec<_> = results.iter().collect(); - let out = AndImpl.execute(&refs)?; + let out = AndImpl.execute2(&refs)?; // Generate a selection for the left and right selections. let mut select_the_selection = SelectionVector::with_capacity(out.logical_len()); diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 2449900ab..42f5245f7 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -37,7 +37,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalList, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -46,7 +46,7 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::{AddressableStorage, ListItemMetadata}; use crate::database::DatabaseContext; @@ -212,7 +212,7 @@ impl ExecutableOperator for PhysicalUnnest { continue; } - if let Some(list_meta) = UnaryExecutor::value_at::( + if let Some(list_meta) = UnaryExecutor2::value_at::( &state.unnest_inputs[input_idx], state.current_row, )? 
{ @@ -250,7 +250,7 @@ impl ExecutableOperator for PhysicalUnnest { _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor::value_at::(arr, state.current_row)? { + match UnaryExecutor2::value_at::(arr, state.current_row)? { Some(meta) => { // Row is a list, unnest. let out = unnest(child, longest as usize, meta)?; @@ -431,7 +431,7 @@ fn unnest_inner<'a, S, B>( meta: ListItemMetadata, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow, { diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 6c59c1dd0..79c9e4f17 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -26,7 +26,7 @@ impl PhysicalScalarFunctionExpr { .collect::>>()?; let refs: Vec<_> = inputs.iter().map(|a| a.as_ref()).collect(); // Can I not? - let mut out = self.function.function_impl.execute(&refs)?; + let mut out = self.function.function_impl.execute2(&refs)?; // If function is provided no input, it's expected to return an // array of length 1. 
We extend the array here so that it's the diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index 23fe60270..a03e10a96 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -20,7 +20,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -214,7 +214,7 @@ impl FirstPrimitiveImpl { impl AggregateFunctionImpl for FirstPrimitiveImpl where - for<'a> S: PhysicalStorage = T>, + for<'a> S: PhysicalStorage2 = T>, T: Copy + Debug + Default + Sync + Send + 'static, ArrayData2: From>, { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index bd4429f20..a7fff276b 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -20,7 +20,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -346,7 +346,7 @@ impl MinMaxPrimitiveImpl { impl AggregateFunctionImpl for MinMaxPrimitiveImpl where - for<'a> S: PhysicalStorage = T>, + for<'a> S: PhysicalStorage2 = T>, T: PartialOrd + Debug + Default + Sync + Send + Copy + 'static, M: AggregateState + Default + Sync + Send + 'static, ArrayData2: From>, diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index a5971926c..7d0660add 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -16,7 +16,7 @@ use crate::arrays::executor::aggregate::{ 
UnaryNonNullUpdater, }; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; +use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; pub struct TypedAggregateGroupStates { @@ -55,9 +55,9 @@ pub fn new_unary_aggregate_states Box where - Storage: PhysicalStorage, + Storage: PhysicalStorage2, State: for<'a> AggregateState< - <::Storage<'a> as AddressableStorage>::T, + <::Storage<'a> as AddressableStorage>::T, Output, > + Sync + Send @@ -82,8 +82,8 @@ pub fn new_binary_aggregate_states Box where - Storage1: PhysicalStorage, - Storage2: PhysicalStorage, + Storage1: PhysicalStorage2, + Storage2: PhysicalStorage2, State: for<'a> AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output> + Sync + Send @@ -199,7 +199,7 @@ pub fn unary_update( states: &mut [State], ) -> Result<()> where - Storage: PhysicalStorage, + Storage: PhysicalStorage2, State: for<'a> AggregateState, Output>, { UnaryNonNullUpdater::update::(arrays[0], mapping, states) @@ -211,8 +211,8 @@ pub fn binary_update( states: &mut [State], ) -> Result<()> where - Storage1: PhysicalStorage, - Storage2: PhysicalStorage, + Storage1: PhysicalStorage2, + Storage2: PhysicalStorage2, State: for<'a> AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, { BinaryNonNullUpdater::update::( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index 780ce89a7..d0ea67aa9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -15,14 +15,14 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use 
crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -204,11 +204,11 @@ impl AddImpl { impl ScalarFunctionImpl for AddImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Add> + Default + Copy, ArrayData2: From>>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -217,7 +217,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a + b))) + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a + b))) } } @@ -249,7 +249,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([5, 7, 9]); assert_eq!(expected, out); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index cbbdd2680..552c32a79 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -17,14 +17,14 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -198,7 +198,7 @@ impl ScalarFunctionImpl for DecimalDivImpl where D: DecimalType, { - fn 
execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -218,7 +218,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(&a, &b, builder, |a, b, buf| { + BinaryExecutor2::execute::(&a, &b, builder, |a, b, buf| { buf.put(&(a / b)) }) } @@ -241,11 +241,11 @@ impl DivImpl { impl ScalarFunctionImpl for DivImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Div> + Default + Copy, ArrayData2: From>>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -254,7 +254,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a / b))) + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a / b))) } } @@ -286,7 +286,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([4, 2, 2]); assert_eq!(expected, out); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 428125557..0d6b959fd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -17,14 +17,14 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::scalar::interval::Interval; use 
crate::arrays::storage::PrimitiveStorage; @@ -236,10 +236,10 @@ impl IntervalMulImpl { impl ScalarFunctionImpl for IntervalMulImpl where - Rhs: PhysicalStorage, + Rhs: PhysicalStorage2, for<'a> Rhs::Type<'a>: PrimInt, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let (lhs, rhs) = if LHS_RHS_FLIPPED { (inputs[1], inputs[0]) } else { @@ -251,7 +251,7 @@ where buffer: PrimitiveBuffer::::with_len(lhs.logical_len()), }; - BinaryExecutor::execute::(lhs, rhs, builder, |a, b, buf| { + BinaryExecutor2::execute::(lhs, rhs, builder, |a, b, buf| { // TODO: Overflow check buf.put(&Interval { months: a.months * (::from(b).unwrap_or_default()), @@ -282,7 +282,7 @@ where D: DecimalType, ArrayData2: From>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -291,7 +291,7 @@ where buffer: PrimitiveBuffer::::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| { + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| { buf.put(&(a * b)) }) } @@ -314,11 +314,11 @@ impl MulImpl { impl ScalarFunctionImpl for MulImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Mul> + Default + Copy, ArrayData2: From>>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -327,7 +327,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a * b))) + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a * b))) } } @@ -359,7 +359,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([4, 10, 18]); assert_eq!(expected, out); diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index f7b5da2b0..3fac975b6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -15,14 +15,14 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -196,11 +196,11 @@ impl RemImpl { impl ScalarFunctionImpl for RemImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Rem> + Default + Copy, ArrayData2: From>>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -209,7 +209,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a % b))) + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a % b))) } } @@ -241,7 +241,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([0, 1, 0]); assert_eq!(expected, out); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 057b450f3..2df5bb868 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -15,14 +15,14 @@ use 
crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -211,11 +211,11 @@ impl SubImpl { impl ScalarFunctionImpl for SubImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Sub> + Default + Copy, ArrayData2: From>>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; @@ -224,7 +224,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor::execute::(a, b, builder, |a, b, buf| buf.put(&(a - b))) + BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a - b))) } } @@ -256,7 +256,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([3, 3, 3]); assert_eq!(expected, out); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index a9d1d069a..3c2adf139 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -8,7 +8,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalBool; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor, UniformExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, 
UniformExecutor}; use crate::arrays::storage::BooleanStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -70,7 +70,7 @@ impl ScalarFunction for And { pub struct AndImpl; impl ScalarFunctionImpl for AndImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { let mut array = Array2::new_with_array_data( @@ -84,7 +84,7 @@ impl ScalarFunctionImpl for AndImpl { 2 => { let a = inputs[0]; let b = inputs[1]; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { @@ -178,7 +178,7 @@ impl ScalarFunction for Or { pub struct OrImpl; impl ScalarFunctionImpl for OrImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { let mut array = Array2::new_with_array_data( @@ -192,7 +192,7 @@ impl ScalarFunctionImpl for OrImpl { 2 => { let a = inputs[0]; let b = inputs[1]; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { @@ -244,7 +244,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); assert_eq!(ScalarValue::from(false), out.logical_value(1).unwrap()); @@ -277,7 +277,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b, &c]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b, &c]).unwrap(); assert_eq!(ScalarValue::from(false), out.logical_value(0).unwrap()); assert_eq!(ScalarValue::from(true), out.logical_value(1).unwrap()); @@ -305,7 +305,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); assert_eq!(ScalarValue::from(true), 
out.logical_value(1).unwrap()); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 68db6f81a..ca090e20e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,7 +4,9 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::batch_exp::Batch; use crate::arrays::compute::cast::array::decimal_rescale; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; @@ -21,7 +23,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -31,7 +33,7 @@ use crate::arrays::executor::physical_type::{ PhysicalUntypedNull, PhysicalUtf8, }; -use crate::arrays::executor::scalar::{BinaryExecutor, BinaryListReducer, FlexibleListExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer, FlexibleListExecutor}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -660,7 +662,7 @@ impl ScalarFunctionImpl for ListComparisonImpl where O: ComparisonOperation, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -772,7 +774,7 @@ where } #[derive(Debug, Clone)] -struct BaseComparisonImpl { +struct BaseComparisonImpl { _op: PhantomData, _s: PhantomData, } @@ -780,7 +782,7 @@ struct BaseComparisonImpl { impl BaseComparisonImpl where O: ComparisonOperation, - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: PartialEq + PartialOrd, 
{ fn new() -> Self { @@ -794,10 +796,10 @@ where impl ScalarFunctionImpl for BaseComparisonImpl where O: ComparisonOperation, - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: PartialEq + PartialOrd, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -806,7 +808,7 @@ where buffer: BooleanBuffer::with_len(left.logical_len()), }; - BinaryExecutor::execute::(left, right, builder, |a, b, buf| { + BinaryExecutor2::execute::(left, right, builder, |a, b, buf| { buf.put(&O::compare(a, b)) }) } @@ -845,7 +847,7 @@ where T: DecimalType, ArrayData2: From>, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; @@ -862,7 +864,7 @@ where CastFailBehavior::Error, )?; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( left, &scaled_right, builder, @@ -876,14 +878,14 @@ where CastFailBehavior::Error, )?; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( &scaled_left, right, builder, |a, b, buf| buf.put(&O::compare(a, b)), ) } - Ordering::Equal => BinaryExecutor::execute::( + Ordering::Equal => BinaryExecutor2::execute::( left, right, builder, @@ -920,7 +922,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([false, true, false]); assert_eq!(expected, out); @@ -947,7 +949,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([true, false, true]); assert_eq!(expected, out); @@ -974,7 +976,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([true, 
false, true]); assert_eq!(expected, out); @@ -1001,7 +1003,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([true, true, true]); assert_eq!(expected, out); @@ -1028,7 +1030,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([false, false, false]); assert_eq!(expected, out); @@ -1055,7 +1057,7 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute(&[&a, &b]).unwrap(); + let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); let expected = Array2::from_iter([false, true, false]); assert_eq!(expected, out); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs index 2a6cb7823..b980b6bc6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs @@ -101,7 +101,7 @@ pub struct DatePartImpl { } impl ScalarFunctionImpl for DatePartImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { // First input ignored (the constant "part" to extract) extract_date_part(self.part, inputs[1]) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs index a359bcdac..48ab8cbda 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs @@ -6,7 +6,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalI64; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -140,7 +140,7 @@ pub struct DateTruncImpl { } impl ScalarFunctionImpl for DateTruncImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = &inputs[1]; let trunc = match self.input_unit { @@ -188,7 +188,7 @@ impl ScalarFunctionImpl for DateTruncImpl { buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let v = (v / trunc) * trunc; buf.put(&v) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs index 9e360163f..e7bcbf5c3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalI64; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -102,7 +102,7 @@ impl ScalarFunction for EpochMs { pub struct EpochImpl; impl 
ScalarFunctionImpl for EpochImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; to_timestamp::(input) } @@ -116,7 +116,7 @@ fn to_timestamp(input: &Array2) -> Result { buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { buf.put(&(v * S)); }) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index e841c8c67..ba5b07b42 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -101,7 +101,7 @@ impl ScalarFunction for IsNotNull { pub struct CheckNullImpl; impl ScalarFunctionImpl for CheckNullImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let (initial, updated) = if IS_NULL { @@ -117,7 +117,7 @@ impl ScalarFunctionImpl for CheckNullImpl { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len_and_default_value(input.logical_len(), initial), }; - let array = UnaryExecutor::execute::(input, builder, |_, buf| { + let array = UnaryExecutor2::execute::(input, builder, |_, buf| { buf.put(&updated) })?; @@ -303,7 +303,7 @@ impl ScalarFunction for IsNotFalse { pub struct CheckBoolImpl; 
impl ScalarFunctionImpl for CheckBoolImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let initial = NOT; @@ -312,7 +312,7 @@ impl ScalarFunctionImpl for CheckBoolImpl(input, builder, |val, buf| { + let array = UnaryExecutor2::execute::(input, builder, |val, buf| { let b = if NOT { val != BOOL } else { val == BOOL }; buf.put(&b) })?; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index d0cfe2569..003818837 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -26,7 +26,7 @@ use crate::arrays::executor::physical_type::{ PhysicalI64, PhysicalI8, PhysicalList, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, PhysicalU128, PhysicalU16, @@ -35,7 +35,7 @@ use crate::arrays::executor::physical_type::{ PhysicalU8, PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -120,7 +120,7 @@ pub struct ListExtractImpl { } impl ScalarFunctionImpl for ListExtractImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; extract(input, self.index) } @@ -257,7 +257,7 @@ fn extract_inner<'a, S, B>( el_idx: usize, ) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, B: ArrayDataBuffer, S::Type<'a>: Borrow<::Type>, { @@ -265,7 +265,7 @@ where let mut validity = Bitmap::new_with_all_true(builder.buffer.len()); - UnaryExecutor::for_each::(outer, |idx, metadata| { + UnaryExecutor2::for_each::(outer, |idx, metadata| { if let 
Some(metadata) = metadata { if el_idx >= metadata.len { // Indexing outside of the list. Mark null @@ -275,7 +275,7 @@ where // Otherwise put the element into the builder. let inner_el_idx = metadata.offset + el_idx; - match UnaryExecutor::value_at::(inner, inner_el_idx as usize) { + match UnaryExecutor2::value_at::(inner, inner_el_idx as usize) { Ok(Some(el)) => { builder.buffer.put(idx, el.borrow()); return; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index ffb2aa7ef..13844e2cb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -90,7 +90,7 @@ pub struct ListValuesImpl { } impl ScalarFunctionImpl for ListValuesImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { if inputs.is_empty() { let inner_type = match &self.list_datatype { DataType::List(l) => l.datatype.as_ref(), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index d4b0e8df9..344acb9cd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -15,9 +15,9 @@ use crate::arrays::executor::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, - PhysicalStorage, + PhysicalStorage2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -98,11 +98,11 @@ impl NegateImpl { impl ScalarFunctionImpl for NegateImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: std::ops::Neg> + Default + Copy, ArrayData2: From>>, { 
- fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { use std::ops::Neg; let a = inputs[0]; @@ -112,7 +112,7 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - UnaryExecutor::execute::(a, builder, |a, buf| buf.put(&(a.neg()))) + UnaryExecutor2::execute::(a, builder, |a, buf| buf.put(&(a.neg()))) } } @@ -165,8 +165,8 @@ impl ScalarFunction for Not { pub struct NotImpl; impl ScalarFunctionImpl for NotImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { - UnaryExecutor::execute::( + fn execute2(&self, inputs: &[&Array2]) -> Result { + UnaryExecutor2::execute::( inputs[0], ArrayBuilder { datatype: DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index b36516c7a..d9feefae9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Abs = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for AbsOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for AbsOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, 
|v, buf| buf.put(&v.abs())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.abs())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index e6196e020..14f5b7e06 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Acos = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for AcosOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for AcosOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.acos())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.acos())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index e09163bd3..c4106887c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use 
crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Asin = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for AsinOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for AsinOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.asin())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.asin())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index d54dc8e8a..cc5de1d3b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Atan = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for AtanOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: 
PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for AtanOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.atan())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.atan())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 044ca4628..3f5e9be05 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Cbrt = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for CbrtOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for CbrtOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.cbrt())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cbrt())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 56b4e5bbb..b4d9f976b 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Ceil = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for CeilOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for CeilOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.ceil())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ceil())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index d6fee453c..61e01ceed 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use 
crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Cos = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for CosOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for CosOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.cos())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cos())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 1094e7f79..a1f289b2a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Degrees = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for DegreesOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for DegreesOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| 
buf.put(&v.to_degrees())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_degrees())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index e2c72b61c..7f824ec4e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Exp = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for ExpOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for ExpOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.exp())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.exp())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 042aa67f6..9905a2ea0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use 
crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Floor = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for FloorOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for FloorOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.floor())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.floor())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index b8a05427e..a9ed2c305 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -11,9 +11,9 @@ use crate::arrays::executor::physical_type::{ PhysicalF16, PhysicalF32, PhysicalF64, - PhysicalStorage, + PhysicalStorage2, }; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunctionImpl}; @@ -87,11 +87,11 @@ impl ScalarFunction for IsNan { } #[derive(Debug, Clone, Copy)] -pub struct IsNanImpl { +pub struct IsNanImpl { _s: PhantomData, } -impl IsNanImpl { +impl IsNanImpl { fn new() -> Self { IsNanImpl { _s: PhantomData } } @@ -99,16 +99,16 @@ impl IsNanImpl { 
impl ScalarFunctionImpl for IsNanImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: Float, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 1bfb58813..a96132df6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Ln = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for LnOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for LnOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.ln())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ln())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index 57460b66e..0425634c4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Log = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for LogOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,7 +28,7 @@ impl UnaryInputNumericOperation for LogOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.log10())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log10())) } } @@ -43,7 +43,7 @@ impl UnaryInputNumericOperation for LogOp2 { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -51,6 +51,6 @@ impl UnaryInputNumericOperation for LogOp2 { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.log2())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log2())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 8b9291257..07baeea1b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -44,7 +44,7 @@ use crate::arrays::executor::physical_type::{ PhysicalF16, PhysicalF32, PhysicalF64, - PhysicalStorage, + PhysicalStorage2, PhysicalType2, }; use crate::arrays::storage::PrimitiveStorage; @@ -83,7 +83,7 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>; } @@ -145,7 +145,7 @@ pub(crate) struct UnaryInputNumericScalarImpl { } impl ScalarFunctionImpl for UnaryInputNumericScalarImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; match input.physical_type() { PhysicalType2::Float16 => O::execute_float::(input, self.ret.clone()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 8d70420bb..48f0a3883 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Radians = 
UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for RadiansOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for RadiansOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.to_radians())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_radians())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index f22c276bb..90d6595ca 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Sin = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for SinOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for SinOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.sin())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sin())) } } diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index aca72b13a..98c2739ce 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Sqrt = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for SqrtOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for SqrtOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.sqrt())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sqrt())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 7669b561b..e45187a1a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -5,8 +5,8 @@ use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use 
crate::arrays::executor::physical_type::PhysicalStorage; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::storage::PrimitiveStorage; pub type Tan = UnaryInputNumericScalar; @@ -20,7 +20,7 @@ impl UnaryInputNumericOperation for TanOp { fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result where - S: PhysicalStorage, + S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>, { @@ -28,6 +28,6 @@ impl UnaryInputNumericOperation for TanOp { datatype: ret, buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| buf.put(&v.tan())) + UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.tan())) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs index 7adcef12d..abefb8879 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs @@ -62,7 +62,7 @@ impl ScalarFunction for Random { pub struct RandomImpl; impl ScalarFunctionImpl for RandomImpl { - fn execute(&self, _inputs: &[&Array2]) -> Result { + fn execute2(&self, _inputs: &[&Array2]) -> Result { // TODO: Need to pass in dummy input to produce all unique values. 
let val = rand::random::(); Ok(Array2::new_with_array_data( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index c7f3bff3e..75432aa2c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -11,7 +11,7 @@ use crate::arrays::executor::physical_type::{ PhysicalF16, PhysicalF32, PhysicalF64, - PhysicalStorage, + PhysicalStorage2, }; use crate::arrays::executor::scalar::{BinaryListReducer, ListExecutor}; use crate::expr::Expression; @@ -92,13 +92,13 @@ impl ScalarFunction for L2Distance { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct L2DistanceImpl { +pub struct L2DistanceImpl { _s: PhantomData, } impl L2DistanceImpl where - S: PhysicalStorage, + S: PhysicalStorage2, { fn new() -> Self { L2DistanceImpl { _s: PhantomData } @@ -107,10 +107,10 @@ where impl ScalarFunctionImpl for L2DistanceImpl where - S: PhysicalStorage, + S: PhysicalStorage2, for<'a> S::Type<'a>: Float + AddAssign + AsPrimitive + Default + Copy, { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let a = inputs[0]; let b = inputs[1]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs index 960687ef7..f102ba675 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::UnaryExecutor; +use 
crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -61,14 +61,14 @@ impl ScalarFunction for Ascii { pub struct AsciiImpl; impl ScalarFunctionImpl for AsciiImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { datatype: DataType::Int32, buffer: PrimitiveBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let v = v.chars().next().map(|c| c as i32).unwrap_or(0); buf.put(&v) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index d12c51c91..12a840db8 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -4,7 +4,7 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -60,7 +60,7 @@ impl ScalarFunction for Lower { pub struct LowerImpl; impl ScalarFunctionImpl for LowerImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; case_convert_execute(input, str::to_lowercase) } @@ -115,7 +115,7 @@ impl ScalarFunction 
for Upper { pub struct UpperImpl; impl ScalarFunctionImpl for UpperImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; case_convert_execute(input, str::to_uppercase) } @@ -135,7 +135,7 @@ where buffer: GermanVarlenBuffer::::with_len_and_data_capacity(input.logical_len(), cap), }; - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { // TODO: Non-allocating variant. buf.put(&case_fn(v)) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index c9e019c20..51f0c59c2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UniformExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UniformExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -68,7 +68,7 @@ impl ScalarFunction for Concat { pub struct StringConcatImpl; impl ScalarFunctionImpl for StringConcatImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { match inputs.len() { 0 => { let mut array = Array2::from_iter([""]); @@ -84,7 +84,7 @@ impl ScalarFunctionImpl for StringConcatImpl { // TODO: Compute data capacity. 
- BinaryExecutor::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 01396a46b..13f00dd2c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -82,13 +82,13 @@ pub struct StringContainsConstantImpl { } impl ScalarFunctionImpl for StringContainsConstantImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.contains(&self.constant)) }) } @@ -98,13 +98,13 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { pub struct StringContainsImpl; impl ScalarFunctionImpl for StringContainsImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index 68941685f..c2bcb71b6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -86,13 +86,13 @@ pub struct EndsWithConstantImpl { } impl ScalarFunctionImpl for EndsWithConstantImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.ends_with(&self.constant)) }) } @@ -102,13 +102,13 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { pub struct EndsWithImpl; impl ScalarFunctionImpl for EndsWithImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index 426ee3e47..d60b0a5f0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalBinary, PhysicalUtf8}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -64,7 +64,7 @@ impl ScalarFunction for Length { pub struct StrLengthImpl; impl ScalarFunctionImpl for StrLengthImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { @@ -72,7 +72,7 @@ impl ScalarFunctionImpl for StrLengthImpl { buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let len = v.chars().count() as i64; buf.put(&len) }) @@ -145,7 +145,7 @@ impl ScalarFunction for ByteLength { pub struct ByteLengthImpl; impl ScalarFunctionImpl for ByteLengthImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { @@ -154,7 +154,7 @@ impl ScalarFunctionImpl for ByteLengthImpl { }; // Binary applicable to both str and [u8]. 
- UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { buf.put(&(v.len() as i64)) }) } @@ -222,7 +222,7 @@ impl ScalarFunction for BitLength { pub struct BitLengthImpl; impl ScalarFunctionImpl for BitLengthImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; let builder = ArrayBuilder { @@ -231,7 +231,7 @@ impl ScalarFunctionImpl for BitLengthImpl { }; // Binary applicable to both str and [u8]. - UnaryExecutor::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let bit_len = v.len() * 8; buf.put(&(bit_len as i64)) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index f1283ffdb..549439033 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -5,7 +5,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,13 +84,13 @@ pub struct LikeConstImpl { } impl ScalarFunctionImpl for LikeConstImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + 
UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { let b = self.constant.is_match(s); buf.put(&b); }) @@ -101,7 +101,7 @@ impl ScalarFunctionImpl for LikeConstImpl { pub struct LikeImpl; impl ScalarFunctionImpl for LikeImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), @@ -109,7 +109,7 @@ impl ScalarFunctionImpl for LikeImpl { let mut s_buf = String::new(); - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs index 1ed2c7f01..0a3f4f7b9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -96,7 +96,7 @@ impl ScalarFunction for LeftPad { pub struct LeftPadImpl; impl ScalarFunctionImpl for LeftPadImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let mut string_buf = String::new(); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -104,7 +104,7 @@ impl ScalarFunctionImpl for LeftPadImpl { }; match inputs.len() { - 2 => BinaryExecutor::execute::( + 2 => 
BinaryExecutor2::execute::( inputs[0], inputs[1], builder, @@ -209,7 +209,7 @@ impl ScalarFunction for RightPad { pub struct RightPadImpl; impl ScalarFunctionImpl for RightPadImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let mut string_buf = String::new(); let builder = ArrayBuilder { datatype: DataType::Utf8, @@ -217,7 +217,7 @@ impl ScalarFunctionImpl for RightPadImpl { }; match inputs.len() { - 2 => BinaryExecutor::execute::( + 2 => BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs index 0250f98d3..ac1dc6849 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs @@ -5,7 +5,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -98,7 +98,7 @@ pub struct RegexpReplaceImpl { } impl ScalarFunctionImpl for RegexpReplaceImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), @@ -106,13 +106,13 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { match (self.pattern.as_ref(), self.replacement.as_ref()) 
{ (Some(pattern), Some(replacement)) => { - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { // TODO: Flags to more many. let out = pattern.replace(s, replacement); buf.put(out.as_ref()); }) } - (Some(pattern), None) => BinaryExecutor::execute::( + (Some(pattern), None) => BinaryExecutor2::execute::( inputs[0], inputs[2], builder, @@ -122,7 +122,7 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { }, ), (None, Some(replacement)) => { - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs index cda468e69..d4dd46cab 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs @@ -6,7 +6,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::BinaryExecutor; +use crate::arrays::executor::scalar::BinaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -65,7 +65,7 @@ impl ScalarFunction for Repeat { pub struct RepeatUtf8Impl; impl ScalarFunctionImpl for RepeatUtf8Impl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let strings = inputs[0]; let nums = inputs[1]; @@ -73,7 +73,7 @@ impl ScalarFunctionImpl for RepeatUtf8Impl { let mut string_buf = String::new(); - BinaryExecutor::execute::( + BinaryExecutor2::execute::( strings, nums, ArrayBuilder { diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs index 495c84b1e..4e80eb0aa 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs @@ -4,7 +4,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,7 +84,7 @@ pub struct StartsWithImpl { } impl ScalarFunctionImpl for StartsWithImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(inputs[0].logical_len()), @@ -92,11 +92,11 @@ impl ScalarFunctionImpl for StartsWithImpl { match self.constant.as_ref() { Some(constant) => { - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.starts_with(constant)) }) } - None => BinaryExecutor::execute::( + None => BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index d6aa76568..102dc4120 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -4,7 +4,7 @@ use 
crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; -use crate::arrays::executor::scalar::{BinaryExecutor, TernaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -105,9 +105,9 @@ impl ScalarFunction for Substring { pub struct SubstringFromImpl; impl ScalarFunctionImpl for SubstringFromImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], ArrayBuilder { @@ -123,7 +123,7 @@ impl ScalarFunctionImpl for SubstringFromImpl { pub struct SubstringFromToImpl; impl ScalarFunctionImpl for SubstringFromToImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); TernaryExecutor::execute::( inputs[0], diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index 32819198e..e192fee34 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -7,7 +7,7 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; -use crate::arrays::executor::scalar::{BinaryExecutor, UnaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use 
crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -222,13 +222,13 @@ impl TrimWhitespaceImpl { } impl ScalarFunctionImpl for TrimWhitespaceImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), }; - UnaryExecutor::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { let trimmed = F::trim_func(s, " "); buf.put(trimmed) }) @@ -247,13 +247,13 @@ impl TrimPatternImpl { } impl ScalarFunctionImpl for TrimPatternImpl { - fn execute(&self, inputs: &[&Array2]) -> Result { + fn execute2(&self, inputs: &[&Array2]) -> Result { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), }; - BinaryExecutor::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/mod.rs b/crates/rayexec_execution/src/functions/scalar/mod.rs index bb36d066e..09db07146 100644 --- a/crates/rayexec_execution/src/functions/scalar/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/mod.rs @@ -7,7 +7,9 @@ use dyn_clone::DynClone; use rayexec_error::Result; use super::FunctionInfo; +use crate::arrays::array::exp::Array; use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; use crate::arrays::datatype::DataType; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -103,7 +105,18 @@ impl Hash for PlannedScalarFunction { } pub trait ScalarFunctionImpl: Debug + Sync + Send + DynClone { - fn execute(&self, inputs: &[&Array2]) -> Result; + fn execute2(&self, inputs: &[&Array2]) -> Result { + unimplemented!() + } + + /// Execute the function the input batch, 
writing the output for each row + /// into `output` at the same index. + /// + /// `output` is guaranteed to be the exact size needed for the output as + /// well as being the correct physical type. + fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + unimplemented!() + } } impl Clone for Box { diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index 8b36b8fa0..5c379e9cf 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -7,7 +7,7 @@ use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::PhysicalI64; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::arrays::storage::PrimitiveStorage; @@ -259,15 +259,15 @@ impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { }; // Generate new params from row. 
- let start = UnaryExecutor::value_at::( + let start = UnaryExecutor2::value_at::( batch.column(0).unwrap(), self.next_row_idx, )?; - let end = UnaryExecutor::value_at::( + let end = UnaryExecutor2::value_at::( batch.column(1).unwrap(), self.next_row_idx, )?; - let step = UnaryExecutor::value_at::( + let step = UnaryExecutor2::value_at::( batch.column(2).unwrap(), self.next_row_idx, )?; diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index 353633694..cba16259c 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -7,7 +7,7 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType2}; -use crate::arrays::executor::scalar::UnaryExecutor; +use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::execution::operators::unnest::unnest; @@ -213,7 +213,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor::value_at::(input, self.current_row)? { + match UnaryExecutor2::value_at::(input, self.current_row)? { Some(meta) => { // Row is a list, unnest. unnest(child, meta.len as usize, meta)? 
diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index 8cd703e39..f9d41567c 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -15,7 +15,7 @@ use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt} use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; -use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage}; +use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage2}; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::storage::AddressableStorage; use rayexec_io::FileSink; From 278b43a5528212a58eb8d954fd340045bffac9b7 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 18:17:54 -0500 Subject: [PATCH 14/59] ok --- .../rayexec_execution/src/arrays/array/exp.rs | 4 ++ .../rayexec_execution/src/arrays/array/mod.rs | 16 +++---- .../rayexec_execution/src/arrays/batch_exp.rs | 6 +++ .../src/arrays/compute/cast/array.rs | 20 ++++---- .../src/arrays/executor/physical_type.rs | 12 ++--- .../src/arrays/executor/scalar/fill.rs | 18 +++---- .../src/arrays/executor/scalar/hash.rs | 18 +++---- .../src/arrays/executor_exp/mod.rs | 12 +++++ .../src/arrays/row/encoding.rs | 12 ++--- .../operators/hash_aggregate/compare.rs | 12 ++--- .../src/execution/operators/unnest.rs | 12 ++--- .../src/functions/aggregate/builtin/avg.rs | 4 +- .../src/functions/aggregate/builtin/corr.rs | 4 +- .../src/functions/aggregate/builtin/covar.rs | 6 +-- .../src/functions/aggregate/builtin/first.rs | 12 ++--- .../src/functions/aggregate/builtin/minmax.rs | 42 ++++++++--------- .../functions/aggregate/builtin/regr_avg.rs | 6 +-- .../functions/aggregate/builtin/regr_r2.rs | 4 +- .../functions/aggregate/builtin/regr_slope.rs | 4 +- .../src/functions/aggregate/builtin/stddev.rs | 10 
++-- .../src/functions/aggregate/builtin/sum.rs | 4 +- .../src/functions/scalar/builtin/arith/add.rs | 12 ++--- .../src/functions/scalar/builtin/arith/div.rs | 21 +++++---- .../src/functions/scalar/builtin/arith/mul.rs | 12 ++--- .../src/functions/scalar/builtin/arith/rem.rs | 12 ++--- .../src/functions/scalar/builtin/arith/sub.rs | 12 ++--- .../functions/scalar/builtin/comparison.rs | 20 ++++---- .../scalar/builtin/list/list_extract.rs | 12 ++--- .../src/functions/scalar/builtin/negate.rs | 12 ++--- .../functions/scalar/builtin/numeric/abs.rs | 24 +++++++++- .../functions/scalar/builtin/numeric/acos.rs | 2 +- .../functions/scalar/builtin/numeric/asin.rs | 2 +- .../functions/scalar/builtin/numeric/atan.rs | 2 +- .../functions/scalar/builtin/numeric/cbrt.rs | 2 +- .../functions/scalar/builtin/numeric/ceil.rs | 2 +- .../functions/scalar/builtin/numeric/cos.rs | 2 +- .../scalar/builtin/numeric/degrees.rs | 2 +- .../functions/scalar/builtin/numeric/exp.rs | 2 +- .../functions/scalar/builtin/numeric/floor.rs | 2 +- .../functions/scalar/builtin/numeric/isnan.rs | 12 ++--- .../functions/scalar/builtin/numeric/ln.rs | 2 +- .../functions/scalar/builtin/numeric/log.rs | 4 +- .../functions/scalar/builtin/numeric/mod.rs | 47 ++++++++++++++++--- .../scalar/builtin/numeric/radians.rs | 2 +- .../functions/scalar/builtin/numeric/sin.rs | 2 +- .../functions/scalar/builtin/numeric/sqrt.rs | 2 +- .../functions/scalar/builtin/numeric/tan.rs | 2 +- .../scalar/builtin/similarity/l2_distance.rs | 12 ++--- 48 files changed, 279 insertions(+), 199 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 020cf3569..51b50ea66 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -114,6 +114,10 @@ where }) } + pub fn datatype(&self) -> &DataType { + &self.datatype + } + pub fn data(&self) -> &ArrayData { &self.data } diff --git 
a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index d16f531a1..cf785cee1 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -25,9 +25,9 @@ use crate::arrays::executor::physical_type::{ PhysicalAny, PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -397,7 +397,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float16(_) => UnaryExecutor2::execute::( + ArrayData2::Float16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -405,7 +405,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float32(_) => UnaryExecutor2::execute::( + ArrayData2::Float32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -413,7 +413,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Float64(_) => UnaryExecutor2::execute::( + ArrayData2::Float64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -688,13 +688,13 @@ impl Array2 { }) } ScalarValue::Float32(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Float64(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 6962e5e78..9033d930f 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -2,6 +2,7 @@ use iterutil::IntoExactSizeIterator; use rayexec_error::Result; use 
super::array::exp::Array; +use super::array::selection::Selection; use super::buffer::buffer_manager::{BufferManager, NopBufferManager}; use super::datatype::DataType; @@ -67,6 +68,11 @@ where }) } + /// Returns a selection that selects rows [0, num_rows). + pub fn selection(&self) -> Selection { + Selection::Linear { len: self.num_rows } + } + pub fn arrays(&self) -> &[Array] { &self.arrays } diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 23e0e6645..7abf9a21e 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -59,9 +59,9 @@ use crate::arrays::executor::builder::{ }; use crate::arrays::executor::physical_type::{ PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -128,13 +128,13 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Float16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Float32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Float64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } // Int to date32 @@ -188,10 +188,10 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res // Float to decimal. DataType::Float32 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? + cast_float_to_decimal_helper::(arr, to, behavior)? } DataType::Float64 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? 
+ cast_float_to_decimal_helper::(arr, to, behavior)? } // Decimal to decimal @@ -610,10 +610,10 @@ pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result cast_format::(arr, UInt128Formatter::default(), behavior) } DataType::Float32 => { - cast_format::(arr, Float32Formatter::default(), behavior) + cast_format::(arr, Float32Formatter::default(), behavior) } DataType::Float64 => { - cast_format::(arr, Float64Formatter::default(), behavior) + cast_format::(arr, Float64Formatter::default(), behavior) } DataType::Decimal64(m) => cast_format::( arr, diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index ad8223086..6cd6d2bd0 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -412,9 +412,9 @@ impl PhysicalStorage2 for PhysicalU128 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF16; +pub struct PhysicalF16_2; -impl PhysicalStorage2 for PhysicalF16 { +impl PhysicalStorage2 for PhysicalF16_2 { type Type<'a> = f16; type Storage<'a> = PrimitiveStorageSlice<'a, f16>; @@ -427,9 +427,9 @@ impl PhysicalStorage2 for PhysicalF16 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF32; +pub struct PhysicalF32_2; -impl PhysicalStorage2 for PhysicalF32 { +impl PhysicalStorage2 for PhysicalF32_2 { type Type<'a> = f32; type Storage<'a> = PrimitiveStorageSlice<'a, f32>; @@ -442,9 +442,9 @@ impl PhysicalStorage2 for PhysicalF32 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalF64; +pub struct PhysicalF64_2; -impl PhysicalStorage2 for PhysicalF64 { +impl PhysicalStorage2 for PhysicalF64_2 { type Type<'a> = f64; type Storage<'a> = PrimitiveStorageSlice<'a, f64>; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index 346268ea3..be169d2e3 100644 --- 
a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -15,9 +15,9 @@ use crate::arrays::executor::builder::{ use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -241,21 +241,21 @@ pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { @@ -465,21 +465,21 @@ pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Float32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Float64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Interval => { let state = FillState::new(ArrayBuilder { diff --git 
a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index e828cec02..1477c141a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -6,9 +6,9 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -77,13 +77,13 @@ impl HashExecutor { Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Binary => { Self::hash_one_inner::(array, hashes)? @@ -141,13 +141,13 @@ impl HashExecutor { Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Binary => { Self::hash_one_inner::(array, hashes)? 
diff --git a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs index 6da833cc8..50baf6f24 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs @@ -1,6 +1,9 @@ pub mod aggregate; pub mod scalar; +use rayexec_error::Result; + +use super::array::exp::Array; use super::array::validity::Validity; use super::buffer::physical_type::AddressableMut; use super::buffer::ArrayBuffer; @@ -13,6 +16,15 @@ pub struct OutBuffer<'a> { pub validity: &'a mut Validity, } +impl<'a> OutBuffer<'a> { + pub fn from_array(array: &'a mut Array) -> Result { + Ok(OutBuffer { + buffer: array.data.try_as_mut()?, + validity: &mut array.validity, + }) + } +} + /// Helper for assigning a value to a location in a buffer. #[derive(Debug)] pub struct PutBuffer<'a, M> diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index 2e20b942e..ec5178630 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -6,9 +6,9 @@ use crate::arrays::executor::physical_type::{ AsBytes, PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -228,13 +228,13 @@ impl ComparableRowEncoder { ArrayData2::UInt128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Float16(_) => Self::encode_primitive::( + ArrayData2::Float16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Float32(_) => Self::encode_primitive::( + ArrayData2::Float32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Float64(_) => Self::encode_primitive::( + ArrayData2::Float64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, 
ArrayData2::Interval(_) => Self::encode_primitive::( diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index fd4bcc78c..3008777e5 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -8,9 +8,9 @@ use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -137,13 +137,13 @@ where compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Float16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Float32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Float64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? 
} PhysicalType2::Interval => compare_rows_eq::( array1, diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 42f5245f7..76150b37e 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -28,9 +28,9 @@ use crate::arrays::executor::builder::{ use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -391,21 +391,21 @@ pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Float32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Float64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Utf8 => { let builder = ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index 8d9d29f86..a8e3625b3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -11,7 +11,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalF64, PhysicalI64}; +use 
crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -178,7 +178,7 @@ pub struct AvgFloat64Impl; impl AggregateFunctionImpl for AvgFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( AvgStateF64::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs index ad557baf5..9ed3a65a3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs @@ -6,7 +6,7 @@ use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{StddevPopFinalize, VarianceState}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_binary_aggregate_states, @@ -74,7 +74,7 @@ pub struct CorrImpl; impl AggregateFunctionImpl for CorrImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( CorrelationState::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs index 7de84c41f..b84c82a29 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; use 
crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_binary_aggregate_states, @@ -72,7 +72,7 @@ pub struct CovarPopImpl; impl AggregateFunctionImpl for CovarPopImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( CovarState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -130,7 +130,7 @@ pub struct CovarSampImpl; impl AggregateFunctionImpl for CovarSampImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( CovarState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index a03e10a96..ad4a38db5 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -11,9 +11,9 @@ use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -84,13 +84,13 @@ impl AggregateFunction for First { let function_impl: Box = match datatype.physical_type2()? 
{ PhysicalType2::UntypedNull => Box::new(FirstUntypedNullImpl), PhysicalType2::Boolean => Box::new(FirstBoolImpl), - PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::Int8 => { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index a7fff276b..04ded97ca 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -11,9 +11,9 @@ use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -84,15 +84,15 @@ impl AggregateFunction for Min { let function_impl: Box = match datatype.physical_type2()? 
{ PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), PhysicalType2::Boolean => Box::new(MinBoolImpl::new()), - PhysicalType2::Float16 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Float32 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Float64 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } + PhysicalType2::Float16 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Float32 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Float64 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), PhysicalType2::Int8 => { Box::new(MinPrimitiveImpl::::new(datatype.clone())) } @@ -178,15 +178,15 @@ impl AggregateFunction for Max { let function_impl: Box = match datatype.physical_type2()? { PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), PhysicalType2::Boolean => Box::new(MaxBoolImpl::new()), - PhysicalType2::Float16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Float32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Float64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } + PhysicalType2::Float16 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Float32 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Float64 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), PhysicalType2::Int8 => { Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs index 040fd880b..2bd9b1649 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; use 
crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_binary_aggregate_states, @@ -72,7 +72,7 @@ pub struct RegrAvgYImpl; impl AggregateFunctionImpl for RegrAvgYImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( RegrAvgState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -137,7 +137,7 @@ pub struct RegrAvgXImpl; impl AggregateFunctionImpl for RegrAvgXImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( RegrAvgState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs index 11941ba7c..7acdc9df5 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use super::corr::CorrelationState; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_binary_aggregate_states, @@ -72,7 +72,7 @@ pub struct RegrR2Impl; impl AggregateFunctionImpl for RegrR2Impl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( RegrR2State::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs index 
4d4dd5ba7..5d7d7933a 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs @@ -6,7 +6,7 @@ use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{VariancePopFinalize, VarianceState}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_binary_aggregate_states, @@ -73,7 +73,7 @@ pub struct RegrSlopeImpl; impl AggregateFunctionImpl for RegrSlopeImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states::( RegrSlopeState::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs index 0e66f7d67..f196b7c63 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::PhysicalF64; +use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ new_unary_aggregate_states, @@ -69,7 +69,7 @@ pub struct StddevPopImpl; impl AggregateFunctionImpl for StddevPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -128,7 +128,7 @@ pub struct StddevSampImpl; impl AggregateFunctionImpl for StddevSampImpl { fn new_states(&self) -> 
Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -183,7 +183,7 @@ pub struct VarPopImpl; impl AggregateFunctionImpl for VarPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -238,7 +238,7 @@ pub struct VarSampImpl; impl AggregateFunctionImpl for VarSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index f8ff4f71a..91a01e909 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -8,7 +8,7 @@ use rayexec_error::Result; use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState; -use crate::arrays::executor::physical_type::{PhysicalF64, PhysicalI64}; +use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -126,7 +126,7 @@ pub struct SumFloat64Impl; impl AggregateFunctionImpl for SumFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( SumStateAdd::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index d0ea67aa9..46bc110bf 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -7,9 +7,9 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -100,15 +100,15 @@ impl ScalarFunction for Add { inputs[1].datatype(table_list)?, ) { (DataType::Float16, DataType::Float16) => ( - Box::new(AddImpl::::new(DataType::Float16)), + Box::new(AddImpl::::new(DataType::Float16)), DataType::Float16, ), (DataType::Float32, DataType::Float32) => ( - Box::new(AddImpl::::new(DataType::Float32)), + Box::new(AddImpl::::new(DataType::Float32)), DataType::Float32, ), (DataType::Float64, DataType::Float64) => ( - Box::new(AddImpl::::new(DataType::Float64)), + Box::new(AddImpl::::new(DataType::Float64)), DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index 552c32a79..3685ff5eb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -9,9 +9,9 @@ use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -106,15 +106,15 @@ impl ScalarFunction for Div { inputs[1].datatype(table_list)?, ) { (DataType::Float16, DataType::Float16) => ( - 
Box::new(DivImpl::::new(DataType::Float16)), + Box::new(DivImpl::::new(DataType::Float16)), DataType::Float16, ), (DataType::Float32, DataType::Float32) => ( - Box::new(DivImpl::::new(DataType::Float32)), + Box::new(DivImpl::::new(DataType::Float32)), DataType::Float32, ), (DataType::Float64, DataType::Float64) => ( - Box::new(DivImpl::::new(DataType::Float64)), + Box::new(DivImpl::::new(DataType::Float64)), DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( @@ -218,9 +218,12 @@ where buffer: PrimitiveBuffer::with_len(a.logical_len()), }; - BinaryExecutor2::execute::(&a, &b, builder, |a, b, buf| { - buf.put(&(a / b)) - }) + BinaryExecutor2::execute::( + &a, + &b, + builder, + |a, b, buf| buf.put(&(a / b)), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 0d6b959fd..f5a25812b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -8,9 +8,9 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -117,15 +117,15 @@ impl ScalarFunction for Mul { inputs[1].datatype(table_list)?, ) { (DataType::Float16, DataType::Float16) => ( - Box::new(MulImpl::::new(DataType::Float16)), + Box::new(MulImpl::::new(DataType::Float16)), DataType::Float16, ), (DataType::Float32, DataType::Float32) => ( - Box::new(MulImpl::::new(DataType::Float32)), + Box::new(MulImpl::::new(DataType::Float32)), DataType::Float32, ), (DataType::Float64, DataType::Float64) => ( - Box::new(MulImpl::::new(DataType::Float64)), + Box::new(MulImpl::::new(DataType::Float64)), 
DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 3fac975b6..88e05a26e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -7,9 +7,9 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -114,15 +114,15 @@ impl ScalarFunction for Rem { inputs[1].datatype(table_list)?, ) { (DataType::Float16, DataType::Float16) => ( - Box::new(RemImpl::::new(DataType::Float16)), + Box::new(RemImpl::::new(DataType::Float16)), DataType::Float16, ), (DataType::Float32, DataType::Float32) => ( - Box::new(RemImpl::::new(DataType::Float32)), + Box::new(RemImpl::::new(DataType::Float32)), DataType::Float32, ), (DataType::Float64, DataType::Float64) => ( - Box::new(RemImpl::::new(DataType::Float64)), + Box::new(RemImpl::::new(DataType::Float64)), DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 2df5bb868..3bf9b3820 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -7,9 +7,9 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + 
PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -113,15 +113,15 @@ impl ScalarFunction for Sub { inputs[1].datatype(table_list)?, ) { (DataType::Float16, DataType::Float16) => ( - Box::new(SubImpl::::new(DataType::Float16)), + Box::new(SubImpl::::new(DataType::Float16)), DataType::Float16, ), (DataType::Float32, DataType::Float32) => ( - Box::new(SubImpl::::new(DataType::Float32)), + Box::new(SubImpl::::new(DataType::Float32)), DataType::Float32, ), (DataType::Float64, DataType::Float64) => ( - Box::new(SubImpl::::new(DataType::Float64)), + Box::new(SubImpl::::new(DataType::Float64)), DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index ca090e20e..f69c50624 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,9 +4,7 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::batch_exp::Batch; use crate::arrays::compute::cast::array::decimal_rescale; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; @@ -14,9 +12,9 @@ use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -553,13 +551,13 @@ fn new_comparison_impl( Box::new(BaseComparisonImpl::::new()) } (DataType::Float16, DataType::Float16) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Float32, DataType::Float32) => { - 
Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Float64, DataType::Float64) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Decimal64(left), DataType::Decimal64(right)) => Box::new( RescalingComparisionImpl::::new(left, right), @@ -733,17 +731,17 @@ where )? } PhysicalType2::Float16 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Float32 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Float64 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 003818837..c3db4c4b9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -17,9 +17,9 @@ use crate::arrays::executor::builder::{ use crate::arrays::executor::physical_type::{ PhysicalBinary, PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -216,21 +216,21 @@ fn extract(array: &Array2, idx: usize) -> Result { datatype: DataType::Float16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Float32 => { let builder = ArrayBuilder { datatype: DataType::Float32, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Float64 => { let builder = 
ArrayBuilder { datatype: DataType::Float64, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Utf8 => { let builder = ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index 344acb9cd..7fca34c97 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -7,9 +7,9 @@ use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBool, - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalI128, PhysicalI16, PhysicalI32, @@ -66,9 +66,9 @@ impl ScalarFunction for Negate { dt @ DataType::Int32 => Box::new(NegateImpl::::new(dt)), dt @ DataType::Int64 => Box::new(NegateImpl::::new(dt)), dt @ DataType::Int128 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float64 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Float16 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Float32 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Float64 => Box::new(NegateImpl::::new(dt)), other => return Err(invalid_input_types_error(self, &[other])), }; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index d9feefae9..26daafd20 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use 
num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalStorage}; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Abs = UnaryInputNumericScalar; @@ -18,7 +23,7 @@ impl UnaryInputNumericOperation for AbsOp { const NAME: &'static str = "abs"; const DESCRIPTION: &'static str = "Compute the absolute value of a number"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for AbsOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.abs())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.abs()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index 14f5b7e06..76cc0032c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AcosOp { const NAME: &'static str = "acos"; const DESCRIPTION: &'static str = 
"Compute the arccosine of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index c4106887c..5f3dfbffe 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AsinOp { const NAME: &'static str = "asin"; const DESCRIPTION: &'static str = "Compute the arcsine of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index cc5de1d3b..f87624eb5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for AtanOp { const NAME: &'static str = "atan"; const DESCRIPTION: &'static str = "Compute the arctangent of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 3f5e9be05..5b90082d9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -18,7 +18,7 @@ 
impl UnaryInputNumericOperation for CbrtOp { const NAME: &'static str = "cbrt"; const DESCRIPTION: &'static str = "Compute the cube root of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index b4d9f976b..38a9ef0d4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for CeilOp { const NAME: &'static str = "ceil"; const DESCRIPTION: &'static str = "Round number up"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 61e01ceed..61caa8680 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for CosOp { const NAME: &'static str = "cos"; const DESCRIPTION: &'static str = "Compute the cosine of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index a1f289b2a..7901ec1d4 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for DegreesOp { const NAME: &'static str = "degrees"; const DESCRIPTION: &'static str = "Converts radians to degrees"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index 7f824ec4e..120bbc594 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for ExpOp { const NAME: &'static str = "exp"; const DESCRIPTION: &'static str = "Compute `e ^ val`"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 9905a2ea0..3171ce741 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for FloorOp { const NAME: &'static str = "floor"; const DESCRIPTION: &'static str = "Round number down"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index a9ed2c305..ea5788b58 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -8,9 +8,9 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalStorage2, }; use crate::arrays::executor::scalar::UnaryExecutor2; @@ -71,9 +71,9 @@ impl ScalarFunction for IsNan { plan_check_num_args(self, &inputs, 1)?; let function_impl: Box = match inputs[0].datatype(table_list)? { - DataType::Float16 => Box::new(IsNanImpl::::new()), - DataType::Float32 => Box::new(IsNanImpl::::new()), - DataType::Float64 => Box::new(IsNanImpl::::new()), + DataType::Float16 => Box::new(IsNanImpl::::new()), + DataType::Float32 => Box::new(IsNanImpl::::new()), + DataType::Float64 => Box::new(IsNanImpl::::new()), other => return Err(invalid_input_types_error(self, &[other])), }; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index a96132df6..1a2ca0279 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for LnOp { const NAME: &'static str = "ln"; const DESCRIPTION: &'static str = "Compute natural log of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index 0425634c4..d0efedd2b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for LogOp { const NAME: &'static str = "log"; const DESCRIPTION: &'static str = "Compute base-10 log of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, @@ -41,7 +41,7 @@ impl UnaryInputNumericOperation for LogOp2 { const NAME: &'static str = "log2"; const DESCRIPTION: &'static str = "Compute base-2 log of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 07baeea1b..671f5cb54 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -29,6 +29,7 @@ pub use degrees::*; pub use exp::*; pub use floor::*; pub use isnan::*; +use iterutil::IntoExactSizeIterator; pub use ln::*; pub use log::*; use num_traits::Float; @@ -38,12 +39,20 @@ pub use sin::*; pub use sqrt::*; pub use tan::*; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{ +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, PhysicalF16, PhysicalF32, PhysicalF64, +}; +use crate::arrays::datatype::{DataType, DataTypeId}; +use 
crate::arrays::executor::physical_type::{ + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalStorage2, PhysicalType2, }; @@ -81,11 +90,23 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat const NAME: &'static str; const DESCRIPTION: &'static str; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, ArrayData2: From>>; + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + unimplemented!() + } } /// Helper struct for creating functions that accept and produce a single @@ -148,9 +169,23 @@ impl ScalarFunctionImpl for UnaryInputNumericScal fn execute2(&self, inputs: &[&Array2]) -> Result { let input = inputs[0]; match input.physical_type() { - PhysicalType2::Float16 => O::execute_float::(input, self.ret.clone()), - PhysicalType2::Float32 => O::execute_float::(input, self.ret.clone()), - PhysicalType2::Float64 => O::execute_float::(input, self.ret.clone()), + PhysicalType2::Float16 => O::execute_float2::(input, self.ret.clone()), + PhysicalType2::Float32 => O::execute_float2::(input, self.ret.clone()), + PhysicalType2::Float64 => O::execute_float2::(input, self.ret.clone()), + other => Err(RayexecError::new(format!( + "Invalid physical type: {other:?}" + ))), + } + } + + fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + match input.datatype() { + DataType::Float16 => O::execute_float::(input, sel, output), + DataType::Float32 => O::execute_float::(input, sel, output), + DataType::Float64 => O::execute_float::(input, sel, output), other => Err(RayexecError::new(format!( "Invalid physical type: {other:?}" ))), diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 48f0a3883..0387d449b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for RadiansOp { const NAME: &'static str = "radians"; const DESCRIPTION: &'static str = "Converts degrees to radians"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 90d6595ca..151e1e022 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for SinOp { const NAME: &'static str = "sin"; const DESCRIPTION: &'static str = "Compute the sin of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index 98c2739ce..90d0eadfb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for SqrtOp { const NAME: &'static str = "sqrt"; const DESCRIPTION: &'static str = "Compute the square root of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn 
execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index e45187a1a..52c270b86 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -18,7 +18,7 @@ impl UnaryInputNumericOperation for TanOp { const NAME: &'static str = "tan"; const DESCRIPTION: &'static str = "Compute the tangent of value"; - fn execute_float<'a, S>(input: &'a Array2, ret: DataType) -> Result + fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result where S: PhysicalStorage2, S::Type<'a>: Float + Default, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 75432aa2c..0adae6425 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -8,9 +8,9 @@ use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalF16, - PhysicalF32, - PhysicalF64, + PhysicalF16_2, + PhysicalF32_2, + PhysicalF64_2, PhysicalStorage2, }; use crate::arrays::executor::scalar::{BinaryListReducer, ListExecutor}; @@ -68,13 +68,13 @@ impl ScalarFunction for L2Distance { (DataType::List(a), DataType::List(b)) => { match (a.datatype.as_ref(), b.datatype.as_ref()) { (DataType::Float16, DataType::Float16) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } (DataType::Float32, DataType::Float32) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } 
(DataType::Float64, DataType::Float64) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } (a, b) => return Err(invalid_input_types_error(self, &[a, b])), } From 06ce68de37d274851a906cc9eabd80bd3ae0988f Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 28 Dec 2024 18:33:27 -0500 Subject: [PATCH 15/59] ok --- .../src/arrays/buffer/physical_type.rs | 2 +- .../src/arrays/executor/aggregate/binary.rs | 6 +-- .../src/arrays/executor/aggregate/mod.rs | 6 +-- .../src/arrays/executor/aggregate/unary.rs | 8 +-- .../operators/hash_aggregate/chunk.rs | 2 +- .../operators/hash_aggregate/distinct.rs | 8 +-- .../operators/hash_aggregate/drain.rs | 2 +- .../operators/ungrouped_aggregate.rs | 4 +- .../src/functions/aggregate/builtin/avg.rs | 6 +-- .../src/functions/aggregate/builtin/corr.rs | 4 +- .../src/functions/aggregate/builtin/count.rs | 4 +- .../src/functions/aggregate/builtin/covar.rs | 4 +- .../src/functions/aggregate/builtin/first.rs | 6 +-- .../src/functions/aggregate/builtin/minmax.rs | 16 +++--- .../functions/aggregate/builtin/regr_avg.rs | 4 +- .../functions/aggregate/builtin/regr_count.rs | 4 +- .../functions/aggregate/builtin/regr_r2.rs | 4 +- .../functions/aggregate/builtin/regr_slope.rs | 4 +- .../src/functions/aggregate/builtin/stddev.rs | 4 +- .../functions/aggregate/builtin/string_agg.rs | 4 +- .../src/functions/aggregate/builtin/sum.rs | 51 +++++++++++++++---- .../src/functions/aggregate/states.rs | 33 +++++++----- 22 files changed, 112 insertions(+), 74 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 8f366cf25..57aecab24 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -127,7 +127,7 @@ where /// Represents in-memory storage that we can get mutable references to. 
pub trait AddressableMut: Debug { - type T: Send + Debug + ?Sized; + type T: Debug + ?Sized; fn len(&self) -> usize; diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index c3ab64ebd..ac4128b77 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -1,6 +1,6 @@ use rayexec_error::{RayexecError, Result}; -use super::{AggregateState, RowToStateMapping}; +use super::{AggregateState2, RowToStateMapping}; use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::check_validity; @@ -22,7 +22,7 @@ impl BinaryNonNullUpdater { S1: PhysicalStorage2, S2: PhysicalStorage2, I: IntoIterator, - State: AggregateState<(S1::Type<'a>, S2::Type<'a>), Output>, + State: AggregateState2<(S1::Type<'a>, S2::Type<'a>), Output>, { if array1.logical_len() != array2.logical_len() { return Err(RayexecError::new(format!( @@ -92,7 +92,7 @@ mod tests { } } - impl AggregateState<(i32, i32), i32> for TestAddSumAndProductState { + impl AggregateState2<(i32, i32), i32> for TestAddSumAndProductState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.product *= other.product; diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs index 9a38d8282..4dbead80e 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs @@ -18,7 +18,7 @@ use crate::arrays::bitmap::Bitmap; /// /// An example state for SUM would be a struct that takes a running sum from /// values provided in `update`. -pub trait AggregateState: Debug { +pub trait AggregateState2: Debug { /// Merge other state into this state. 
fn merge(&mut self, other: &mut Self) -> Result<()>; @@ -53,7 +53,7 @@ impl StateCombiner { targets: &mut [State], ) -> Result<()> where - State: AggregateState, + State: AggregateState2, { for mapping in mapping { let target = &mut targets[mapping.to_state]; @@ -77,7 +77,7 @@ impl StateFinalizer { B: ArrayDataBuffer, I: IntoIterator, I::IntoIter: ExactSizeIterator, - State: AggregateState + 'a, + State: AggregateState2 + 'a, Output: Borrow, { let states = states.into_iter(); diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index 423aae97e..1b348434b 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -1,6 +1,6 @@ use rayexec_error::Result; -use super::{AggregateState, RowToStateMapping}; +use super::{AggregateState2, RowToStateMapping}; use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::selection; @@ -19,7 +19,7 @@ impl UnaryNonNullUpdater { where S: PhysicalStorage2, I: IntoIterator, - State: AggregateState, Output>, + State: AggregateState2, Output>, { let selection = array.selection_vector(); @@ -67,7 +67,7 @@ mod tests { val: i32, } - impl AggregateState for TestSumState { + impl AggregateState2 for TestSumState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.val += other.val; Ok(()) @@ -165,7 +165,7 @@ mod tests { buf: String, } - impl AggregateState<&str, String> for TestStringAgg { + impl AggregateState2<&str, String> for TestStringAgg { fn merge(&mut self, other: &mut Self) -> Result<()> { self.buf.push_str(&other.buf); Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index c2ab28fe4..805614f39 100644 --- 
a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -91,7 +91,7 @@ impl GroupChunk { .filter_map(|(selected, arr)| if selected { Some(arr) } else { None }) .collect(); - agg_states.states.update_states( + agg_states.states.update_states2( &input_cols, ChunkGroupAddressIter::new(self.chunk_idx, addrs), )?; diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 5296423fe..6dfd36bac 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -50,7 +50,7 @@ impl AggregateGroupStates for DistinctGroupedStates { self.distinct_inputs.len() } - fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { + fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { // TODO: Would be cool not needing to do this. let mappings: Vec<_> = mapping.collect(); @@ -114,7 +114,7 @@ impl AggregateGroupStates for DistinctGroupedStates { Ok(()) } - fn finalize(&mut self) -> Result { + fn finalize2(&mut self) -> Result { // And now we actually create the states we need. self.states.new_states(self.distinct_inputs.len()); @@ -146,11 +146,11 @@ impl AggregateGroupStates for DistinctGroupedStates { let chunk_iter = ChunkGroupAddressIter::new(0, &addresses_buf); let inputs: Vec<_> = arrays.iter().collect(); // TODO - self.states.update_states(&inputs, chunk_iter)?; + self.states.update_states2(&inputs, chunk_iter)?; } } // Now we can actually drain the states. 
- self.states.finalize() + self.states.finalize2() } } diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs index 3f3c55c5b..62b4021c0 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs @@ -27,7 +27,7 @@ impl HashTableDrain { let results = chunk .aggregate_states .iter_mut() - .map(|s| s.states.finalize()) + .map(|s| s.states.finalize2()) .collect::>>()?; // Chunk arrays includes the GROUP ID column (last). diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 48aba3a75..dcad66f79 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -165,7 +165,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { .collect(); agg_states[agg_idx] - .update_states(&cols, ChunkGroupAddressIter::new(0, &addrs))?; + .update_states2(&cols, ChunkGroupAddressIter::new(0, &addrs))?; } // Keep pushing. 
@@ -234,7 +234,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { let arrays = final_states .iter_mut() - .map(|s| s.finalize()) + .map(|s| s.finalize2()) .collect::>>()?; let batch = Batch2::try_new(arrays)?; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index a8e3625b3..aa61e2538 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -9,7 +9,7 @@ use serde::{Deserialize, Serialize}; use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -204,7 +204,7 @@ struct AvgStateDecimal { _input: PhantomData, } -impl + Default + Debug> AggregateState for AvgStateDecimal { +impl + Default + Debug> AggregateState2 for AvgStateDecimal { fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.count += other.count; @@ -232,7 +232,7 @@ struct AvgStateF64 { _input: PhantomData, } -impl AggregateState for AvgStateF64 +impl AggregateState2 for AvgStateF64 where I: Into + Default + Debug, T: AsPrimitive + AddAssign + Debug + Default, diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs index 9ed3a65a3..bd612b42f 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, 
CovarState}; use super::stddev::{StddevPopFinalize, VarianceState}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -88,7 +88,7 @@ pub struct CorrelationState { stddev_y: VarianceState, } -impl AggregateState<(f64, f64), f64> for CorrelationState { +impl AggregateState2<(f64, f64), f64> for CorrelationState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.covar.merge(&mut other.covar)?; self.stddev_x.merge(&mut other.stddev_x)?; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs index 68761f921..354d90240 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalAny; use crate::expr::{self, Expression}; use crate::functions::aggregate::states::{ @@ -87,7 +87,7 @@ pub struct CountNonNullState { count: i64, } -impl AggregateState<(), i64> for CountNonNullState { +impl AggregateState2<(), i64> for CountNonNullState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs index b84c82a29..653123530 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs @@ -4,7 +4,7 @@ use 
std::marker::PhantomData; use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -174,7 +174,7 @@ pub struct CovarState { _finalize: PhantomData, } -impl AggregateState<(f64, f64), f64> for CovarState +impl AggregateState2<(f64, f64), f64> for CovarState where F: CovarFinalize, { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index ad4a38db5..9fd30ad83 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -6,7 +6,7 @@ use rayexec_error::{not_implemented, Result}; use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; +use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary, @@ -232,7 +232,7 @@ pub struct FirstState { value: Option, } -impl AggregateState for FirstState { +impl AggregateState2 for FirstState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { self.value = other.value; @@ -261,7 +261,7 @@ pub struct FirstStateBinary { value: Option>, } -impl AggregateState<&[u8], Vec> for FirstStateBinary { +impl AggregateState2<&[u8], Vec> for FirstStateBinary { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { std::mem::swap(&mut self.value, &mut other.value); diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs 
b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index 04ded97ca..cf69100d3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -6,7 +6,7 @@ use rayexec_error::{not_implemented, Result}; use crate::arrays::array::ArrayData2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; +use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary, @@ -269,7 +269,7 @@ impl MinMaxBinaryImpl { impl AggregateFunctionImpl for MinMaxBinaryImpl where - M: for<'a> AggregateState<&'a [u8], Vec> + Default + Sync + Send + 'static, + M: for<'a> AggregateState2<&'a [u8], Vec> + Default + Sync + Send + 'static, { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); @@ -306,7 +306,7 @@ impl MinMaxBoolImpl { impl AggregateFunctionImpl for MinMaxBoolImpl where - M: AggregateState + Default + Sync + Send + 'static, + M: AggregateState2 + Default + Sync + Send + 'static, { fn new_states(&self) -> Box { new_unary_aggregate_states::(M::default, move |states| { @@ -348,7 +348,7 @@ impl AggregateFunctionImpl for MinMaxPrimitiveImpl where for<'a> S: PhysicalStorage2 = T>, T: PartialOrd + Debug + Default + Sync + Send + Copy + 'static, - M: AggregateState + Default + Sync + Send + 'static, + M: AggregateState2 + Default + Sync + Send + 'static, ArrayData2: From>, { fn new_states(&self) -> Box { @@ -372,7 +372,7 @@ pub struct MinState { valid: bool, } -impl AggregateState for MinState +impl AggregateState2 for MinState where T: PartialOrd + Debug + Default + Copy, { @@ -412,7 +412,7 @@ pub struct MinStateBinary { valid: bool, } -impl AggregateState<&[u8], Vec> for MinStateBinary { +impl AggregateState2<&[u8], Vec> for MinStateBinary { fn 
merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; @@ -450,7 +450,7 @@ pub struct MaxState { valid: bool, } -impl AggregateState for MaxState +impl AggregateState2 for MaxState where T: PartialOrd + Debug + Default + Copy, { @@ -490,7 +490,7 @@ pub struct MaxStateBinary { valid: bool, } -impl AggregateState<&[u8], Vec> for MaxStateBinary { +impl AggregateState2<&[u8], Vec> for MaxStateBinary { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs index 2bd9b1649..b86a9668b 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -165,7 +165,7 @@ where _input: PhantomData, } -impl AggregateState<(f64, f64), f64> for RegrAvgState +impl AggregateState2<(f64, f64), f64> for RegrAvgState where F: RegrAvgInput, { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs index 2a26b0169..74b16b960 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use 
crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalAny; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -87,7 +87,7 @@ pub struct RegrCountState { count: i64, } -impl AggregateState<((), ()), i64> for RegrCountState { +impl AggregateState2<((), ()), i64> for RegrCountState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs index 7acdc9df5..e42a68442 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs @@ -4,7 +4,7 @@ use rayexec_error::Result; use super::corr::CorrelationState; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -84,7 +84,7 @@ pub struct RegrR2State { corr: CorrelationState, } -impl AggregateState<(f64, f64), f64> for RegrR2State { +impl AggregateState2<(f64, f64), f64> for RegrR2State { fn merge(&mut self, other: &mut Self) -> Result<()> { self.corr.merge(&mut other.corr)?; Ok(()) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs index 5d7d7933a..5fa3e2da3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{VariancePopFinalize, VarianceState}; use crate::arrays::datatype::{DataType, DataTypeId}; -use 
crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -86,7 +86,7 @@ pub struct RegrSlopeState { var: VarianceState, } -impl AggregateState<(f64, f64), f64> for RegrSlopeState { +impl AggregateState2<(f64, f64), f64> for RegrSlopeState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.cov.merge(&mut other.cov)?; self.var.merge(&mut other.var)?; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs index f196b7c63..03a28a764 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs @@ -4,7 +4,7 @@ use std::marker::PhantomData; use rayexec_error::Result; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -319,7 +319,7 @@ pub struct VarianceState { _finalize: PhantomData, } -impl AggregateState for VarianceState +impl AggregateState2 for VarianceState where F: VarianceFinalize, { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs index 5484f9938..81f620bab 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs @@ -3,7 +3,7 @@ use std::fmt::Debug; use rayexec_error::{RayexecError, Result}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState, StateFinalizer}; +use 
crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8; use crate::arrays::scalar::ScalarValue; @@ -119,7 +119,7 @@ pub struct StringAggState { string: Option, } -impl AggregateState<&str, String> for StringAggState { +impl AggregateState2<&str, String> for StringAggState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.string.is_none() { std::mem::swap(self, other); diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 91a01e909..f72f6362e 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -6,9 +6,12 @@ use num_traits::CheckedAdd; use rayexec_error::Result; use crate::arrays::array::ArrayData2; +use crate::arrays::buffer::physical_type::AddressableMut; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState; +use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -182,6 +185,32 @@ impl AggregateState for SumStateCh Ok(()) } + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.set { + output.put_null(); + } else { + output.put(&self.sum); + } + Ok(()) + } +} + +impl AggregateState2 for SumStateCheckedAdd { + fn merge(&mut self, other: &mut Self) -> Result<()> { + self.sum = self.sum.checked_add(&other.sum).unwrap_or_default(); // TODO + self.set = self.set || other.set; + Ok(()) + 
} + + fn update(&mut self, input: T) -> Result<()> { + self.sum = self.sum.checked_add(&input).unwrap_or_default(); // TODO + self.set = true; + Ok(()) + } + fn finalize(&mut self) -> Result<(T, bool)> { if self.set { Ok((self.sum, true)) @@ -197,7 +226,7 @@ pub struct SumStateAdd { valid: bool, } -impl AggregateState for SumStateAdd { +impl AggregateState2 for SumStateAdd { fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.valid = self.valid || other.valid; @@ -265,10 +294,10 @@ mod tests { .collect(); states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) .unwrap(); states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) .unwrap(); // Combine states. @@ -287,7 +316,7 @@ mod tests { .unwrap(); // Get final output. - let out = states_1.finalize().unwrap(); + let out = states_1.finalize2().unwrap(); assert_eq!(1, out.logical_len()); assert_eq!(ScalarValue::Int64(21), out.logical_value(0).unwrap()); @@ -367,10 +396,10 @@ mod tests { ]; states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) .unwrap(); states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) .unwrap(); // Combine states. @@ -400,7 +429,7 @@ mod tests { .unwrap(); // Get final output. 
- let out = states_1.finalize().unwrap(); + let out = states_1.finalize2().unwrap(); assert_eq!(2, out.logical_len()); assert_eq!(ScalarValue::Int64(9), out.logical_value(0).unwrap()); @@ -499,10 +528,10 @@ mod tests { ]; states_1 - .update_states(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) .unwrap(); states_2 - .update_states(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) .unwrap(); // Combine states. @@ -528,7 +557,7 @@ mod tests { .unwrap(); // Get final output. - let out = states_1.finalize().unwrap(); + let out = states_1.finalize2().unwrap(); assert_eq!(3, out.logical_len()); assert_eq!(ScalarValue::Int64(8), out.logical_value(0).unwrap()); diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index 7d0660add..d4af24b9b 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -6,10 +6,11 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; use super::ChunkGroupAddressIter; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::DataType; use crate::arrays::executor::aggregate::{ - AggregateState, + AggregateState2, BinaryNonNullUpdater, StateCombiner, StateFinalizer, @@ -56,7 +57,7 @@ pub fn new_unary_aggregate_states Box where Storage: PhysicalStorage2, - State: for<'a> AggregateState< + State: for<'a> AggregateState2< <::Storage<'a> as AddressableStorage>::T, Output, > + Sync @@ -84,7 +85,7 @@ pub fn new_binary_aggregate_states AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output> + State: for<'a> AggregateState2<(Storage1::Type<'a>, Storage2::Type<'a>), Output> + Sync + Send + 'static, @@ -105,7 +106,7 @@ where impl 
AggregateGroupStates for TypedAggregateGroupStates where - State: AggregateState + Sync + Send + 'static, + State: AggregateState2 + Sync + Send + 'static, Input: Sync + Send, Output: Sync + Send, StateInit: Fn() -> State + Sync + Send, @@ -124,7 +125,7 @@ where self.states.len() } - fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { + fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { (self.state_update)(inputs, mapping, &mut self.states) } @@ -137,7 +138,7 @@ where StateCombiner::combine(consume_states, mapping, &mut self.states) } - fn finalize(&mut self) -> Result { + fn finalize2(&mut self) -> Result { (self.state_finalize)(&mut self.states) } } @@ -166,7 +167,11 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn num_states(&self) -> usize; /// Update states from inputs using some mapping. - fn update_states(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; + fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; + + fn update_states(&mut self, inputs: &[Array], mapping: ChunkGroupAddressIter) -> Result<()> { + unimplemented!() + } /// Combine states from another partition into self using some mapping. fn combine( @@ -176,7 +181,11 @@ pub trait AggregateGroupStates: Debug + Sync + Send { ) -> Result<()>; /// Finalize the states and return an array. 
- fn finalize(&mut self) -> Result; + fn finalize2(&mut self) -> Result; + + fn drain(&mut self, output: &mut Array) -> Result<()> { + unimplemented!() + } } #[derive(Debug)] @@ -200,7 +209,7 @@ pub fn unary_update( ) -> Result<()> where Storage: PhysicalStorage2, - State: for<'a> AggregateState, Output>, + State: for<'a> AggregateState2, Output>, { UnaryNonNullUpdater::update::(arrays[0], mapping, states) } @@ -213,7 +222,7 @@ pub fn binary_update( where Storage1: PhysicalStorage2, Storage2: PhysicalStorage2, - State: for<'a> AggregateState<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, + State: for<'a> AggregateState2<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, { BinaryNonNullUpdater::update::( arrays[0], arrays[1], mapping, states, @@ -226,7 +235,7 @@ pub fn untyped_null_finalize(states: &mut [State]) -> Result { pub fn boolean_finalize(datatype: DataType, states: &mut [State]) -> Result where - State: AggregateState, + State: AggregateState2, { let builder = ArrayBuilder { datatype, @@ -240,7 +249,7 @@ pub fn primitive_finalize( states: &mut [State], ) -> Result where - State: AggregateState, + State: AggregateState2, Output: Copy + Default, ArrayData2: From>, { From e96cca65d47a9d0e96ed0308197dee5ea3083f01 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 13:31:56 -0500 Subject: [PATCH 16/59] transition some numeric funcs --- .../arrays/executor_exp/aggregate/binary.rs | 22 ++++++++++- .../functions/scalar/builtin/numeric/abs.rs | 2 +- .../functions/scalar/builtin/numeric/acos.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/asin.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/atan.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/cbrt.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/ceil.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/cos.rs | 22 +++++++++++ .../scalar/builtin/numeric/degrees.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/exp.rs | 22 +++++++++++ 
.../functions/scalar/builtin/numeric/floor.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/isnan.rs | 6 +++ .../functions/scalar/builtin/numeric/ln.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/log.rs | 39 +++++++++++++++++++ .../scalar/builtin/numeric/radians.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/sin.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/sqrt.rs | 22 +++++++++++ .../functions/scalar/builtin/numeric/tan.rs | 22 +++++++++++ 18 files changed, 374 insertions(+), 3 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs index ade1f1ec5..71d1c6b0e 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs @@ -62,8 +62,10 @@ impl BinaryNonNullUpdater { #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; - use crate::arrays::buffer::physical_type::AddressableMut; + use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalI32}; use crate::arrays::executor_exp::PutBuffer; // SUM(col) + PRODUCT(col) @@ -102,5 +104,21 @@ mod tests { } #[test] - fn test_name() {} + fn binary_primitive_single_state() { + let mut states = [TestAddSumAndProductState::default()]; + let array1 = Array::try_from_iter([1, 2, 3, 4, 5]).unwrap(); + let array2 = Array::try_from_iter([6, 7, 8, 9, 10]).unwrap(); + + BinaryNonNullUpdater::update::( + &array1, + &array2, + [1, 3, 4], + [0, 0, 0], + &mut states, + ) + .unwrap(); + + assert_eq!(11, states[0].sum); + assert_eq!(630, states[0].product); + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index 26daafd20..0c3438743 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index 76cc0032c..2a01e8966 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Acos = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for AcosOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.acos())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: 
Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.acos()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index 5f3dfbffe..d56b539d7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Asin = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for AsinOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.asin())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.asin()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index f87624eb5..324f823ba 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Atan = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for AtanOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.atan())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.atan()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 5b90082d9..105b82dc3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Cbrt = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for CbrtOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cbrt())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.cbrt()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 38a9ef0d4..a50f6d282 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Ceil = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for CeilOp { }; 
UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ceil())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.ceil()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 61caa8680..3bd5d5826 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Cos = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for CosOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cos())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.cos()), + ) + } } diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 7901ec1d4..7a445054a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Degrees = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for DegreesOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_degrees())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.to_degrees()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index 120bbc594..f8a1df45f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use 
rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Exp = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for ExpOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.exp())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.exp()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 3171ce741..6d932f661 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use 
crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Floor = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for FloorOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.floor())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.floor()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index ea5788b58..99fb057a6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -4,7 +4,9 @@ use num_traits::Float; use rayexec_error::Result; use super::ScalarFunction; +use crate::arrays::array::exp::Array; use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ @@ -111,4 +113,8 @@ where UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) } + + fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + unimplemented!() + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 1a2ca0279..3945a8d89 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -1,12 +1,17 @@ +use 
iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Ln = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for LnOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ln())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.ln()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index d0efedd2b..9c2c312c9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use 
crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Log = UnaryInputNumericScalar; @@ -30,6 +35,23 @@ impl UnaryInputNumericOperation for LogOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log10())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.log10()), + ) + } } pub type Log2 = UnaryInputNumericScalar; @@ -53,4 +75,21 @@ impl UnaryInputNumericOperation for LogOp2 { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log2())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.log2()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 0387d449b..2bd34f2c6 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Radians = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for RadiansOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_radians())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.to_radians()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 151e1e022..d18bdc42b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Sin = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for SinOp { }; 
UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sin())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.sin()), + ) + } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index 90d0eadfb..bf4eb2b67 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Sqrt = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for SqrtOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sqrt())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.sqrt()), + ) + } } diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 52c270b86..9566eecb9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -1,12 +1,17 @@ +use iterutil::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; +use crate::arrays::array::exp::Array; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::MutablePhysicalStorage; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::storage::PrimitiveStorage; pub type Tan = UnaryInputNumericScalar; @@ -30,4 +35,21 @@ impl UnaryInputNumericOperation for TanOp { }; UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.tan())) } + + fn execute_float( + input: &Array, + selection: impl IntoExactSizeIterator, + output: &mut Array, + ) -> Result<()> + where + S: MutablePhysicalStorage, + S::StorageType: Float, + { + UnaryExecutor::execute::( + input, + selection, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.tan()), + ) + } } From b098ce13d9feebdd0bce9015bae88cbcf4fbc23e Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 13:42:10 -0500 Subject: [PATCH 17/59] temp rename --- .../rayexec_execution/src/arrays/array/mod.rs | 95 ++++++------- .../src/arrays/compute/cast/array.rs | 130 +++++++++--------- .../src/arrays/compute/date.rs | 8 +- .../src/arrays/executor/aggregate/binary.rs | 4 +- .../src/arrays/executor/aggregate/unary.rs | 14 +- .../src/arrays/executor/physical_type.rs | 64 
++++----- .../src/arrays/executor/scalar/binary.rs | 8 +- .../src/arrays/executor/scalar/fill.rs | 125 +++++++++-------- .../src/arrays/executor/scalar/hash.rs | 92 ++++++------- .../src/arrays/executor/scalar/list.rs | 6 +- .../src/arrays/executor/scalar/select.rs | 6 +- .../src/arrays/executor/scalar/unary.rs | 18 +-- .../src/arrays/executor/scalar/uniform.rs | 8 +- .../src/arrays/row/encoding.rs | 52 +++---- .../src/arrays/scalar/decimal.rs | 6 +- .../operators/hash_aggregate/compare.rs | 68 ++++----- .../execution/operators/hash_aggregate/mod.rs | 4 +- .../src/execution/operators/unnest.rs | 58 ++++---- .../src/functions/aggregate/builtin/avg.rs | 4 +- .../src/functions/aggregate/builtin/first.rs | 64 ++++----- .../src/functions/aggregate/builtin/minmax.rs | 126 ++++++++--------- .../functions/aggregate/builtin/string_agg.rs | 4 +- .../src/functions/aggregate/builtin/sum.rs | 4 +- .../src/functions/scalar/builtin/arith/add.rs | 48 +++---- .../src/functions/scalar/builtin/arith/div.rs | 40 +++--- .../src/functions/scalar/builtin/arith/mul.rs | 52 +++---- .../src/functions/scalar/builtin/arith/rem.rs | 40 +++--- .../src/functions/scalar/builtin/arith/sub.rs | 46 +++---- .../src/functions/scalar/builtin/boolean.rs | 12 +- .../functions/scalar/builtin/comparison.rs | 102 +++++++------- .../scalar/builtin/datetime/date_trunc.rs | 4 +- .../scalar/builtin/datetime/epoch.rs | 4 +- .../src/functions/scalar/builtin/is.rs | 4 +- .../scalar/builtin/list/list_extract.rs | 56 ++++---- .../src/functions/scalar/builtin/negate.rs | 24 ++-- .../functions/scalar/builtin/string/ascii.rs | 4 +- .../functions/scalar/builtin/string/case.rs | 4 +- .../functions/scalar/builtin/string/concat.rs | 6 +- .../scalar/builtin/string/contains.rs | 6 +- .../scalar/builtin/string/ends_with.rs | 6 +- .../functions/scalar/builtin/string/length.rs | 8 +- .../functions/scalar/builtin/string/like.rs | 6 +- .../functions/scalar/builtin/string/pad.rs | 10 +- .../scalar/builtin/string/regexp_replace.rs 
| 28 ++-- .../functions/scalar/builtin/string/repeat.rs | 4 +- .../scalar/builtin/string/starts_with.rs | 6 +- .../scalar/builtin/string/substring.rs | 6 +- .../functions/scalar/builtin/string/trim.rs | 6 +- .../src/functions/table/builtin/series.rs | 8 +- .../src/functions/table/builtin/unnest.rs | 4 +- crates/rayexec_parquet/src/writer/mod.rs | 4 +- 51 files changed, 769 insertions(+), 747 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/mod.rs b/crates/rayexec_execution/src/arrays/array/mod.rs index cf785cee1..882a21775 100644 --- a/crates/rayexec_execution/src/arrays/array/mod.rs +++ b/crates/rayexec_execution/src/arrays/array/mod.rs @@ -23,24 +23,24 @@ use crate::arrays::executor::builder::{ }; use crate::arrays::executor::physical_type::{ PhysicalAny, - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Scalar, Decimal64Scalar}; @@ -309,7 +309,7 @@ impl Array2 { validity: None, data: UntypedNullStorage(self.logical_len()).into(), }), - ArrayData2::Boolean(_) => UnaryExecutor2::execute::( + ArrayData2::Boolean(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -317,7 +317,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int8(_) => UnaryExecutor2::execute::( + ArrayData2::Int8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -325,7 +325,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - 
ArrayData2::Int16(_) => UnaryExecutor2::execute::( + ArrayData2::Int16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -333,7 +333,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int32(_) => UnaryExecutor2::execute::( + ArrayData2::Int32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -341,7 +341,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int64(_) => UnaryExecutor2::execute::( + ArrayData2::Int64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -349,7 +349,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::Int128(_) => UnaryExecutor2::execute::( + ArrayData2::Int128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -357,7 +357,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt8(_) => UnaryExecutor2::execute::( + ArrayData2::UInt8(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -365,7 +365,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt16(_) => UnaryExecutor2::execute::( + ArrayData2::UInt16(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -373,7 +373,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt32(_) => UnaryExecutor2::execute::( + ArrayData2::UInt32(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -381,7 +381,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt64(_) => UnaryExecutor2::execute::( + ArrayData2::UInt64(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -389,7 +389,7 @@ impl Array2 { }, |v, buf| buf.put(&v), ), - ArrayData2::UInt128(_) => UnaryExecutor2::execute::( + ArrayData2::UInt128(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -421,7 +421,7 @@ impl Array2 { }, |v, buf| 
buf.put(&v), ), - ArrayData2::Interval(_) => UnaryExecutor2::execute::( + ArrayData2::Interval(_) => UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -437,7 +437,7 @@ impl Array2 { // data while just selecting the appropriate metadata. Instead // this will just copy everything. if self.datatype().is_utf8() { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -446,7 +446,7 @@ impl Array2 { |v, buf| buf.put(v), ) } else { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( self, ArrayBuilder { datatype: self.datatype.clone(), @@ -622,67 +622,67 @@ impl Array2 { UnaryExecutor2::value_at::(self, row).map(|arr_val| arr_val.is_none()) } // None == NULL ScalarValue::Boolean(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int8(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int16(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int32(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int64(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Int128(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, 
row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt8(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt16(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt32(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt64(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::UInt128(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) @@ -700,36 +700,37 @@ impl Array2 { }) } ScalarValue::Date32(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } ScalarValue::Date64(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }) } - ScalarValue::Interval(v) => UnaryExecutor2::value_at::(self, row) + ScalarValue::Interval(v) => UnaryExecutor2::value_at::(self, row) .map(|arr_val| match arr_val { Some(arr_val) => arr_val == *v, None => false, }), ScalarValue::Utf8(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + 
UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Binary(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val { + UnaryExecutor2::value_at::(self, row).map(|arr_val| match arr_val + { Some(arr_val) => arr_val == v.as_ref(), None => false, }) } ScalarValue::Timestamp(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes time unit is the same match arr_val { Some(arr_val) => arr_val == v.value, @@ -738,7 +739,7 @@ impl Array2 { }) } ScalarValue::Decimal64(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. match arr_val { Some(arr_val) => arr_val == v.value, @@ -747,7 +748,7 @@ impl Array2 { }) } ScalarValue::Decimal128(v) => { - UnaryExecutor2::value_at::(self, row).map(|arr_val| { + UnaryExecutor2::value_at::(self, row).map(|arr_val| { // Assumes precision/scale are the same. 
match arr_val { Some(arr_val) => arr_val == v.value, diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 7abf9a21e..b0532fd30 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -58,22 +58,22 @@ use crate::arrays::executor::builder::{ PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBool, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -98,34 +98,34 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res // Primitive numerics to other primitive numerics. DataType::Int8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Int16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Int32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Int64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Int128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? 
+ cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::UInt8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::UInt16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::UInt32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::UInt64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::UInt128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, to, behavior)? } DataType::Float16 if to.is_primitive_numeric() => { cast_primitive_numeric_helper::(arr, to, behavior)? @@ -139,51 +139,51 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res // Int to date32 DataType::Int8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + cast_primitive_numeric::(arr, to, behavior)? } DataType::Int16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + cast_primitive_numeric::(arr, to, behavior)? } DataType::Int32 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + cast_primitive_numeric::(arr, to, behavior)? } DataType::UInt8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + cast_primitive_numeric::(arr, to, behavior)? } DataType::UInt16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + cast_primitive_numeric::(arr, to, behavior)? } // Int to decimal. DataType::Int8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? 
} DataType::Int16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::Int32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::Int64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::Int128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::UInt8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::UInt16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::UInt32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::UInt64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } DataType::UInt128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, to, behavior)? } // Float to decimal. @@ -196,21 +196,21 @@ pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Res // Decimal to decimal DataType::Decimal64(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? + decimal_rescale_helper::(arr, to, behavior)? } DataType::Decimal128(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? + decimal_rescale_helper::(arr, to, behavior)? } // Decimal to float. 
DataType::Decimal64(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, + DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, + DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, DataType::Decimal128(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, + DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, + DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, @@ -587,27 +587,33 @@ pub fn cast_from_utf8( pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result { match arr.datatype() { DataType::Boolean => { - cast_format::(arr, BoolFormatter::default(), behavior) + cast_format::(arr, BoolFormatter::default(), behavior) + } + DataType::Int8 => cast_format::(arr, Int8Formatter::default(), behavior), + DataType::Int16 => { + cast_format::(arr, Int16Formatter::default(), behavior) + } + DataType::Int32 => { + cast_format::(arr, Int32Formatter::default(), behavior) + } + DataType::Int64 => { + cast_format::(arr, Int64Formatter::default(), behavior) } - DataType::Int8 => cast_format::(arr, Int8Formatter::default(), behavior), - DataType::Int16 => cast_format::(arr, Int16Formatter::default(), behavior), - DataType::Int32 => cast_format::(arr, Int32Formatter::default(), behavior), - DataType::Int64 => cast_format::(arr, Int64Formatter::default(), behavior), DataType::Int128 => { - cast_format::(arr, Int128Formatter::default(), behavior) + cast_format::(arr, Int128Formatter::default(), behavior) } - DataType::UInt8 => cast_format::(arr, UInt8Formatter::default(), behavior), + DataType::UInt8 => cast_format::(arr, UInt8Formatter::default(), 
behavior), DataType::UInt16 => { - cast_format::(arr, UInt16Formatter::default(), behavior) + cast_format::(arr, UInt16Formatter::default(), behavior) } DataType::UInt32 => { - cast_format::(arr, UInt32Formatter::default(), behavior) + cast_format::(arr, UInt32Formatter::default(), behavior) } DataType::UInt64 => { - cast_format::(arr, UInt64Formatter::default(), behavior) + cast_format::(arr, UInt64Formatter::default(), behavior) } DataType::UInt128 => { - cast_format::(arr, UInt128Formatter::default(), behavior) + cast_format::(arr, UInt128Formatter::default(), behavior) } DataType::Float32 => { cast_format::(arr, Float32Formatter::default(), behavior) @@ -615,31 +621,31 @@ pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result DataType::Float64 => { cast_format::(arr, Float64Formatter::default(), behavior) } - DataType::Decimal64(m) => cast_format::( + DataType::Decimal64(m) => cast_format::( arr, Decimal64Formatter::new(m.precision, m.scale), behavior, ), - DataType::Decimal128(m) => cast_format::( + DataType::Decimal128(m) => cast_format::( arr, Decimal128Formatter::new(m.precision, m.scale), behavior, ), DataType::Timestamp(m) => match m.unit { TimeUnit::Second => { - cast_format::(arr, TimestampSecondsFormatter::default(), behavior) + cast_format::(arr, TimestampSecondsFormatter::default(), behavior) } - TimeUnit::Millisecond => cast_format::( + TimeUnit::Millisecond => cast_format::( arr, TimestampMillisecondsFormatter::default(), behavior, ), - TimeUnit::Microsecond => cast_format::( + TimeUnit::Microsecond => cast_format::( arr, TimestampMicrosecondsFormatter::default(), behavior, ), - TimeUnit::Nanosecond => cast_format::( + TimeUnit::Nanosecond => cast_format::( arr, TimestampNanosecondsFormatter::default(), behavior, @@ -683,7 +689,7 @@ where fn cast_parse_bool(arr: &Array2, behavior: CastFailBehavior) -> Result { let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let output = 
UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Boolean, @@ -710,7 +716,7 @@ where ArrayData2: From>, { let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let output = UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: datatype.clone(), diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index 3770ecd02..86be1f06e 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -4,7 +4,7 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalI64}; +use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalI64_2}; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; @@ -148,7 +148,7 @@ where B: Fn(i64) -> DateTime, F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { @@ -168,7 +168,7 @@ fn date32_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { @@ -189,7 +189,7 @@ fn date64_extract_with_fn(arr: &Array2, f: F) -> Result where F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( arr, ArrayBuilder { datatype: DataType::Decimal64(DecimalTypeMeta { diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index ac4128b77..9138c486e 100644 --- 
a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -77,7 +77,7 @@ impl BinaryNonNullUpdater { #[cfg(test)] mod tests { use super::*; - use crate::arrays::executor::physical_type::PhysicalI32; + use crate::arrays::executor::physical_type::PhysicalI32_2; // SUM(col) + PRODUCT(col) #[derive(Debug)] @@ -131,7 +131,7 @@ mod tests { }, ]; - BinaryNonNullUpdater::update::( + BinaryNonNullUpdater::update::( &array1, &array2, mapping, diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index 1b348434b..a69df72ac 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -60,7 +60,7 @@ impl UnaryNonNullUpdater { #[cfg(test)] mod tests { use super::*; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; #[derive(Debug, Default)] struct TestSumState { @@ -102,7 +102,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(11, states[0].val); } @@ -126,7 +127,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(7, states[0].val); } @@ -154,7 +156,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater::update::(&array, mapping, &mut states) + .unwrap(); assert_eq!(5, states[0].val); assert_eq!(7, states[1].val); @@ -201,7 +204,8 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states).unwrap(); + UnaryNonNullUpdater::update::(&array, mapping, &mut states) + .unwrap(); 
assert_eq!("aabbbcccc", &states[0].buf); } diff --git a/crates/rayexec_execution/src/arrays/executor/physical_type.rs b/crates/rayexec_execution/src/arrays/executor/physical_type.rs index 6cd6d2bd0..c25a05926 100644 --- a/crates/rayexec_execution/src/arrays/executor/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/executor/physical_type.rs @@ -232,9 +232,9 @@ impl AddressableStorage for UnitStorage { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalUntypedNull; +pub struct PhysicalUntypedNull_2; -impl PhysicalStorage2 for PhysicalUntypedNull { +impl PhysicalStorage2 for PhysicalUntypedNull_2 { type Type<'a> = UntypedNull; type Storage<'a> = UntypedNullStorage; @@ -247,9 +247,9 @@ impl PhysicalStorage2 for PhysicalUntypedNull { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalBool; +pub struct PhysicalBool_2; -impl PhysicalStorage2 for PhysicalBool { +impl PhysicalStorage2 for PhysicalBool_2 { type Type<'a> = bool; type Storage<'a> = BooleanStorageRef<'a>; @@ -262,9 +262,9 @@ impl PhysicalStorage2 for PhysicalBool { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI8; +pub struct PhysicalI8_2; -impl PhysicalStorage2 for PhysicalI8 { +impl PhysicalStorage2 for PhysicalI8_2 { type Type<'a> = i8; type Storage<'a> = PrimitiveStorageSlice<'a, i8>; @@ -277,9 +277,9 @@ impl PhysicalStorage2 for PhysicalI8 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI16; +pub struct PhysicalI16_2; -impl PhysicalStorage2 for PhysicalI16 { +impl PhysicalStorage2 for PhysicalI16_2 { type Type<'a> = i16; type Storage<'a> = PrimitiveStorageSlice<'a, i16>; @@ -292,9 +292,9 @@ impl PhysicalStorage2 for PhysicalI16 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI32; +pub struct PhysicalI32_2; -impl PhysicalStorage2 for PhysicalI32 { +impl PhysicalStorage2 for PhysicalI32_2 { type Type<'a> = i32; type Storage<'a> = PrimitiveStorageSlice<'a, i32>; @@ -307,9 +307,9 @@ impl PhysicalStorage2 for PhysicalI32 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI64; 
+pub struct PhysicalI64_2; -impl PhysicalStorage2 for PhysicalI64 { +impl PhysicalStorage2 for PhysicalI64_2 { type Type<'a> = i64; type Storage<'a> = PrimitiveStorageSlice<'a, i64>; @@ -322,9 +322,9 @@ impl PhysicalStorage2 for PhysicalI64 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalI128; +pub struct PhysicalI128_2; -impl PhysicalStorage2 for PhysicalI128 { +impl PhysicalStorage2 for PhysicalI128_2 { type Type<'a> = i128; type Storage<'a> = PrimitiveStorageSlice<'a, i128>; @@ -337,9 +337,9 @@ impl PhysicalStorage2 for PhysicalI128 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU8; +pub struct PhysicalU8_2; -impl PhysicalStorage2 for PhysicalU8 { +impl PhysicalStorage2 for PhysicalU8_2 { type Type<'a> = u8; type Storage<'a> = PrimitiveStorageSlice<'a, u8>; @@ -352,9 +352,9 @@ impl PhysicalStorage2 for PhysicalU8 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU16; +pub struct PhysicalU16_2; -impl PhysicalStorage2 for PhysicalU16 { +impl PhysicalStorage2 for PhysicalU16_2 { type Type<'a> = u16; type Storage<'a> = PrimitiveStorageSlice<'a, u16>; @@ -367,9 +367,9 @@ impl PhysicalStorage2 for PhysicalU16 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU32; +pub struct PhysicalU32_2; -impl PhysicalStorage2 for PhysicalU32 { +impl PhysicalStorage2 for PhysicalU32_2 { type Type<'a> = u32; type Storage<'a> = PrimitiveStorageSlice<'a, u32>; @@ -382,9 +382,9 @@ impl PhysicalStorage2 for PhysicalU32 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU64; +pub struct PhysicalU64_2; -impl PhysicalStorage2 for PhysicalU64 { +impl PhysicalStorage2 for PhysicalU64_2 { type Type<'a> = u64; type Storage<'a> = PrimitiveStorageSlice<'a, u64>; @@ -397,9 +397,9 @@ impl PhysicalStorage2 for PhysicalU64 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalU128; +pub struct PhysicalU128_2; -impl PhysicalStorage2 for PhysicalU128 { +impl PhysicalStorage2 for PhysicalU128_2 { type Type<'a> = u128; type Storage<'a> = PrimitiveStorageSlice<'a, u128>; @@ -457,9 
+457,9 @@ impl PhysicalStorage2 for PhysicalF64_2 { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalInterval; +pub struct PhysicalInterval_2; -impl PhysicalStorage2 for PhysicalInterval { +impl PhysicalStorage2 for PhysicalInterval_2 { type Type<'a> = Interval; type Storage<'a> = PrimitiveStorageSlice<'a, Interval>; @@ -472,9 +472,9 @@ impl PhysicalStorage2 for PhysicalInterval { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalBinary; +pub struct PhysicalBinary_2; -impl PhysicalStorage2 for PhysicalBinary { +impl PhysicalStorage2 for PhysicalBinary_2 { type Type<'a> = &'a [u8]; type Storage<'a> = BinaryDataStorage<'a>; @@ -495,9 +495,9 @@ impl PhysicalStorage2 for PhysicalBinary { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalUtf8; +pub struct PhysicalUtf8_2; -impl PhysicalStorage2 for PhysicalUtf8 { +impl PhysicalStorage2 for PhysicalUtf8_2 { type Type<'a> = &'a str; type Storage<'a> = StrDataStorage<'a>; @@ -589,9 +589,9 @@ impl<'a> From> for StrDataStorage<'a> { } #[derive(Debug, Clone, Copy)] -pub struct PhysicalList; +pub struct PhysicalList_2; -impl PhysicalStorage2 for PhysicalList { +impl PhysicalStorage2 for PhysicalList_2 { type Type<'a> = ListItemMetadata; type Storage<'a> = PrimitiveStorageSlice<'a, ListItemMetadata>; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs index e9b0d6939..5d8ea7566 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/binary.rs @@ -97,7 +97,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{GermanVarlenBuffer, PrimitiveBuffer}; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; use crate::arrays::scalar::ScalarValue; #[test] @@ -110,7 +110,7 @@ mod tests { buffer: 
PrimitiveBuffer::::with_len(3), }; - let got = BinaryExecutor2::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -134,7 +134,7 @@ mod tests { }; let mut string_buf = String::new(); - let got = BinaryExecutor2::execute::( + let got = BinaryExecutor2::execute::( &left, &right, builder, @@ -168,7 +168,7 @@ mod tests { let right = Array2::from_iter([2, 3, 4]); - let got = BinaryExecutor2::execute::( + let got = BinaryExecutor2::execute::( &left, &right, ArrayBuilder { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs index be169d2e3..c97608c68 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/fill.rs @@ -13,26 +13,26 @@ use crate::arrays::executor::builder::{ PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalList, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalList_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection; @@ -164,77 +164,77 @@ pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) datatype: datatype.clone(), buffer: BooleanBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); 
- concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::UInt128 => { let state = 
FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Float16 => { let state = FillState::new(ArrayBuilder { @@ -262,21 +262,21 @@ pub(crate) fn concat_with_exact_total_len(arrays: &[&Array2], total_len: usize) datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(total_len), }); - concat_with_fill_state::(arrays, state) + concat_with_fill_state::(arrays, state) } PhysicalType2::List => concat_lists(datatype.clone(), arrays, total_len), } @@ -308,18 +308,21 @@ fn concat_lists(datatype: DataType, arrays: &[&Array2], total_len: usize) -> Res let mut acc_rows = 0; for (array, child_array) in arrays.iter().zip(inner_arrays) { - UnaryExecutor2::for_each::(array, |_row_num, metadata| match metadata { - Some(metadata) => { - metadatas.push(ListItemMetadata { - offset: metadata.offset + acc_rows, - len: metadata.len, - }); - } - None => { - metadatas.push(ListItemMetadata::default()); - validity.set_unchecked(metadatas.len() - 1, false); - } - })?; + UnaryExecutor2::for_each::( + array, + |_row_num, metadata| match metadata { + Some(metadata) => { + metadatas.push(ListItemMetadata { + offset: metadata.offset + acc_rows, + len: metadata.len, + }); + } + None => { + metadatas.push(ListItemMetadata::default()); + validity.set_unchecked(metadatas.len() - 1, false); + } + }, + )?; acc_rows += child_array.logical_len() as i32; } @@ -388,77 +391,77 
@@ pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Int8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Int16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Int32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Int64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Int128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::UInt8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::UInt16 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } 
PhysicalType2::UInt32 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::UInt64 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::UInt128 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: PrimitiveBuffer::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Float16 => { let state = FillState::new(ArrayBuilder { @@ -486,21 +489,21 @@ pub fn interleave(arrays: &[&Array2], indices: &[(usize, usize)]) -> Result(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Utf8 => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::Binary => { let state = FillState::new(ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(indices.len()), }); - interleave_with_fill_state::(arrays, indices, state) + interleave_with_fill_state::(arrays, indices, state) } PhysicalType2::List => { // TODO: Also doable @@ -550,7 +553,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::PrimitiveBuffer; - use crate::arrays::executor::physical_type::PhysicalI32; + use crate::arrays::executor::physical_type::PhysicalI32_2; use crate::arrays::scalar::ScalarValue; #[test] @@ -567,7 +570,7 @@ mod tests { FillMapping { from: 2, to: 2 }, ]; 
- state.fill::(&arr, mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -590,7 +593,7 @@ mod tests { FillMapping { from: 1, to: 2 }, ]; - state.fill::(&arr, mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -613,7 +616,7 @@ mod tests { FillMapping { from: 2, to: 0 }, ]; - state.fill::(&arr, mapping).unwrap(); + state.fill::(&arr, mapping).unwrap(); let got = state.finish(); @@ -635,7 +638,7 @@ mod tests { FillMapping { from: 1, to: 4 }, FillMapping { from: 2, to: 0 }, ]; - state.fill::(&arr1, mapping1).unwrap(); + state.fill::(&arr1, mapping1).unwrap(); let arr2 = Array2::from_iter([7, 8, 9]); let mapping2 = [ @@ -643,7 +646,7 @@ mod tests { FillMapping { from: 1, to: 3 }, FillMapping { from: 2, to: 5 }, ]; - state.fill::(&arr2, mapping2).unwrap(); + state.fill::(&arr2, mapping2).unwrap(); let got = state.finish(); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs index 1477c141a..32fb4485f 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/hash.rs @@ -4,26 +4,26 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, - PhysicalList, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, + PhysicalList_2, PhysicalStorage2, PhysicalType2, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, - PhysicalUtf8, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, }; use 
crate::arrays::scalar::interval::Interval; use crate::arrays::selection; @@ -41,40 +41,40 @@ impl HashExecutor { pub fn hash_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { PhysicalType2::UntypedNull => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Boolean => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int128 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt128 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float16 => { Self::hash_one_inner::(array, hashes)? @@ -86,13 +86,13 @@ impl HashExecutor { Self::hash_one_inner::(array, hashes)? } PhysicalType2::Binary => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Utf8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Interval => { - Self::hash_one_inner::(array, hashes)? 
+ Self::hash_one_inner::(array, hashes)? } PhysicalType2::List => Self::hash_list::(array, hashes)?, } @@ -105,40 +105,40 @@ impl HashExecutor { pub fn hash_no_combine(array: &Array2, hashes: &mut [u64]) -> Result<()> { match array.physical_type() { PhysicalType2::UntypedNull => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Boolean => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Int128 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt16 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt32 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt64 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::UInt128 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Float16 => { Self::hash_one_inner::(array, hashes)? @@ -150,13 +150,13 @@ impl HashExecutor { Self::hash_one_inner::(array, hashes)? } PhysicalType2::Binary => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::Utf8 => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? 
} PhysicalType2::Interval => { - Self::hash_one_inner::(array, hashes)? + Self::hash_one_inner::(array, hashes)? } PhysicalType2::List => Self::hash_list::(array, hashes)?, } @@ -233,7 +233,7 @@ impl HashExecutor { let mut list_hashes_buf = vec![0; inner.logical_len()]; Self::hash_no_combine(inner, &mut list_hashes_buf)?; - let metadata = PhysicalList::get_storage(&array.data)?; + let metadata = PhysicalList_2::get_storage(&array.data)?; let selection = array.selection_vector(); match array.validity() { diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index a16bbabaf..c749c75d5 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -3,7 +3,7 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::bitmap::Bitmap; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalStorage2}; +use crate::arrays::executor::physical_type::{PhysicalList_2, PhysicalStorage2}; use crate::arrays::executor::scalar::{ can_skip_validity_check, check_validity, @@ -56,8 +56,8 @@ impl let validity2 = array2.validity(); if can_skip_validity_check([validity1, validity2]) { - let metadata1 = PhysicalList::get_storage(array1.array_data())?; - let metadata2 = PhysicalList::get_storage(array2.array_data())?; + let metadata1 = PhysicalList_2::get_storage(array1.array_data())?; + let metadata2 = PhysicalList_2::get_storage(array2.array_data())?; let (values1, inner_validity1) = get_inner_array_storage::(array1)?; let (values2, inner_validity2) = get_inner_array_storage::(array2)?; diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs index a3f5f1b75..d7cd0ef61 100644 --- 
a/crates/rayexec_execution/src/arrays/executor/scalar/select.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/select.rs @@ -1,7 +1,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; -use crate::arrays::executor::physical_type::{PhysicalBool, PhysicalStorage2}; +use crate::arrays::executor::physical_type::{PhysicalBool_2, PhysicalStorage2}; use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::AddressableStorage; @@ -19,7 +19,7 @@ impl SelectExecutor { match bool_array.validity() { Some(validity) => { - let values = PhysicalBool::get_storage(&bool_array.data)?; + let values = PhysicalBool_2::get_storage(&bool_array.data)?; for idx in 0..len { let sel = selection::get(selection, idx); @@ -35,7 +35,7 @@ impl SelectExecutor { } } None => { - let values = PhysicalBool::get_storage(&bool_array.data)?; + let values = PhysicalBool_2::get_storage(&bool_array.data)?; for idx in 0..len { let sel = selection::get(selection, idx); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs index 867e5c084..4e9b04b9b 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/unary.rs @@ -154,7 +154,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{GermanVarlenBuffer, PrimitiveBuffer}; - use crate::arrays::executor::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalUtf8_2}; use crate::arrays::scalar::ScalarValue; #[test] @@ -165,7 +165,7 @@ mod tests { buffer: PrimitiveBuffer::::with_len(3), }; - let got = UnaryExecutor2::execute::(&array, builder, |v, buf| { + let got = UnaryExecutor2::execute::(&array, builder, |v, buf| { buf.put(&(v + 2)) }) .unwrap(); @@ -194,8 +194,9 @@ mod tests { buf.put(&double) } - let got = UnaryExecutor2::execute::(&array, 
builder, my_string_double) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_double) + .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bbbb"), got.physical_scalar(1).unwrap()); @@ -227,8 +228,9 @@ mod tests { buf.put(buffer.as_str()) }; - let got = UnaryExecutor2::execute::(&array, builder, my_string_double) - .unwrap(); + let got = + UnaryExecutor2::execute::(&array, builder, my_string_double) + .unwrap(); assert_eq!(ScalarValue::from("aa"), got.physical_scalar(0).unwrap()); assert_eq!(ScalarValue::from("bbbb"), got.physical_scalar(1).unwrap()); @@ -255,7 +257,7 @@ mod tests { }; let got = - UnaryExecutor2::execute::(&array, builder, my_string_truncate) + UnaryExecutor2::execute::(&array, builder, my_string_truncate) .unwrap(); assert_eq!(ScalarValue::from("a"), got.physical_scalar(0).unwrap()); @@ -289,7 +291,7 @@ mod tests { }; let got = - UnaryExecutor2::execute::(&array, builder, my_string_uppercase) + UnaryExecutor2::execute::(&array, builder, my_string_uppercase) .unwrap(); assert_eq!(ScalarValue::from("DDDD"), got.physical_scalar(0).unwrap()); diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index de02c60c8..466aafb3b 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -111,7 +111,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::GermanVarlenBuffer; - use crate::arrays::executor::physical_type::PhysicalUtf8; + use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::scalar::ScalarValue; #[test] @@ -127,7 +127,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ 
-162,7 +162,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -195,7 +195,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor::execute::( &[&first, &second, &third], builder, |inputs, buf| { diff --git a/crates/rayexec_execution/src/arrays/row/encoding.rs b/crates/rayexec_execution/src/arrays/row/encoding.rs index ec5178630..a84148ef6 100644 --- a/crates/rayexec_execution/src/arrays/row/encoding.rs +++ b/crates/rayexec_execution/src/arrays/row/encoding.rs @@ -4,23 +4,23 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2, BinaryData}; use crate::arrays::executor::physical_type::{ AsBytes, - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::scalar::interval::Interval; @@ -195,37 +195,37 @@ impl ComparableRowEncoder { ArrayData2::UntypedNull(_) => { Self::encode_untyped_null(cmp_col, data, row_offset)? 
} - ArrayData2::Boolean(_) => Self::encode_primitive::( + ArrayData2::Boolean(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Int8(_) => Self::encode_primitive::( + ArrayData2::Int8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Int16(_) => Self::encode_primitive::( + ArrayData2::Int16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Int32(_) => Self::encode_primitive::( + ArrayData2::Int32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Int64(_) => Self::encode_primitive::( + ArrayData2::Int64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Int128(_) => Self::encode_primitive::( + ArrayData2::Int128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::UInt8(_) => Self::encode_primitive::( + ArrayData2::UInt8(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::UInt16(_) => Self::encode_primitive::( + ArrayData2::UInt16(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::UInt32(_) => Self::encode_primitive::( + ArrayData2::UInt32(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::UInt64(_) => Self::encode_primitive::( + ArrayData2::UInt64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::UInt128(_) => Self::encode_primitive::( + ArrayData2::UInt128(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, ArrayData2::Float16(_) => Self::encode_primitive::( @@ -237,10 +237,10 @@ impl ComparableRowEncoder { ArrayData2::Float64(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, row_offset, )?, - ArrayData2::Interval(_) => Self::encode_primitive::( + ArrayData2::Interval(_) => Self::encode_primitive::( cmp_col, arr, row_idx, data, 
row_offset, )?, - ArrayData2::Binary(_) => Self::encode_varlen::( + ArrayData2::Binary(_) => Self::encode_varlen::( cmp_col, arr, row_idx, data, row_offset, )?, ArrayData2::List(_) => not_implemented!("Row encode list"), diff --git a/crates/rayexec_execution/src/arrays/scalar/decimal.rs b/crates/rayexec_execution/src/arrays/scalar/decimal.rs index dcbca9243..f40686592 100644 --- a/crates/rayexec_execution/src/arrays/scalar/decimal.rs +++ b/crates/rayexec_execution/src/arrays/scalar/decimal.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; -use crate::arrays::executor::physical_type::{PhysicalI128, PhysicalI64, PhysicalStorage2}; +use crate::arrays::executor::physical_type::{PhysicalI128_2, PhysicalI64_2, PhysicalStorage2}; pub trait DecimalPrimitive: PrimInt + FromPrimitive + Signed + Default + Debug + Display + Sync + Send @@ -71,7 +71,7 @@ pub struct Decimal64Type; impl DecimalType for Decimal64Type { type Primitive = i64; - type Storage = PhysicalI64; + type Storage = PhysicalI64_2; const MAX_PRECISION: u8 = 18; // Note that changing this would require changing some of the date functions // since they assume this is 3. 
@@ -83,7 +83,7 @@ pub struct Decimal128Type; impl DecimalType for Decimal128Type { type Primitive = i128; - type Storage = PhysicalI128; + type Storage = PhysicalI128_2; const MAX_PRECISION: u8 = 38; const DEFAULT_SCALE: i8 = 9; } diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs index 3008777e5..2446ed345 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/compare.rs @@ -6,26 +6,26 @@ use super::chunk::GroupChunk; use super::hash_table::GroupAddress; use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::{can_skip_validity_check, check_validity}; use crate::arrays::selection::{self, SelectionVector}; @@ -96,7 +96,7 @@ where } match array1.physical_type() { - PhysicalType2::UntypedNull => compare_rows_eq::( + PhysicalType2::UntypedNull => compare_rows_eq::( array1, array2, rows1, @@ -104,37 +104,37 @@ where not_eq_rows, )?, PhysicalType2::Boolean => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Int8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? 
+ compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Int16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Int32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Int64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Int128 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::UInt8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::UInt16 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::UInt32 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::UInt64 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::UInt128 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::Float16 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? @@ -145,18 +145,22 @@ where PhysicalType2::Float64 => { compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? 
} - PhysicalType2::Interval => compare_rows_eq::( + PhysicalType2::Interval => compare_rows_eq::( + array1, + array2, + rows1, + rows2, + not_eq_rows, + )?, + PhysicalType2::Binary => compare_rows_eq::( array1, array2, rows1, rows2, not_eq_rows, )?, - PhysicalType2::Binary => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? - } PhysicalType2::Utf8 => { - compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? + compare_rows_eq::(array1, array2, rows1, rows2, not_eq_rows)? } PhysicalType2::List => { not_implemented!("Row compare list") diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index 221091f77..8399bda8a 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -21,7 +21,7 @@ use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::DataType; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalU64; +use crate::arrays::executor::physical_type::PhysicalU64_2; use crate::arrays::executor::scalar::{HashExecutor, UnaryExecutor2}; use crate::arrays::scalar::ScalarValue; use crate::arrays::selection::SelectionVector; @@ -440,7 +440,7 @@ impl ExecutableOperator for PhysicalHashAggregate { buffer: PrimitiveBuffer::with_len(group_ids.logical_len()), }; - let array = UnaryExecutor2::execute::( + let array = UnaryExecutor2::execute::( &group_ids, builder, |id, buf| { diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index 76150b37e..dcb08617d 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -26,25 +26,25 @@ use crate::arrays::executor::builder::{ 
PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalList, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalList_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::selection::{self, SelectionVector}; @@ -212,7 +212,7 @@ impl ExecutableOperator for PhysicalUnnest { continue; } - if let Some(list_meta) = UnaryExecutor2::value_at::( + if let Some(list_meta) = UnaryExecutor2::value_at::( &state.unnest_inputs[input_idx], state.current_row, )? { @@ -250,7 +250,7 @@ impl ExecutableOperator for PhysicalUnnest { _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor2::value_at::(arr, state.current_row)? { + match UnaryExecutor2::value_at::(arr, state.current_row)? { Some(meta) => { // Row is a list, unnest. 
let out = unnest(child, longest as usize, meta)?; @@ -314,77 +314,77 @@ pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) datatype, buffer: BooleanBuffer::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Int8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Int16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Int32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Int64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Int128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::UInt8 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::UInt16 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::UInt32 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::UInt64 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - 
unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::UInt128 => { let builder = ArrayBuilder { datatype, buffer: PrimitiveBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Float16 => { let builder = ArrayBuilder { @@ -412,14 +412,14 @@ pub(crate) fn unnest(child: &Array2, longest_len: usize, meta: ListItemMetadata) datatype, buffer: GermanVarlenBuffer::::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } PhysicalType2::Binary => { let builder = ArrayBuilder { datatype, buffer: GermanVarlenBuffer::<[u8]>::with_len(longest_len), }; - unnest_inner::(builder, child, meta) + unnest_inner::(builder, child, meta) } other => not_implemented!("Unnest for physical type {other:?}"), } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index aa61e2538..ff463ed47 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -11,7 +11,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; +use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64_2}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::expr::Expression; use crate::functions::aggregate::states::{ @@ -190,7 +190,7 @@ pub struct AvgInt64Impl; impl AggregateFunctionImpl for AvgInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( AvgStateF64::::default, move |states| primitive_finalize(DataType::Float64, 
states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index 9fd30ad83..3dbdda2d9 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -9,25 +9,25 @@ use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, }; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; @@ -93,38 +93,38 @@ impl AggregateFunction for First { PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Int8 => { - Box::new(FirstPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int8 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( 
+ PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::UInt8 => { - Box::new(FirstPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt8 => Box::new(FirstPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::UInt64 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt64 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), - PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( + PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::Interval => Box::new( - FirstPrimitiveImpl::::new(datatype.clone()), + FirstPrimitiveImpl::::new(datatype.clone()), ), PhysicalType2::Binary => Box::new(FirstBinaryImpl { datatype: datatype.clone(), @@ -157,7 +157,7 @@ impl AggregateFunctionImpl for FirstBinaryImpl { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::( + new_unary_aggregate_states::( FirstStateBinary::default, move |states| { let builder = ArrayBuilder { @@ -175,7 +175,7 @@ pub struct FirstUntypedNullImpl; impl AggregateFunctionImpl for FirstUntypedNullImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( FirstState::::default, untyped_null_finalize, ) @@ -187,7 +187,7 @@ pub struct FirstBoolImpl; impl AggregateFunctionImpl for FirstBoolImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( FirstState::::default, move |states| boolean_finalize(DataType::Boolean, states), ) diff --git 
a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index cf69100d3..329a9d75d 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -9,25 +9,25 @@ use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, }; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; @@ -94,37 +94,37 @@ impl AggregateFunction for Min { datatype.clone(), )), PhysicalType2::Int8 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) + Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType2::Int16 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int32 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int64 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( + PhysicalType2::Int16 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Int32 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Int64 => 
Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::UInt8 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) + Box::new(MinPrimitiveImpl::::new(datatype.clone())) } - PhysicalType2::UInt16 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt32 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt64 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( + PhysicalType2::UInt16 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt32 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt64 => Box::new(MinPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::Interval => Box::new( - MinPrimitiveImpl::::new(datatype.clone()), + MinPrimitiveImpl::::new(datatype.clone()), ), PhysicalType2::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), PhysicalType2::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())), @@ -188,37 +188,37 @@ impl AggregateFunction for Max { datatype.clone(), )), PhysicalType2::Int8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) + Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType2::Int16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( + PhysicalType2::Int16 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Int32 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::Int64 => Box::new(MaxPrimitiveImpl::::new( + 
datatype.clone(), + )), + PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::UInt8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) + Box::new(MaxPrimitiveImpl::::new(datatype.clone())) } - PhysicalType2::UInt16 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt32 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt64 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( + PhysicalType2::UInt16 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt32 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt64 => Box::new(MaxPrimitiveImpl::::new( + datatype.clone(), + )), + PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( datatype.clone(), )), PhysicalType2::Interval => Box::new( - MaxPrimitiveImpl::::new(datatype.clone()), + MaxPrimitiveImpl::::new(datatype.clone()), ), PhysicalType2::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), PhysicalType2::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), @@ -242,7 +242,7 @@ pub struct MinMaxUntypedNull; impl AggregateFunctionImpl for MinMaxUntypedNull { fn new_states(&self) -> Box { // Note min vs max doesn't matter. Everything is null. 
- new_unary_aggregate_states::( + new_unary_aggregate_states::( MinState::::default, untyped_null_finalize, ) @@ -274,7 +274,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::(M::default, move |states| { + new_unary_aggregate_states::(M::default, move |states| { let builder = ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), @@ -309,7 +309,7 @@ where M: AggregateState2 + Default + Sync + Send + 'static, { fn new_states(&self) -> Box { - new_unary_aggregate_states::(M::default, move |states| { + new_unary_aggregate_states::(M::default, move |states| { boolean_finalize(DataType::Boolean, states) }) } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs index 81f620bab..bf1d7dab2 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::scalar::ScalarValue; use crate::expr::Expression; use crate::functions::aggregate::states::{new_unary_aggregate_states, AggregateGroupStates}; @@ -99,7 +99,7 @@ impl AggregateFunctionImpl for StringAggImpl { string: None, }; - new_unary_aggregate_states::(state_init, move |states| { + new_unary_aggregate_states::(state_init, move |states| { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(states.len()), diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs 
b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index f72f6362e..4ab0e244c 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -9,7 +9,7 @@ use crate::arrays::array::ArrayData2; use crate::arrays::buffer::physical_type::AddressableMut; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64}; +use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64_2}; use crate::arrays::executor_exp::aggregate::AggregateState; use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -117,7 +117,7 @@ pub struct SumInt64Impl; impl AggregateFunctionImpl for SumInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states::( SumStateCheckedAdd::::default, move |states| primitive_finalize(DataType::Int64, states), ) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index 46bc110bf..d9f88a52b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -10,17 +10,17 @@ use crate::arrays::executor::physical_type::{ PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; @@ -112,65 +112,65 @@ impl ScalarFunction 
for Add { DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( - Box::new(AddImpl::::new(DataType::Int8)), + Box::new(AddImpl::::new(DataType::Int8)), DataType::Int8, ), (DataType::Int16, DataType::Int16) => ( - Box::new(AddImpl::::new(DataType::Int16)), + Box::new(AddImpl::::new(DataType::Int16)), DataType::Int16, ), (DataType::Int32, DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Int32)), + Box::new(AddImpl::::new(DataType::Int32)), DataType::Int32, ), (DataType::Int64, DataType::Int64) => ( - Box::new(AddImpl::::new(DataType::Int64)), + Box::new(AddImpl::::new(DataType::Int64)), DataType::Int64, ), (DataType::Int128, DataType::Int128) => ( - Box::new(AddImpl::::new(DataType::Int128)), + Box::new(AddImpl::::new(DataType::Int128)), DataType::Int128, ), (DataType::UInt8, DataType::UInt8) => ( - Box::new(AddImpl::::new(DataType::UInt8)), + Box::new(AddImpl::::new(DataType::UInt8)), DataType::UInt8, ), (DataType::UInt16, DataType::UInt16) => ( - Box::new(AddImpl::::new(DataType::UInt16)), + Box::new(AddImpl::::new(DataType::UInt16)), DataType::UInt16, ), (DataType::UInt32, DataType::UInt32) => ( - Box::new(AddImpl::::new(DataType::UInt32)), + Box::new(AddImpl::::new(DataType::UInt32)), DataType::UInt32, ), (DataType::UInt64, DataType::UInt64) => ( - Box::new(AddImpl::::new(DataType::UInt64)), + Box::new(AddImpl::::new(DataType::UInt64)), DataType::UInt64, ), (DataType::UInt128, DataType::UInt128) => ( - Box::new(AddImpl::::new(DataType::UInt128)), + Box::new(AddImpl::::new(DataType::UInt128)), DataType::UInt128, ), // TODO: Split out decimal (for scaling) datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), + Box::new(AddImpl::::new(datatypes.0.clone())), datatypes.0, ), datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), + Box::new(AddImpl::::new(datatypes.0.clone())), datatypes.0, ), // Date + days (DataType::Date32, 
DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Date32)), + Box::new(AddImpl::::new(DataType::Date32)), DataType::Date32, ), // Days + date // Note both are represented as i32 physical type, we don't need to worry about flipping the sides. (DataType::Int32, DataType::Date32) => ( - Box::new(AddImpl::::new(DataType::Date32)), + Box::new(AddImpl::::new(DataType::Date32)), DataType::Date32, ), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index 3685ff5eb..fdc1c55de 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -12,17 +12,17 @@ use crate::arrays::executor::physical_type::{ PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -118,43 +118,43 @@ impl ScalarFunction for Div { DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( - Box::new(DivImpl::::new(DataType::Int8)), + Box::new(DivImpl::::new(DataType::Int8)), DataType::Int8, ), (DataType::Int16, DataType::Int16) => ( - Box::new(DivImpl::::new(DataType::Int16)), + Box::new(DivImpl::::new(DataType::Int16)), DataType::Int16, ), (DataType::Int32, DataType::Int32) => ( - Box::new(DivImpl::::new(DataType::Int32)), + Box::new(DivImpl::::new(DataType::Int32)), DataType::Int32, ), (DataType::Int64, DataType::Int64) => ( - Box::new(DivImpl::::new(DataType::Int64)), + Box::new(DivImpl::::new(DataType::Int64)), DataType::Int64, ), 
(DataType::Int128, DataType::Int128) => ( - Box::new(DivImpl::::new(DataType::Int128)), + Box::new(DivImpl::::new(DataType::Int128)), DataType::Int128, ), (DataType::UInt8, DataType::UInt8) => ( - Box::new(DivImpl::::new(DataType::UInt8)), + Box::new(DivImpl::::new(DataType::UInt8)), DataType::UInt8, ), (DataType::UInt16, DataType::UInt16) => ( - Box::new(DivImpl::::new(DataType::UInt16)), + Box::new(DivImpl::::new(DataType::UInt16)), DataType::UInt16, ), (DataType::UInt32, DataType::UInt32) => ( - Box::new(DivImpl::::new(DataType::UInt32)), + Box::new(DivImpl::::new(DataType::UInt32)), DataType::UInt32, ), (DataType::UInt64, DataType::UInt64) => ( - Box::new(DivImpl::::new(DataType::UInt64)), + Box::new(DivImpl::::new(DataType::UInt64)), DataType::UInt64, ), (DataType::UInt128, DataType::UInt128) => ( - Box::new(DivImpl::::new(DataType::UInt128)), + Box::new(DivImpl::::new(DataType::UInt128)), DataType::UInt128, ), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index f5a25812b..77969737d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -11,18 +11,18 @@ use crate::arrays::executor::physical_type::{ PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -129,43 +129,43 @@ impl ScalarFunction for Mul { DataType::Float64, ), (DataType::Int8, DataType::Int8) => 
( - Box::new(MulImpl::::new(DataType::Int8)), + Box::new(MulImpl::::new(DataType::Int8)), DataType::Int8, ), (DataType::Int16, DataType::Int16) => ( - Box::new(MulImpl::::new(DataType::Int16)), + Box::new(MulImpl::::new(DataType::Int16)), DataType::Int16, ), (DataType::Int32, DataType::Int32) => ( - Box::new(MulImpl::::new(DataType::Int32)), + Box::new(MulImpl::::new(DataType::Int32)), DataType::Int32, ), (DataType::Int64, DataType::Int64) => ( - Box::new(MulImpl::::new(DataType::Int64)), + Box::new(MulImpl::::new(DataType::Int64)), DataType::Int64, ), (DataType::Int128, DataType::Int128) => ( - Box::new(MulImpl::::new(DataType::Int128)), + Box::new(MulImpl::::new(DataType::Int128)), DataType::Int128, ), (DataType::UInt8, DataType::UInt8) => ( - Box::new(MulImpl::::new(DataType::UInt8)), + Box::new(MulImpl::::new(DataType::UInt8)), DataType::UInt8, ), (DataType::UInt16, DataType::UInt16) => ( - Box::new(MulImpl::::new(DataType::UInt16)), + Box::new(MulImpl::::new(DataType::UInt16)), DataType::UInt16, ), (DataType::UInt32, DataType::UInt32) => ( - Box::new(MulImpl::::new(DataType::UInt32)), + Box::new(MulImpl::::new(DataType::UInt32)), DataType::UInt32, ), (DataType::UInt64, DataType::UInt64) => ( - Box::new(MulImpl::::new(DataType::UInt64)), + Box::new(MulImpl::::new(DataType::UInt64)), DataType::UInt64, ), (DataType::UInt128, DataType::UInt128) => ( - Box::new(MulImpl::::new(DataType::UInt128)), + Box::new(MulImpl::::new(DataType::UInt128)), DataType::UInt128, ), @@ -194,19 +194,19 @@ impl ScalarFunction for Mul { // Interval (DataType::Interval, DataType::Int32) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Interval, DataType::Int64) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Int32, DataType::Interval) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Int64, 
DataType::Interval) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), @@ -251,7 +251,7 @@ where buffer: PrimitiveBuffer::::with_len(lhs.logical_len()), }; - BinaryExecutor2::execute::(lhs, rhs, builder, |a, b, buf| { + BinaryExecutor2::execute::(lhs, rhs, builder, |a, b, buf| { // TODO: Overflow check buf.put(&Interval { months: a.months * (::from(b).unwrap_or_default()), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 88e05a26e..4940e0047 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -10,17 +10,17 @@ use crate::arrays::executor::physical_type::{ PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; @@ -126,43 +126,43 @@ impl ScalarFunction for Rem { DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( - Box::new(RemImpl::::new(DataType::Int8)), + Box::new(RemImpl::::new(DataType::Int8)), DataType::Int8, ), (DataType::Int16, DataType::Int16) => ( - Box::new(RemImpl::::new(DataType::Int16)), + Box::new(RemImpl::::new(DataType::Int16)), DataType::Int16, ), (DataType::Int32, DataType::Int32) => ( - Box::new(RemImpl::::new(DataType::Int32)), + Box::new(RemImpl::::new(DataType::Int32)), DataType::Int32, ), (DataType::Int64, DataType::Int64) => ( - Box::new(RemImpl::::new(DataType::Int64)), + Box::new(RemImpl::::new(DataType::Int64)), DataType::Int64, ), (DataType::Int128, 
DataType::Int128) => ( - Box::new(RemImpl::::new(DataType::Int128)), + Box::new(RemImpl::::new(DataType::Int128)), DataType::Int128, ), (DataType::UInt8, DataType::UInt8) => ( - Box::new(RemImpl::::new(DataType::UInt8)), + Box::new(RemImpl::::new(DataType::UInt8)), DataType::UInt8, ), (DataType::UInt16, DataType::UInt16) => ( - Box::new(RemImpl::::new(DataType::UInt16)), + Box::new(RemImpl::::new(DataType::UInt16)), DataType::UInt16, ), (DataType::UInt32, DataType::UInt32) => ( - Box::new(RemImpl::::new(DataType::UInt32)), + Box::new(RemImpl::::new(DataType::UInt32)), DataType::UInt32, ), (DataType::UInt64, DataType::UInt64) => ( - Box::new(RemImpl::::new(DataType::UInt64)), + Box::new(RemImpl::::new(DataType::UInt64)), DataType::UInt64, ), (DataType::UInt128, DataType::UInt128) => ( - Box::new(RemImpl::::new(DataType::UInt128)), + Box::new(RemImpl::::new(DataType::UInt128)), DataType::UInt128, ), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 3bf9b3820..868814b34 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -10,17 +10,17 @@ use crate::arrays::executor::physical_type::{ PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, }; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::arrays::storage::PrimitiveStorage; @@ -125,59 +125,59 @@ impl ScalarFunction for Sub { DataType::Float64, ), (DataType::Int8, DataType::Int8) => ( - Box::new(SubImpl::::new(DataType::Int8)), + Box::new(SubImpl::::new(DataType::Int8)), 
DataType::Int8, ), (DataType::Int16, DataType::Int16) => ( - Box::new(SubImpl::::new(DataType::Int16)), + Box::new(SubImpl::::new(DataType::Int16)), DataType::Int16, ), (DataType::Int32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Int32)), + Box::new(SubImpl::::new(DataType::Int32)), DataType::Int32, ), (DataType::Int64, DataType::Int64) => ( - Box::new(SubImpl::::new(DataType::Int64)), + Box::new(SubImpl::::new(DataType::Int64)), DataType::Int64, ), (DataType::Int128, DataType::Int128) => ( - Box::new(SubImpl::::new(DataType::Int128)), + Box::new(SubImpl::::new(DataType::Int128)), DataType::Int128, ), (DataType::UInt8, DataType::UInt8) => ( - Box::new(SubImpl::::new(DataType::UInt8)), + Box::new(SubImpl::::new(DataType::UInt8)), DataType::UInt8, ), (DataType::UInt16, DataType::UInt16) => ( - Box::new(SubImpl::::new(DataType::UInt16)), + Box::new(SubImpl::::new(DataType::UInt16)), DataType::UInt16, ), (DataType::UInt32, DataType::UInt32) => ( - Box::new(SubImpl::::new(DataType::UInt32)), + Box::new(SubImpl::::new(DataType::UInt32)), DataType::UInt32, ), (DataType::UInt64, DataType::UInt64) => ( - Box::new(SubImpl::::new(DataType::UInt64)), + Box::new(SubImpl::::new(DataType::UInt64)), DataType::UInt64, ), (DataType::UInt128, DataType::UInt128) => ( - Box::new(SubImpl::::new(DataType::UInt128)), + Box::new(SubImpl::::new(DataType::UInt128)), DataType::UInt128, ), // TODO: Split out decimal (for scaling) datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), + Box::new(SubImpl::::new(datatypes.0.clone())), datatypes.0, ), datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), + Box::new(SubImpl::::new(datatypes.0.clone())), datatypes.0, ), // Date + days (DataType::Date32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Date32)), + Box::new(SubImpl::::new(DataType::Date32)), DataType::Date32, ), diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index 3c2adf139..af673e6fd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -7,7 +7,7 @@ use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalBool; +use crate::arrays::executor::physical_type::PhysicalBool_2; use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UniformExecutor}; use crate::arrays::storage::BooleanStorage; use crate::expr::Expression; @@ -84,7 +84,7 @@ impl ScalarFunctionImpl for AndImpl { 2 => { let a = inputs[0]; let b = inputs[1]; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { @@ -98,7 +98,7 @@ impl ScalarFunctionImpl for AndImpl { let a = inputs[0]; let b = inputs[1]; let c = inputs[2]; - TernaryExecutor::execute::( + TernaryExecutor::execute::( a, b, c, @@ -111,7 +111,7 @@ impl ScalarFunctionImpl for AndImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, @@ -192,7 +192,7 @@ impl ScalarFunctionImpl for OrImpl { 2 => { let a = inputs[0]; let b = inputs[1]; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { @@ -204,7 +204,7 @@ impl ScalarFunctionImpl for OrImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index f69c50624..7d3343e8b 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -10,26 +10,26 @@ use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalInterval, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalInterval_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUntypedNull, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUntypedNull_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer, FlexibleListExecutor}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; @@ -517,38 +517,38 @@ fn new_comparison_impl( inputs[1].datatype(table_list)?, ) { (DataType::Boolean, DataType::Boolean) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Int8, DataType::Int8) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Int16, DataType::Int16) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Int32, DataType::Int32) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Int64, DataType::Int64) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Int128, DataType::Int128) => { - Box::new(BaseComparisonImpl::::new()) + 
Box::new(BaseComparisonImpl::::new()) } (DataType::UInt8, DataType::UInt8) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::UInt16, DataType::UInt16) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::UInt32, DataType::UInt32) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::UInt64, DataType::UInt64) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::UInt128, DataType::UInt128) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Float16, DataType::Float16) => { Box::new(BaseComparisonImpl::::new()) @@ -566,22 +566,22 @@ fn new_comparison_impl( RescalingComparisionImpl::::new(left, right), ), (DataType::Timestamp(_), DataType::Timestamp(_)) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Interval, DataType::Interval) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Date32, DataType::Date32) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Date64, DataType::Date64) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Utf8, DataType::Utf8) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::Binary, DataType::Binary) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(BaseComparisonImpl::::new()) } (DataType::List(m1), DataType::List(m2)) if m1 == m2 => { // TODO: We'll want to figure out casting for lists. 
@@ -671,62 +671,62 @@ where let array = match self.inner_physical_type { PhysicalType2::UntypedNull => FlexibleListExecutor::binary_reduce::< - PhysicalUntypedNull, + PhysicalUntypedNull_2, _, ListComparisonReducer<_, O>, >(left, right, builder)?, PhysicalType2::Boolean => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Int8 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Int16 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Int32 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Int64 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::Int128 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::UInt8 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::UInt16 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::UInt32 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::UInt64 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } PhysicalType2::UInt128 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } @@ -746,17 +746,17 @@ where )? 
} PhysicalType2::Interval => FlexibleListExecutor::binary_reduce::< - PhysicalInterval, + PhysicalInterval_2, + _, + ListComparisonReducer<_, O>, + >(left, right, builder)?, + PhysicalType2::Binary => FlexibleListExecutor::binary_reduce::< + PhysicalBinary_2, _, ListComparisonReducer<_, O>, >(left, right, builder)?, - PhysicalType2::Binary => { - FlexibleListExecutor::binary_reduce::>( - left, right, builder, - )? - } PhysicalType2::Utf8 => { - FlexibleListExecutor::binary_reduce::>( + FlexibleListExecutor::binary_reduce::>( left, right, builder, )? } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs index 48ab8cbda..c843259cc 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs @@ -5,7 +5,7 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalI64; +use crate::arrays::executor::physical_type::PhysicalI64_2; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -188,7 +188,7 @@ impl ScalarFunctionImpl for DateTruncImpl { buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let v = (v / trunc) * trunc; buf.put(&v) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs index e7bcbf5c3..416a46785 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalI64; +use crate::arrays::executor::physical_type::PhysicalI64_2; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -116,7 +116,7 @@ fn to_timestamp(input: &Array2) -> Result { buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { buf.put(&(v * S)); }) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index ba5b07b42..5d8e096c4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool}; +use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool_2}; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -312,7 +312,7 @@ impl ScalarFunctionImpl for CheckBoolImpl(input, builder, |val, buf| { + let array = UnaryExecutor2::execute::(input, builder, |val, buf| { let b = if NOT { val != BOOL } else { val == BOOL }; buf.put(&b) })?; diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index c3db4c4b9..e1a067120 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -15,25 +15,25 @@ use crate::arrays::executor::builder::{ PrimitiveBuffer, }; use crate::arrays::executor::physical_type::{ - PhysicalBinary, - PhysicalBool, + PhysicalBinary_2, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, - PhysicalList, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, + PhysicalList_2, PhysicalStorage2, PhysicalType2, - PhysicalU128, - PhysicalU16, - PhysicalU32, - PhysicalU64, - PhysicalU8, - PhysicalUtf8, + PhysicalU128_2, + PhysicalU16_2, + PhysicalU32_2, + PhysicalU64_2, + PhysicalU8_2, + PhysicalUtf8_2, }; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; @@ -139,77 +139,77 @@ fn extract(array: &Array2, idx: usize) -> Result { datatype: DataType::Boolean, buffer: BooleanBuffer::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Int8 => { let builder = ArrayBuilder { datatype: DataType::Int8, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Int16 => { let builder = ArrayBuilder { datatype: DataType::Int16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Int32 => { let builder = ArrayBuilder { datatype: DataType::Int32, buffer: 
PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Int64 => { let builder = ArrayBuilder { datatype: DataType::Int64, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Int128 => { let builder = ArrayBuilder { datatype: DataType::Int128, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::UInt8 => { let builder = ArrayBuilder { datatype: DataType::UInt8, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::UInt16 => { let builder = ArrayBuilder { datatype: DataType::UInt16, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::UInt32 => { let builder = ArrayBuilder { datatype: DataType::UInt32, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::UInt64 => { let builder = ArrayBuilder { datatype: DataType::UInt64, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::UInt128 => { let builder = ArrayBuilder { datatype: DataType::UInt128, buffer: PrimitiveBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, 
data.inner_array(), idx) } PhysicalType2::Float16 => { let builder = ArrayBuilder { @@ -237,14 +237,14 @@ fn extract(array: &Array2, idx: usize) -> Result { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } PhysicalType2::Binary => { let builder = ArrayBuilder { datatype: DataType::Binary, buffer: GermanVarlenBuffer::<[u8]>::with_len(array.logical_len()), }; - extract_inner::(builder, array, data.inner_array(), idx) + extract_inner::(builder, array, data.inner_array(), idx) } other => not_implemented!("List extract for physical type {other:?}"), } @@ -265,7 +265,7 @@ where let mut validity = Bitmap::new_with_all_true(builder.buffer.len()); - UnaryExecutor2::for_each::(outer, |idx, metadata| { + UnaryExecutor2::for_each::(outer, |idx, metadata| { if let Some(metadata) = metadata { if el_idx >= metadata.len { // Indexing outside of the list. 
Mark null diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index 7fca34c97..996c3cdeb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -6,15 +6,15 @@ use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{ - PhysicalBool, + PhysicalBool_2, PhysicalF16_2, PhysicalF32_2, PhysicalF64_2, - PhysicalI128, - PhysicalI16, - PhysicalI32, - PhysicalI64, - PhysicalI8, + PhysicalI128_2, + PhysicalI16_2, + PhysicalI32_2, + PhysicalI64_2, + PhysicalI8_2, PhysicalStorage2, }; use crate::arrays::executor::scalar::UnaryExecutor2; @@ -61,11 +61,11 @@ impl ScalarFunction for Negate { // TODO: Interval let function_impl: Box = match dt.clone() { - dt @ DataType::Int8 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int64 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int128 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Int8 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Int16 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Int32 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Int64 => Box::new(NegateImpl::::new(dt)), + dt @ DataType::Int128 => Box::new(NegateImpl::::new(dt)), dt @ DataType::Float16 => Box::new(NegateImpl::::new(dt)), dt @ DataType::Float32 => Box::new(NegateImpl::::new(dt)), dt @ DataType::Float64 => Box::new(NegateImpl::::new(dt)), @@ -166,7 +166,7 @@ pub struct NotImpl; impl ScalarFunctionImpl for NotImpl { fn execute2(&self, inputs: &[&Array2]) -> Result { - UnaryExecutor2::execute::( + UnaryExecutor2::execute::( inputs[0], ArrayBuilder { datatype: 
DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs index f102ba675..45d88f2fb 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ascii.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -68,7 +68,7 @@ impl ScalarFunctionImpl for AsciiImpl { buffer: PrimitiveBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let v = v.chars().next().map(|c| c as i32).unwrap_or(0); buf.put(&v) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index 12a840db8..297ec9749 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -3,7 +3,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -135,7 
+135,7 @@ where buffer: GermanVarlenBuffer::::with_len_and_data_capacity(input.logical_len(), cap), }; - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { // TODO: Non-allocating variant. buf.put(&case_fn(v)) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index 51f0c59c2..55be516e2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UniformExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -84,7 +84,7 @@ impl ScalarFunctionImpl for StringConcatImpl { // TODO: Compute data capacity. 
- BinaryExecutor2::execute::( + BinaryExecutor2::execute::( a, b, ArrayBuilder { @@ -102,7 +102,7 @@ impl ScalarFunctionImpl for StringConcatImpl { _ => { let mut string_buf = String::new(); - UniformExecutor::execute::( + UniformExecutor::execute::( inputs, ArrayBuilder { datatype: DataType::Utf8, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 13f00dd2c..3ea1f64a9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -88,7 +88,7 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.contains(&self.constant)) }) } @@ -104,7 +104,7 @@ impl ScalarFunctionImpl for StringContainsImpl { buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index c2bcb71b6..09a975667 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -92,7 +92,7 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.ends_with(&self.constant)) }) } @@ -108,7 +108,7 @@ impl ScalarFunctionImpl for EndsWithImpl { buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index d60b0a5f0..6b76bc9f0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalBinary, PhysicalUtf8}; +use crate::arrays::executor::physical_type::{PhysicalBinary_2, PhysicalUtf8_2}; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -72,7 +72,7 @@ impl ScalarFunctionImpl for StrLengthImpl { 
buffer: PrimitiveBuffer::with_len(input.logical_len()), }; - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let len = v.chars().count() as i64; buf.put(&len) }) @@ -154,7 +154,7 @@ impl ScalarFunctionImpl for ByteLengthImpl { }; // Binary applicable to both str and [u8]. - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { buf.put(&(v.len() as i64)) }) } @@ -231,7 +231,7 @@ impl ScalarFunctionImpl for BitLengthImpl { }; // Binary applicable to both str and [u8]. - UnaryExecutor2::execute::(input, builder, |v, buf| { + UnaryExecutor2::execute::(input, builder, |v, buf| { let bit_len = v.len() * 8; buf.put(&(bit_len as i64)) }) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index 549439033..3d626f36b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -4,7 +4,7 @@ use regex::{escape, Regex}; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -90,7 +90,7 @@ impl ScalarFunctionImpl for LikeConstImpl { buffer: BooleanBuffer::with_len(inputs[0].logical_len()), }; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { let b = self.constant.is_match(s); buf.put(&b); }) @@ -109,7 +109,7 @@ impl ScalarFunctionImpl for LikeImpl { let mut s_buf = String::new(); - BinaryExecutor2::execute::( + 
BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs index 0a3f4f7b9..49436ccf1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; +use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -104,7 +104,7 @@ impl ScalarFunctionImpl for LeftPadImpl { }; match inputs.len() { - 2 => BinaryExecutor2::execute::( + 2 => BinaryExecutor2::execute::( inputs[0], inputs[1], builder, @@ -113,7 +113,7 @@ impl ScalarFunctionImpl for LeftPadImpl { buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( + 3 => TernaryExecutor::execute::( inputs[0], inputs[1], inputs[2], @@ -217,7 +217,7 @@ impl ScalarFunctionImpl for RightPadImpl { }; match inputs.len() { - 2 => BinaryExecutor2::execute::( + 2 => BinaryExecutor2::execute::( inputs[0], inputs[1], builder, @@ -226,7 +226,7 @@ impl ScalarFunctionImpl for RightPadImpl { buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( + 3 => TernaryExecutor::execute::( inputs[0], inputs[1], inputs[2], diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs index ac1dc6849..35b1ac9f8 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs @@ -4,7 +4,7 @@ use regex::Regex; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -106,23 +106,25 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { match (self.pattern.as_ref(), self.replacement.as_ref()) { (Some(pattern), Some(replacement)) => { - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { // TODO: Flags to more many. let out = pattern.replace(s, replacement); buf.put(out.as_ref()); }) } - (Some(pattern), None) => BinaryExecutor2::execute::( - inputs[0], - inputs[2], - builder, - |s, replacement, buf| { - let out = pattern.replace(s, replacement); - buf.put(out.as_ref()); - }, - ), + (Some(pattern), None) => { + BinaryExecutor2::execute::( + inputs[0], + inputs[2], + builder, + |s, replacement, buf| { + let out = pattern.replace(s, replacement); + buf.put(out.as_ref()); + }, + ) + } (None, Some(replacement)) => { - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, @@ -141,7 +143,7 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { ) } (None, None) => { - TernaryExecutor::execute::( + TernaryExecutor::execute::( inputs[0], inputs[1], inputs[2], diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs index d4dd46cab..338971205 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs @@ -5,7 +5,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; +use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; use crate::arrays::executor::scalar::BinaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -73,7 +73,7 @@ impl ScalarFunctionImpl for RepeatUtf8Impl { let mut string_buf = String::new(); - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( strings, nums, ArrayBuilder { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs index 4e80eb0aa..6034a7a33 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -92,11 +92,11 @@ impl ScalarFunctionImpl for StartsWithImpl { match self.constant.as_ref() { Some(constant) => { - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { buf.put(&s.starts_with(constant)) }) } - None => BinaryExecutor2::execute::( + None => BinaryExecutor2::execute::( inputs[0], 
inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index 102dc4120..fb1c7de24 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -3,7 +3,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64, PhysicalUtf8}; +use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -107,7 +107,7 @@ pub struct SubstringFromImpl; impl ScalarFunctionImpl for SubstringFromImpl { fn execute2(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], ArrayBuilder { @@ -125,7 +125,7 @@ pub struct SubstringFromToImpl; impl ScalarFunctionImpl for SubstringFromToImpl { fn execute2(&self, inputs: &[&Array2]) -> Result { let len = inputs[0].logical_len(); - TernaryExecutor::execute::( + TernaryExecutor::execute::( inputs[0], inputs[1], inputs[2], diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index e192fee34..2fc8507d2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use crate::arrays::array::Array2; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, 
GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8; +use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -228,7 +228,7 @@ impl ScalarFunctionImpl for TrimWhitespaceImpl { buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), }; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { + UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { let trimmed = F::trim_func(s, " "); buf.put(trimmed) }) @@ -253,7 +253,7 @@ impl ScalarFunctionImpl for TrimPatternImpl { buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), }; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( inputs[0], inputs[1], builder, diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index 5c379e9cf..df6c5404a 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -6,7 +6,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::PhysicalI64; +use crate::arrays::executor::physical_type::PhysicalI64_2; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; @@ -259,15 +259,15 @@ impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { }; // Generate new params from row. 
- let start = UnaryExecutor2::value_at::( + let start = UnaryExecutor2::value_at::( batch.column(0).unwrap(), self.next_row_idx, )?; - let end = UnaryExecutor2::value_at::( + let end = UnaryExecutor2::value_at::( batch.column(1).unwrap(), self.next_row_idx, )?; - let step = UnaryExecutor2::value_at::( + let step = UnaryExecutor2::value_at::( batch.column(2).unwrap(), self.next_row_idx, )?; diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index cba16259c..1b46461ca 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -6,7 +6,7 @@ use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch2; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{PhysicalList, PhysicalType2}; +use crate::arrays::executor::physical_type::{PhysicalList_2, PhysicalType2}; use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; @@ -213,7 +213,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { _other => return Err(RayexecError::new("Unexpected storage type")), }; - match UnaryExecutor2::value_at::(input, self.current_row)? { + match UnaryExecutor2::value_at::(input, self.current_row)? { Some(meta) => { // Row is a list, unnest. unnest(child, meta.len as usize, meta)? 
diff --git a/crates/rayexec_parquet/src/writer/mod.rs b/crates/rayexec_parquet/src/writer/mod.rs index f9d41567c..149f17914 100644 --- a/crates/rayexec_parquet/src/writer/mod.rs +++ b/crates/rayexec_parquet/src/writer/mod.rs @@ -15,7 +15,7 @@ use rayexec_error::{not_implemented, OptionExt, RayexecError, Result, ResultExt} use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::batch::Batch2; use rayexec_execution::arrays::datatype::DataType; -use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary, PhysicalStorage2}; +use rayexec_execution::arrays::executor::physical_type::{PhysicalBinary_2, PhysicalStorage2}; use rayexec_execution::arrays::field::Schema; use rayexec_execution::arrays::storage::AddressableStorage; use rayexec_io::FileSink; @@ -313,7 +313,7 @@ fn write_array(writer: &mut ColumnWriter

, array: &Array2) -> R // TODO: Try not to copy here. There's a hard requirement on the // physical type being `Bytes`, and so a conversion needs to // happen somewhere. - let storage = PhysicalBinary::get_storage(array.array_data())?; + let storage = PhysicalBinary_2::get_storage(array.array_data())?; let mut data = Vec::with_capacity(storage.len()); for idx in 0..storage.len() { let val = storage.get(idx).required("binary data")?; From f0aa35e415cd49d60aa1afcf7fa05073297696ad Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 15:06:10 -0500 Subject: [PATCH 18/59] switch add --- .../rayexec_execution/src/arrays/batch_exp.rs | 74 +++++- .../rayexec_execution/src/arrays/testutil.rs | 220 +++++++++++++++--- .../src/execution/operators/util/resizer.rs | 138 +---------- .../src/functions/scalar/builtin/arith/add.rs | 212 ++++++++--------- .../functions/scalar/builtin/numeric/isnan.rs | 2 +- .../functions/scalar/builtin/numeric/mod.rs | 2 +- .../src/functions/scalar/mod.rs | 2 +- 7 files changed, 366 insertions(+), 284 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 9033d930f..23ff71cbf 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -1,5 +1,5 @@ use iterutil::IntoExactSizeIterator; -use rayexec_error::Result; +use rayexec_error::{RayexecError, Result}; use super::array::exp::Array; use super::array::selection::Selection; @@ -68,6 +68,61 @@ where }) } + /// Create a new batch from some number of arrays. + /// + /// All arrays are expected to have the same capacity. + /// + /// `row_eq_cap` indicates if the logical cardinality of the batch should + /// equal the capacity of the arrays. If false, the logical cardinality will + /// be set to zero. 
+ pub(crate) fn from_arrays( + arrays: impl IntoIterator>, + rows_eq_cap: bool, + ) -> Result { + let arrays: Vec<_> = arrays.into_iter().collect(); + let capacity = match arrays.first() { + Some(arr) => arr.capacity(), + None => { + return Ok(Batch { + arrays: Vec::new(), + num_rows: 0, + capacity: 0, + }) + } + }; + + for array in &arrays { + if array.capacity() != capacity { + return Err(RayexecError::new( + "Attempted to create batch from arrays with different capacities", + ) + .with_field("expected", capacity) + .with_field("got", array.capacity())); + } + } + + Ok(Batch { + arrays, + num_rows: if rows_eq_cap { capacity } else { 0 }, + capacity, + }) + } + + pub fn num_rows(&self) -> usize { + self.num_rows + } + + pub fn set_num_rows(&mut self, rows: usize) -> Result<()> { + if rows > self.capacity { + return Err(RayexecError::new("Number of rows exceeds capacity") + .with_field("capacity", self.capacity) + .with_field("requested_num_rows", rows)); + } + self.num_rows = rows; + + Ok(()) + } + /// Returns a selection that selects rows [0, num_rows). pub fn selection(&self) -> Selection { Selection::Linear { len: self.num_rows } @@ -81,3 +136,20 @@ where &mut self.arrays } } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + + #[test] + fn from_arrays_all_same_len() { + let a = Array::try_from_iter([3, 4, 5]).unwrap(); + let b = Array::try_from_iter(["a", "b", "c"]).unwrap(); + + let batch = Batch::from_arrays([a, b], true).unwrap(); + + assert_eq!(3, batch.selection().len()); + } +} diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index b7ca194b5..11443a546 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -5,63 +5,219 @@ //! //! Should not be used outside of tests. 
-use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; +use std::collections::BTreeMap; +use std::fmt::Debug; -/// Asserts that two arrays are logically equal. -pub fn assert_arrays_eq(a: &Array2, b: &Array2) { - assert_eq!(a.datatype(), b.datatype(), "data types differ"); - assert_eq!(a.logical_len(), b.logical_len(), "logical lengths differ"); +use super::array::exp::Array; +use super::batch_exp::Batch; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{ + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUtf8, +}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; - for row_idx in 0..a.logical_len() { - let a_val = a.logical_value(row_idx).unwrap(); - let b_val = b.logical_value(row_idx).unwrap(); +/// Assert two arrays are logically equal. +/// +/// This will assume that the array's capacity is the array's logical length. +pub fn assert_arrays_eq(array1: &Array, array2: &Array) { + assert_eq!( + array1.capacity(), + array2.capacity(), + "array capacities differ" + ); + assert_arrays_eq_count(array1, array2, array1.capacity()) +} + +/// Asserts that two arrays are logically equal for the first `count` rows. +/// +/// This will check valid and invalid values. Assertion error messages will +/// print out Some/None to represent valid/invalid. 
+pub fn assert_arrays_eq_count(array1: &Array, array2: &Array, count: usize) { + assert_eq!(array1.datatype, array2.datatype); + + let flat1 = array1.flat_view().unwrap(); + let flat2 = array2.flat_view().unwrap(); + + fn assert_eq_inner(flat1: FlatArrayView, flat2: FlatArrayView, count: usize) + where + S: PhysicalStorage, + S::StorageType: ToOwned, + { + let mut out = BTreeMap::new(); + let sel = 0..count; + + UnaryExecutor::for_each_flat::(flat1, sel.clone(), |idx, v| { + out.insert(idx, v.map(|v| v.to_owned())); + }) + .unwrap(); + + UnaryExecutor::for_each_flat::(flat2, sel, |idx, v| match out.remove(&idx) { + Some(existing) => { + let v = v.map(|v| v.to_owned()); + assert_eq!(existing, v, "values differ at index {idx}"); + } + None => panic!("missing value for index in array 1 {idx}"), + }) + .unwrap(); + + if !out.is_empty() { + panic!("extra entries in array 1: {:?}", out); + } + } - assert_eq!(a_val, b_val); + match array1.datatype.physical_type() { + PhysicalType::Boolean => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Int8 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Int16 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Int32 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Int64 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Int128 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::UInt8 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::UInt16 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::UInt32 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::UInt64 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::UInt128 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Float16 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Float32 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Float64 => assert_eq_inner::(flat1, flat2, count), + PhysicalType::Utf8 => assert_eq_inner::(flat1, flat2, count), + other => unimplemented!("{other:?}"), 
} } -/// Asserts that two batches are logically equal. -pub fn assert_batches_eq(a: &Batch2, b: &Batch2) { - assert_eq!(a.num_rows(), b.num_rows(), "num rows differ"); - assert_eq!(a.num_columns(), b.num_columns(), "num columns differ"); +/// Asserts two batches are logically equal. +pub fn assert_batches_eq(batch1: &Batch, batch2: &Batch) { + let arrays1 = batch1.arrays(); + let arrays2 = batch2.arrays(); - for col_idx in 0..a.num_columns() { - let a_col = a.column(col_idx).unwrap(); - let b_col = b.column(col_idx).unwrap(); + assert_eq!( + arrays1.len(), + arrays2.len(), + "batches have different number of arrays" + ); + assert_eq!( + batch1.num_rows(), + batch2.num_rows(), + "batches have different number of rows" + ); - assert_arrays_eq(a_col, b_col); + for (array1, array2) in arrays1.iter().zip(arrays2) { + assert_arrays_eq_count(array1, array2, batch1.num_rows()); } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; - use crate::arrays::selection::SelectionVector; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + + #[test] + fn assert_i32_arrays_eq_simple() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5, 6]).unwrap(); + + assert_arrays_eq(&array1, &array2); + } + + #[test] + fn assert_i32_arrays_eq_with_dictionary() { + let array1 = Array::try_from_iter([5, 4, 4]).unwrap(); + let mut array2 = Array::try_from_iter([4, 5]).unwrap(); + array2.select(&NopBufferManager, [1, 0, 0]).unwrap(); + + assert_arrays_eq(&array1, &array2); + } + + #[test] + fn assert_i32_arrays_eq_with_invalid() { + let mut array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + array1.validity.set_invalid(1); + + let mut array2 = Array::try_from_iter([4, 8, 6]).unwrap(); + array2.validity.set_invalid(1); + + assert_arrays_eq(&array1, &array2); + } #[test] - fn arrays_eq() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([1, 2, 3]); + fn assert_batches_eq_simple() { + let 
batch1 = Batch::from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + let batch2 = Batch::from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); - assert_arrays_eq(&a, &b); + assert_batches_eq(&batch1, &batch2); } #[test] - fn arrays_eq_with_selection() { - let a = Array2::from_iter([2, 2, 2]); - let mut b = Array2::from_iter([2]); - b.select_mut(SelectionVector::repeated(3, 0)); + fn assert_batches_eq_logical_row_count() { + let mut batch1 = Batch::from_arrays( + [ + Array::try_from_iter([4, 5, 6, 7, 8]).unwrap(), + Array::try_from_iter(["a", "b", "c", "d", "e"]).unwrap(), + ], + false, + ) + .unwrap(); + batch1.set_num_rows(3).unwrap(); + + let batch2 = Batch::from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + + assert_batches_eq(&batch1, &batch2); + } + + #[test] + #[should_panic] + fn assert_i32_arrays_eq_not_eq() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5, 7]).unwrap(); - assert_arrays_eq(&a, &b); + assert_arrays_eq(&array1, &array2); } #[test] #[should_panic] - fn arrays_not_eq() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter(["a", "b", "c"]); + fn assert_i32_arrays_different_lengths() { + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([4, 5]).unwrap(); - assert_arrays_eq(&a, &b); + assert_arrays_eq(&array1, &array2); } } diff --git a/crates/rayexec_execution/src/execution/operators/util/resizer.rs b/crates/rayexec_execution/src/execution/operators/util/resizer.rs index c6146cef9..d9e594da8 100644 --- a/crates/rayexec_execution/src/execution/operators/util/resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/util/resizer.rs @@ -6,6 +6,8 @@ use 
crate::arrays::batch::Batch2; use crate::arrays::selection::SelectionVector; use crate::execution::computed_batch::ComputedBatches; +// TODO: Delete + // TODO: Shouldn't be a const, should be determined when we create the // executable plans. pub const DEFAULT_TARGET_BATCH_SIZE: usize = 4096; @@ -113,139 +115,3 @@ impl BatchResizer { Ok(ComputedBatches::Single(out)) } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::arrays::array::Array2; - use crate::arrays::testutil::assert_batches_eq; - - #[test] - fn push_within_target() { - let batch1 = Batch2::try_new([ - Array2::from_iter([1, 2, 3]), - Array2::from_iter(["a", "b", "c"]), - ]) - .unwrap(); - - let batch2 = Batch2::try_new([ - Array2::from_iter([4, 5, 6]), - Array2::from_iter(["d", "e", "f"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - - let out = resizer.try_push(batch1).unwrap(); - assert!(matches!(out, ComputedBatches::None)); - - let out = resizer.try_push(batch2).unwrap(); - let got = match out { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - let expected = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4]), - Array2::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - - assert_batches_eq(&expected, &got); - - let expected_rem = - Batch2::try_new([Array2::from_iter([5, 6]), Array2::from_iter(["e", "f"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn push_large_batch() { - // len(batch) > target && len(batch) < target * 2 - - let batch = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4, 5]), - Array2::from_iter(["a", "b", "c", "d", "e"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - let got = match resizer.try_push(batch).unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: 
{other:?}"), - }; - - let expected = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4]), - Array2::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - - assert_batches_eq(&expected, &got); - - let expected_rem = - Batch2::try_new([Array2::from_iter([5]), Array2::from_iter(["e"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn push_very_large_batch() { - // len(batch) > target * 2 - - let batch = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), - Array2::from_iter(["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"]), - ]) - .unwrap(); - - let mut resizer = BatchResizer::new(4); - let gots = match resizer.try_push(batch).unwrap() { - ComputedBatches::Multi(batches) => batches, - other => panic!("unexpected out: {other:?}"), - }; - - assert_eq!(2, gots.len()); - - let expected1 = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4]), - Array2::from_iter(["a", "b", "c", "d"]), - ]) - .unwrap(); - assert_batches_eq(&expected1, &gots[0]); - - let expected2 = Batch2::try_new([ - Array2::from_iter([5, 6, 7, 8]), - Array2::from_iter(["e", "f", "g", "h"]), - ]) - .unwrap(); - assert_batches_eq(&expected2, &gots[1]); - - let expected_rem = - Batch2::try_new([Array2::from_iter([9, 10]), Array2::from_iter(["i", "j"])]).unwrap(); - - let remaining = match resizer.flush_remaining().unwrap() { - ComputedBatches::Single(batch) => batch, - other => panic!("unexpected out: {other:?}"), - }; - - assert_batches_eq(&expected_rem, &remaining); - } - - #[test] - fn flush_none() { - let mut resizer = BatchResizer::new(4); - let out = resizer.flush_remaining().unwrap(); - assert!(matches!(out, ComputedBatches::None)); - } -} diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index 
d9f88a52b..4d97b33e4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -3,27 +3,27 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor2; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -99,80 +99,63 @@ impl ScalarFunction for Add { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(AddImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(AddImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(AddImpl::::new(DataType::Float64)), - 
DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(AddImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(AddImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(AddImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(AddImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(AddImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(AddImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(AddImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(AddImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(AddImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(AddImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(AddImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(AddImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(AddImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(AddImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(AddImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(AddImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(AddImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(AddImpl::::new()), 
DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(AddImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(AddImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(AddImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(AddImpl::::new()), DataType::UInt128) + } // TODO: Split out decimal (for scaling) - datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(AddImpl::::new(datatypes.0.clone())), - datatypes.0, - ), + datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => { + (Box::new(AddImpl::::new()), datatypes.0) + } + datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => { + (Box::new(AddImpl::::new()), datatypes.0) + } // Date + days - (DataType::Date32, DataType::Int32) => ( - Box::new(AddImpl::::new(DataType::Date32)), - DataType::Date32, - ), + (DataType::Date32, DataType::Int32) => { + (Box::new(AddImpl::::new()), DataType::Date32) + } // Days + date // Note both are represented as i32 physical type, we don't need to worry about flipping the sides. 
- (DataType::Int32, DataType::Date32) => ( - Box::new(AddImpl::::new(DataType::Date32)), - DataType::Date32, - ), + (DataType::Int32, DataType::Date32) => { + (Box::new(AddImpl::::new()), DataType::Date32) + } // TODO: Interval (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -189,49 +172,52 @@ impl ScalarFunction for Add { #[derive(Debug, Clone)] pub struct AddImpl { - datatype: DataType, _s: PhantomData, } impl AddImpl { - fn new(datatype: DataType) -> Self { - AddImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + AddImpl { _s: PhantomData } } } impl ScalarFunctionImpl for AddImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Add> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Add + Sized + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a + b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a + b)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn add_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([4, 5, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = 
TableList::empty(); let table_ref = table_list @@ -249,9 +235,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([5, 7, 9]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([5, 7, 9]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index 99fb057a6..f3b7e1e49 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -114,7 +114,7 @@ where UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) } - fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { unimplemented!() } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 671f5cb54..3436227a1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -178,7 +178,7 @@ impl ScalarFunctionImpl for UnaryInputNumericScal } } - fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { let sel = input.selection(); let input = &input.arrays()[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/mod.rs b/crates/rayexec_execution/src/functions/scalar/mod.rs index 09db07146..dde1ffb40 100644 --- a/crates/rayexec_execution/src/functions/scalar/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/mod.rs @@ -114,7 +114,7 @@ pub trait 
ScalarFunctionImpl: Debug + Sync + Send + DynClone { /// /// `output` is guaranteed to be the exact size needed for the output as /// well as being the correct physical type. - fn execute(&self, input: Batch, output: &mut Array) -> Result<()> { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { unimplemented!() } } From 42f56b4afc1d7ff7e4fa898f3e11c62a67546cac Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 16:33:29 -0500 Subject: [PATCH 19/59] more arith --- .../src/arrays/scalar/decimal.rs | 23 +- .../src/functions/aggregate/builtin/avg.rs | 2 +- .../src/functions/aggregate/builtin/sum.rs | 2 +- .../src/functions/scalar/builtin/arith/div.rs | 235 ++++++++------- .../src/functions/scalar/builtin/arith/mul.rs | 279 +++++++++--------- .../src/functions/scalar/builtin/arith/rem.rs | 183 ++++++------ .../src/functions/scalar/builtin/arith/sub.rs | 209 +++++++------ .../functions/scalar/builtin/comparison.rs | 10 +- 8 files changed, 462 insertions(+), 481 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/scalar/decimal.rs b/crates/rayexec_execution/src/arrays/scalar/decimal.rs index f40686592..73963c4c0 100644 --- a/crates/rayexec_execution/src/arrays/scalar/decimal.rs +++ b/crates/rayexec_execution/src/arrays/scalar/decimal.rs @@ -5,6 +5,12 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use serde::{Deserialize, Serialize}; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalI128, + PhysicalI64, + PhysicalStorage, +}; use crate::arrays::executor::physical_type::{PhysicalI128_2, PhysicalI64_2, PhysicalStorage2}; pub trait DecimalPrimitive: @@ -30,11 +36,18 @@ impl DecimalPrimitive for i128 { pub trait DecimalType: Debug + Sync + Send + Copy + 'static where - for<'a> Self::Storage: PhysicalStorage2 = Self::Primitive>, + for<'a> Self::Storage2: PhysicalStorage2 = Self::Primitive>, { /// The underlying primitive type storing the decimal's value. 
type Primitive: DecimalPrimitive; - type Storage: PhysicalStorage2; + + type Storage: MutablePhysicalStorage< + PrimaryBufferType = Self::Primitive, + StorageType = Self::Primitive, + >; + + // TODO: Remove + type Storage2: PhysicalStorage2; /// Max precision for this decimal type. const MAX_PRECISION: u8; @@ -71,7 +84,8 @@ pub struct Decimal64Type; impl DecimalType for Decimal64Type { type Primitive = i64; - type Storage = PhysicalI64_2; + type Storage = PhysicalI64; + type Storage2 = PhysicalI64_2; const MAX_PRECISION: u8 = 18; // Note that changing this would require changing some of the date functions // since they assume this is 3. @@ -83,7 +97,8 @@ pub struct Decimal128Type; impl DecimalType for Decimal128Type { type Primitive = i128; - type Storage = PhysicalI128_2; + type Storage = PhysicalI128; + type Storage2 = PhysicalI128_2; const MAX_PRECISION: u8 = 38; const DEFAULT_SCALE: i8 = 9; } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index ff463ed47..061ebd7d8 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -166,7 +166,7 @@ where )) }; - new_unary_aggregate_states::( + new_unary_aggregate_states::( AvgStateDecimal::::default, state_finalize, ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 4ab0e244c..7991c8f37 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -159,7 +159,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::( + new_unary_aggregate_states::( SumStateCheckedAdd::::default, move |states| primitive_finalize(datatype.clone(), states), ) diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index fdc1c55de..596dd83fd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -3,30 +3,28 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::compute::cast::array::cast_decimal_to_float; -use crate::arrays::compute::cast::behavior::CastFailBehavior; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor2; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -105,58 +103,45 @@ impl ScalarFunction for Div { inputs[0].datatype(table_list)?, 
inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(DivImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(DivImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(DivImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(DivImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(DivImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(DivImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(DivImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(DivImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(DivImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(DivImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(DivImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(DivImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(DivImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(DivImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(DivImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(DivImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(DivImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(DivImpl::::new()), 
DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(DivImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(DivImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(DivImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(DivImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(DivImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(DivImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(DivImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(DivImpl::::new()), DataType::UInt128) + } // Decimals (DataType::Decimal64(_), DataType::Decimal64(_)) => ( @@ -198,80 +183,88 @@ impl ScalarFunctionImpl for DecimalDivImpl where D: DecimalType, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; + // fn execute2(&self, inputs: &[&Array2]) -> Result { + // unimplemented!() + // let a = inputs[0]; + // let b = inputs[1]; - let a = cast_decimal_to_float::( - a, - DataType::Float64, - CastFailBehavior::Error, - )?; - let b = cast_decimal_to_float::( - b, - DataType::Float64, - CastFailBehavior::Error, - )?; + // let a = cast_decimal_to_float::( + // a, + // DataType::Float64, + // CastFailBehavior::Error, + // )?; + // let b = cast_decimal_to_float::( + // b, + // DataType::Float64, + // CastFailBehavior::Error, + // )?; - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; + // let builder = ArrayBuilder { + // datatype: DataType::Float64, + // buffer: PrimitiveBuffer::with_len(a.logical_len()), + // }; - BinaryExecutor2::execute::( - &a, - &b, - builder, - |a, b, buf| buf.put(&(a / b)), - ) + // BinaryExecutor2::execute::( + // &a, + // &b, + // builder, + // |a, b, buf| 
buf.put(&(a / b)), + // ) + // } + + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + unimplemented!() } } #[derive(Debug, Clone)] pub struct DivImpl { - datatype: DataType, _s: PhantomData, } impl DivImpl { - fn new(datatype: DataType) -> Self { - DivImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + DivImpl { _s: PhantomData } } } impl ScalarFunctionImpl for DivImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Div> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Div + Sized + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a / b))) + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a / b)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn div_i32() { - let a = Array2::from_iter([4, 5, 6]); - let b = Array2::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -289,9 +282,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([4, 2, 2]); + let mut out = 
Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([4, 2, 2]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 77969737d..a657354a4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -4,30 +4,31 @@ use std::marker::PhantomData; use num_traits::{NumCast, PrimInt}; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalInterval_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalStorage, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor2; +use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::PrimitiveStorage; use 
crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -116,58 +117,45 @@ impl ScalarFunction for Mul { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(MulImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(MulImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(MulImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(MulImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(MulImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(MulImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(MulImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(MulImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(MulImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(MulImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(MulImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(MulImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(MulImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(MulImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(MulImpl::::new()), 
DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(MulImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(MulImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(MulImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(MulImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(MulImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(MulImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(MulImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(MulImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(MulImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(MulImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(MulImpl::::new()), DataType::UInt128) + } // Decimal (DataType::Decimal64(a), DataType::Decimal64(b)) => { @@ -178,7 +166,7 @@ impl ScalarFunction for Mul { let scale = a.scale + b.scale; let return_type = DataType::Decimal64(DecimalTypeMeta { precision, scale }); ( - Box::new(DecimalMulImpl::::new(return_type.clone())), + Box::new(DecimalMulImpl::::new()), return_type, ) } @@ -187,26 +175,26 @@ impl ScalarFunction for Mul { let scale = a.scale + b.scale; let return_type = DataType::Decimal128(DecimalTypeMeta { precision, scale }); ( - Box::new(DecimalMulImpl::::new(return_type.clone())), + Box::new(DecimalMulImpl::::new()), return_type, ) } // Interval (DataType::Interval, DataType::Int32) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Interval, DataType::Int64) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Int32, 
DataType::Interval) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), (DataType::Int64, DataType::Interval) => ( - Box::new(IntervalMulImpl::::new()), + Box::new(IntervalMulImpl::::new()), DataType::Interval, ), @@ -236,112 +224,113 @@ impl IntervalMulImpl { impl ScalarFunctionImpl for IntervalMulImpl where - Rhs: PhysicalStorage2, - for<'a> Rhs::Type<'a>: PrimInt, + Rhs: PhysicalStorage, + Rhs::StorageType: PrimInt, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let (lhs, rhs) = if LHS_RHS_FLIPPED { - (inputs[1], inputs[0]) - } else { - (inputs[0], inputs[1]) - }; - - let builder = ArrayBuilder { - datatype: DataType::Interval, - buffer: PrimitiveBuffer::::with_len(lhs.logical_len()), - }; - - BinaryExecutor2::execute::(lhs, rhs, builder, |a, b, buf| { - // TODO: Overflow check - buf.put(&Interval { - months: a.months * (::from(b).unwrap_or_default()), - days: a.days * (::from(b).unwrap_or_default()), - nanos: a.nanos * (::from(b).unwrap_or_default()), - }) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + let (lhs, rhs) = if LHS_RHS_FLIPPED { (b, a) } else { (a, b) }; + + BinaryExecutor::execute::( + lhs, + sel, + rhs, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| { + // TODO: Overflow check + buf.put(&Interval { + months: a.months * (::from(b).unwrap_or_default()), + days: a.days * (::from(b).unwrap_or_default()), + nanos: a.nanos * (::from(b).unwrap_or_default()), + }) + }, + ) } } #[derive(Debug, Clone)] pub struct DecimalMulImpl { - datatype: DataType, _d: PhantomData, } impl DecimalMulImpl { - fn new(datatype: DataType) -> Self { - DecimalMulImpl { - datatype, - _d: PhantomData, - } + const fn new() -> Self { + DecimalMulImpl { _d: PhantomData } } } impl ScalarFunctionImpl for DecimalMulImpl where D: DecimalType, - ArrayData2: From>, { - fn execute2(&self, inputs: 
&[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| { - buf.put(&(a * b)) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a * b)), + ) } } #[derive(Debug, Clone)] pub struct MulImpl { - datatype: DataType, _s: PhantomData, } impl MulImpl { - fn new(datatype: DataType) -> Self { - MulImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + MulImpl { _s: PhantomData } } } impl ScalarFunctionImpl for MulImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Mul> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Mul + Sized + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a * b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a * b)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn mul_i32() { - let a = Array2::from_iter([4, 5, 6]); - 
let b = Array2::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -359,9 +348,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([4, 10, 18]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([4, 10, 18]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 4940e0047..7f151e9b4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -3,27 +3,27 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor2; -use 
crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -113,58 +113,45 @@ impl ScalarFunction for Rem { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(RemImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(RemImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(RemImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(RemImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(RemImpl::::new(DataType::Int16)), - DataType::Int16, - ), - (DataType::Int32, DataType::Int32) => ( - Box::new(RemImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(RemImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(RemImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(RemImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(RemImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(RemImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(RemImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - 
Box::new(RemImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(RemImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(RemImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(RemImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(RemImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(RemImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(RemImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(RemImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(RemImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(RemImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(RemImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(RemImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, DataType::UInt64) => { + (Box::new(RemImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(RemImpl::::new()), DataType::UInt128) + } // TODO: Interval, date, decimal (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -181,49 +168,52 @@ impl ScalarFunction for Rem { #[derive(Debug, Clone)] pub struct RemImpl { - datatype: DataType, _s: PhantomData, } impl RemImpl { - fn new(datatype: DataType) -> Self { - RemImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + RemImpl { _s: PhantomData } } } impl ScalarFunctionImpl for RemImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Rem> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Rem + Sized + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - 
let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a % b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a % b)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn rem_i32() { - let a = Array2::from_iter([4, 5, 6]); - let b = Array2::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -241,9 +231,10 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([0, 1, 0]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([0, 1, 0]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 868814b34..5157ac36c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -3,27 +3,27 @@ use std::marker::PhantomData; use rayexec_error::Result; -use 
crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, }; -use crate::arrays::executor::scalar::BinaryExecutor2; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -112,74 +112,58 @@ impl ScalarFunction for Sub { inputs[0].datatype(table_list)?, inputs[1].datatype(table_list)?, ) { - (DataType::Float16, DataType::Float16) => ( - Box::new(SubImpl::::new(DataType::Float16)), - DataType::Float16, - ), - (DataType::Float32, DataType::Float32) => ( - Box::new(SubImpl::::new(DataType::Float32)), - DataType::Float32, - ), - (DataType::Float64, DataType::Float64) => ( - Box::new(SubImpl::::new(DataType::Float64)), - DataType::Float64, - ), - (DataType::Int8, DataType::Int8) => ( - Box::new(SubImpl::::new(DataType::Int8)), - DataType::Int8, - ), - (DataType::Int16, DataType::Int16) => ( - Box::new(SubImpl::::new(DataType::Int16)), - DataType::Int16, - ), - 
(DataType::Int32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Int32)), - DataType::Int32, - ), - (DataType::Int64, DataType::Int64) => ( - Box::new(SubImpl::::new(DataType::Int64)), - DataType::Int64, - ), - (DataType::Int128, DataType::Int128) => ( - Box::new(SubImpl::::new(DataType::Int128)), - DataType::Int128, - ), - (DataType::UInt8, DataType::UInt8) => ( - Box::new(SubImpl::::new(DataType::UInt8)), - DataType::UInt8, - ), - (DataType::UInt16, DataType::UInt16) => ( - Box::new(SubImpl::::new(DataType::UInt16)), - DataType::UInt16, - ), - (DataType::UInt32, DataType::UInt32) => ( - Box::new(SubImpl::::new(DataType::UInt32)), - DataType::UInt32, - ), - (DataType::UInt64, DataType::UInt64) => ( - Box::new(SubImpl::::new(DataType::UInt64)), - DataType::UInt64, - ), - (DataType::UInt128, DataType::UInt128) => ( - Box::new(SubImpl::::new(DataType::UInt128)), - DataType::UInt128, - ), + (DataType::Float16, DataType::Float16) => { + (Box::new(SubImpl::::new()), DataType::Float16) + } + (DataType::Float32, DataType::Float32) => { + (Box::new(SubImpl::::new()), DataType::Float32) + } + (DataType::Float64, DataType::Float64) => { + (Box::new(SubImpl::::new()), DataType::Float64) + } + (DataType::Int8, DataType::Int8) => { + (Box::new(SubImpl::::new()), DataType::Int8) + } + (DataType::Int16, DataType::Int16) => { + (Box::new(SubImpl::::new()), DataType::Int16) + } + (DataType::Int32, DataType::Int32) => { + (Box::new(SubImpl::::new()), DataType::Int32) + } + (DataType::Int64, DataType::Int64) => { + (Box::new(SubImpl::::new()), DataType::Int64) + } + (DataType::Int128, DataType::Int128) => { + (Box::new(SubImpl::::new()), DataType::Int128) + } + (DataType::UInt8, DataType::UInt8) => { + (Box::new(SubImpl::::new()), DataType::UInt8) + } + (DataType::UInt16, DataType::UInt16) => { + (Box::new(SubImpl::::new()), DataType::UInt16) + } + (DataType::UInt32, DataType::UInt32) => { + (Box::new(SubImpl::::new()), DataType::UInt32) + } + (DataType::UInt64, 
DataType::UInt64) => { + (Box::new(SubImpl::::new()), DataType::UInt64) + } + (DataType::UInt128, DataType::UInt128) => { + (Box::new(SubImpl::::new()), DataType::UInt128) + } // TODO: Split out decimal (for scaling) - datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => ( - Box::new(SubImpl::::new(datatypes.0.clone())), - datatypes.0, - ), - - // Date + days - (DataType::Date32, DataType::Int32) => ( - Box::new(SubImpl::::new(DataType::Date32)), - DataType::Date32, - ), + datatypes @ (DataType::Decimal64(_), DataType::Decimal64(_)) => { + (Box::new(SubImpl::::new()), datatypes.0) + } + datatypes @ (DataType::Decimal128(_), DataType::Decimal128(_)) => { + (Box::new(SubImpl::::new()), datatypes.0) + } + + // Date - days + (DataType::Date32, DataType::Int32) => { + (Box::new(SubImpl::::new()), DataType::Date32) + } // TODO: Interval (a, b) => return Err(invalid_input_types_error(self, &[a, b])), @@ -196,49 +180,52 @@ impl ScalarFunction for Sub { #[derive(Debug, Clone)] pub struct SubImpl { - datatype: DataType, _s: PhantomData, } impl SubImpl { - fn new(datatype: DataType) -> Self { - SubImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + SubImpl { _s: PhantomData } } } impl ScalarFunctionImpl for SubImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Sub> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Sub + Sized + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: self.datatype.clone(), - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - BinaryExecutor2::execute::(a, b, builder, |a, b, buf| buf.put(&(a - b))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = 
&input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a - b)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; use crate::functions::scalar::ScalarFunction; #[test] fn sub_i32() { - let a = Array2::from_iter([4, 5, 6]); - let b = Array2::from_iter([1, 2, 3]); + let a = Array::try_from_iter([4, 5, 6]).unwrap(); + let b = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -256,9 +243,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([3, 3, 3]); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([3, 3, 3]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 7d3343e8b..0395a985d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -856,13 +856,13 @@ where match self.left.scale.cmp(&self.right.scale) { Ordering::Greater => { - let scaled_right = decimal_rescale::( + let scaled_right = decimal_rescale::( right, left.datatype().clone(), CastFailBehavior::Error, )?; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( left, &scaled_right, builder, @@ -870,20 +870,20 @@ where ) } Ordering::Less => { - let scaled_left = decimal_rescale::( + let 
scaled_left = decimal_rescale::( left, right.datatype().clone(), CastFailBehavior::Error, )?; - BinaryExecutor2::execute::( + BinaryExecutor2::execute::( &scaled_left, right, builder, |a, b, buf| buf.put(&O::compare(a, b)), ) } - Ordering::Equal => BinaryExecutor2::execute::( + Ordering::Equal => BinaryExecutor2::execute::( left, right, builder, From 8543379ec55ed5f566ba76b7222e900e76837180 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 16:47:35 -0500 Subject: [PATCH 20/59] date extract --- .../src/arrays/compute/date.rs | 195 +++++++++++------- .../scalar/builtin/datetime/date_part.rs | 9 +- 2 files changed, 121 insertions(+), 83 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index 86be1f06e..255fec887 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -1,11 +1,12 @@ use chrono::{DateTime, Datelike, NaiveDate, Timelike, Utc}; +use iterutil::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array2; -use crate::arrays::datatype::{DataType, DecimalTypeMeta, TimeUnit}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI32_2, PhysicalI64_2}; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalI64}; +use crate::arrays::datatype::{DataType, TimeUnit}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; pub const EPOCH_NAIVE_DATE: NaiveDate = match NaiveDate::from_ymd_opt(1970, 1, 1) { @@ -69,48 +70,63 @@ pub enum DatePart { /// /// The results should be decimal representing the part extracted, and should /// use the Decimal64 default 
precision and scale. -pub fn extract_date_part(part: DatePart, arr: &Array2) -> Result { +pub fn extract_date_part( + part: DatePart, + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, +) -> Result<()> { let datatype = arr.datatype(); match datatype { DataType::Date32 => match part { - DatePart::Microseconds => date32_extract_with_fn(arr, extract_microseconds), - DatePart::Milliseconds => date32_extract_with_fn(arr, extract_milliseconds), - DatePart::Second => date32_extract_with_fn(arr, extract_seconds), - DatePart::Minute => date32_extract_with_fn(arr, extract_minute), - DatePart::DayOfWeek => date32_extract_with_fn(arr, extract_day_of_week), - DatePart::IsoDayOfWeek => date32_extract_with_fn(arr, extract_iso_day_of_week), - DatePart::Day => date32_extract_with_fn(arr, extract_day), - DatePart::Month => date32_extract_with_fn(arr, extract_month), - DatePart::Quarter => date32_extract_with_fn(arr, extract_quarter), - DatePart::Year => date32_extract_with_fn(arr, extract_year), + DatePart::Microseconds => date32_extract_with_fn(arr, sel, extract_microseconds, out), + DatePart::Milliseconds => date32_extract_with_fn(arr, sel, extract_milliseconds, out), + DatePart::Second => date32_extract_with_fn(arr, sel, extract_seconds, out), + DatePart::Minute => date32_extract_with_fn(arr, sel, extract_minute, out), + DatePart::DayOfWeek => date32_extract_with_fn(arr, sel, extract_day_of_week, out), + DatePart::IsoDayOfWeek => { + date32_extract_with_fn(arr, sel, extract_iso_day_of_week, out) + } + DatePart::Day => date32_extract_with_fn(arr, sel, extract_day, out), + DatePart::Month => date32_extract_with_fn(arr, sel, extract_month, out), + DatePart::Quarter => date32_extract_with_fn(arr, sel, extract_quarter, out), + DatePart::Year => date32_extract_with_fn(arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, DataType::Date64 => match part { - DatePart::Microseconds => date64_extract_with_fn(arr, 
extract_microseconds), - DatePart::Milliseconds => date64_extract_with_fn(arr, extract_milliseconds), - DatePart::Second => date64_extract_with_fn(arr, extract_seconds), - DatePart::Minute => date64_extract_with_fn(arr, extract_minute), - DatePart::DayOfWeek => date64_extract_with_fn(arr, extract_day_of_week), - DatePart::IsoDayOfWeek => date64_extract_with_fn(arr, extract_iso_day_of_week), - DatePart::Day => date64_extract_with_fn(arr, extract_day), - DatePart::Month => date64_extract_with_fn(arr, extract_month), - DatePart::Quarter => date64_extract_with_fn(arr, extract_quarter), - DatePart::Year => date64_extract_with_fn(arr, extract_year), + DatePart::Microseconds => date64_extract_with_fn(arr, sel, extract_microseconds, out), + DatePart::Milliseconds => date64_extract_with_fn(arr, sel, extract_milliseconds, out), + DatePart::Second => date64_extract_with_fn(arr, sel, extract_seconds, out), + DatePart::Minute => date64_extract_with_fn(arr, sel, extract_minute, out), + DatePart::DayOfWeek => date64_extract_with_fn(arr, sel, extract_day_of_week, out), + DatePart::IsoDayOfWeek => { + date64_extract_with_fn(arr, sel, extract_iso_day_of_week, out) + } + DatePart::Day => date64_extract_with_fn(arr, sel, extract_day, out), + DatePart::Month => date64_extract_with_fn(arr, sel, extract_month, out), + DatePart::Quarter => date64_extract_with_fn(arr, sel, extract_quarter, out), + DatePart::Year => date64_extract_with_fn(arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, DataType::Timestamp(m) => match part { - DatePart::Microseconds => timestamp_extract_with_fn(m.unit, arr, extract_microseconds), - DatePart::Milliseconds => timestamp_extract_with_fn(m.unit, arr, extract_milliseconds), - DatePart::Second => timestamp_extract_with_fn(m.unit, arr, extract_seconds), - DatePart::Minute => timestamp_extract_with_fn(m.unit, arr, extract_minute), - DatePart::DayOfWeek => timestamp_extract_with_fn(m.unit, arr, extract_day_of_week), + 
DatePart::Microseconds => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_microseconds, out) + } + DatePart::Milliseconds => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_milliseconds, out) + } + DatePart::Second => timestamp_extract_with_fn(m.unit, arr, sel, extract_seconds, out), + DatePart::Minute => timestamp_extract_with_fn(m.unit, arr, sel, extract_minute, out), + DatePart::DayOfWeek => { + timestamp_extract_with_fn(m.unit, arr, sel, extract_day_of_week, out) + } DatePart::IsoDayOfWeek => { - timestamp_extract_with_fn(m.unit, arr, extract_iso_day_of_week) + timestamp_extract_with_fn(m.unit, arr, sel, extract_iso_day_of_week, out) } - DatePart::Day => timestamp_extract_with_fn(m.unit, arr, extract_day), - DatePart::Month => timestamp_extract_with_fn(m.unit, arr, extract_month), - DatePart::Quarter => timestamp_extract_with_fn(m.unit, arr, extract_quarter), - DatePart::Year => timestamp_extract_with_fn(m.unit, arr, extract_year), + DatePart::Day => timestamp_extract_with_fn(m.unit, arr, sel, extract_day, out), + DatePart::Month => timestamp_extract_with_fn(m.unit, arr, sel, extract_month, out), + DatePart::Quarter => timestamp_extract_with_fn(m.unit, arr, sel, extract_quarter, out), + DatePart::Year => timestamp_extract_with_fn(m.unit, arr, sel, extract_year, out), other => not_implemented!("Extract {other:?} from {datatype}"), }, other => Err(RayexecError::new(format!( @@ -119,65 +135,84 @@ pub fn extract_date_part(part: DatePart, arr: &Array2) -> Result { } } -fn timestamp_extract_with_fn(unit: TimeUnit, arr: &Array2, f: F) -> Result +fn timestamp_extract_with_fn( + unit: TimeUnit, + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { match unit { - TimeUnit::Second => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp(val, 0).unwrap_or_default() - }), - TimeUnit::Millisecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, 
|val| { - DateTime::from_timestamp_millis(val).unwrap_or_default() - }), - TimeUnit::Microsecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp_micros(val).unwrap_or_default() - }), - TimeUnit::Nanosecond => timestamp_extract_with_fn_and_datetime_builder(arr, f, |val| { - DateTime::from_timestamp_nanos(val) - }), + TimeUnit::Second => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp(val, 0).unwrap_or_default(), + out, + ), + TimeUnit::Millisecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp_millis(val).unwrap_or_default(), + out, + ), + TimeUnit::Microsecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp_micros(val).unwrap_or_default(), + out, + ), + TimeUnit::Nanosecond => timestamp_extract_with_fn_and_datetime_builder( + arr, + sel, + f, + |val| DateTime::from_timestamp_nanos(val), + out, + ), } } fn timestamp_extract_with_fn_and_datetime_builder( - arr: &Array2, + arr: &Array, + sel: impl IntoExactSizeIterator, f: F, builder: B, -) -> Result + out: &mut Array, +) -> Result<()> where B: Fn(i64) -> DateTime, F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { - precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { let date = builder(val); buf.put(&f(date)) }, ) } -fn date32_extract_with_fn(arr: &Array2, f: F) -> Result +fn date32_extract_with_fn( + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { 
- precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { // TODO: Can this actually fail? let date = DateTime::from_timestamp(val as i64 * SECONDS_IN_DAY, 0).unwrap_or_default(); buf.put(&f(date)) @@ -185,20 +220,20 @@ where ) } -fn date64_extract_with_fn(arr: &Array2, f: F) -> Result +fn date64_extract_with_fn( + arr: &Array, + sel: impl IntoExactSizeIterator, + f: F, + out: &mut Array, +) -> Result<()> where F: Fn(DateTime) -> i64, { - UnaryExecutor2::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Decimal64(DecimalTypeMeta { - precision: Decimal64Type::MAX_PRECISION, - scale: Decimal64Type::DEFAULT_SCALE, - }), - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |val, buf| { + sel, + OutBuffer::from_array(out)?, + |&val, buf| { // TODO: Can this actually fail? let date = DateTime::from_timestamp_millis(val).unwrap_or_default(); buf.put(&f(date)) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs index b980b6bc6..a1f43a097 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_part.rs @@ -1,7 +1,8 @@ use rayexec_error::Result; use rayexec_parser::ast; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; use crate::arrays::compute::date::{self, extract_date_part}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::scalar::decimal::{Decimal64Type, DecimalType}; @@ -101,9 +102,11 @@ pub struct DatePartImpl { } impl ScalarFunctionImpl for DatePartImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { + fn execute(&self, input: &Batch, output: 
&mut Array) -> Result<()> { + let sel = input.selection(); // First input ignored (the constant "part" to extract) - extract_date_part(self.part, inputs[1]) + let input = &input.arrays()[1]; + extract_date_part(self.part, input, sel, output) } } From 94d560656c11db9a907e8d35f3bd05792a0d2cd1 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 16:52:14 -0500 Subject: [PATCH 21/59] date trunc --- .../scalar/builtin/datetime/date_trunc.rs | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs index c843259cc..4a74e0f24 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/date_trunc.rs @@ -2,11 +2,12 @@ use std::str::FromStr; use rayexec_error::{not_implemented, RayexecError, Result}; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalI64; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalI64_2; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -140,21 +141,21 @@ pub struct DateTruncImpl { } impl ScalarFunctionImpl for DateTruncImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = &inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> 
Result<()> { + let sel = input.selection(); + // First element is field name, skip. + let input = &input.arrays()[0]; let trunc = match self.input_unit { TimeUnit::Second => match self.field { - TruncField::Microseconds | TruncField::Milliseconds | TruncField::Second => { - return Ok((*input).clone()) - } + TruncField::Microseconds | TruncField::Milliseconds | TruncField::Second => 1, TruncField::Minute => 60, TruncField::Hour => 60 * 60, TruncField::Day => 24 * 60 * 60, other => not_implemented!("trunc field: {other:?}"), }, TimeUnit::Millisecond => match self.field { - TruncField::Microseconds | TruncField::Milliseconds => return Ok((*input).clone()), + TruncField::Microseconds | TruncField::Milliseconds => 1, TruncField::Second => 1000, TruncField::Minute => 60 * 1000, TruncField::Hour => 60 * 60 * 1000, @@ -162,7 +163,7 @@ impl ScalarFunctionImpl for DateTruncImpl { other => not_implemented!("trunc field: {other:?}"), }, TimeUnit::Microsecond => match self.field { - TruncField::Microseconds => return Ok((*input).clone()), + TruncField::Microseconds => 1, TruncField::Milliseconds => 1000, TruncField::Second => 1000 * 1000, TruncField::Minute => 60 * 1000 * 1000, @@ -181,16 +182,14 @@ impl ScalarFunctionImpl for DateTruncImpl { }, }; - let builder = ArrayBuilder { - datatype: DataType::Timestamp(TimestampTypeMeta { - unit: self.input_unit, - }), - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - - UnaryExecutor2::execute::(input, builder, |v, buf| { - let v = (v / trunc) * trunc; - buf.put(&v) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |&v, buf| { + let v = (v / trunc) * trunc; + buf.put(&v) + }, + ) } } From 11520aeb29f9c87a648ec99c529db4f6362dd3cc Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 16:56:14 -0500 Subject: [PATCH 22/59] epoch --- .../scalar/builtin/datetime/epoch.rs | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs index 416a46785..42b444978 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs @@ -1,10 +1,12 @@ +use iterutil::IntoExactSizeIterator; use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalI64; use crate::arrays::datatype::{DataType, DataTypeId, TimeUnit, TimestampTypeMeta}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalI64_2; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -102,21 +104,22 @@ impl ScalarFunction for EpochMs { pub struct EpochImpl; impl ScalarFunctionImpl for EpochImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - to_timestamp::(input) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + to_timestamp::(input, sel, output) } } -fn to_timestamp(input: &Array2) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Timestamp(TimestampTypeMeta { - unit: TimeUnit::Microsecond, - }), - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - - UnaryExecutor2::execute::(input, builder, |v, buf| { - buf.put(&(v * S)); - }) +fn to_timestamp( + input: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, +) -> Result<()> { 
+ UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(out)?, + |&v, buf| buf.put(&(v * S)), + ) } From 1369e38f8f8efb1fb8ae7cf74b968d6aa6592d07 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 17:34:31 -0500 Subject: [PATCH 23/59] ternary --- .../src/arrays/executor_exp/scalar/mod.rs | 1 + .../src/arrays/executor_exp/scalar/ternary.rs | 306 ++++++++++++++++++ .../functions/scalar/builtin/string/ascii.rs | 31 +- .../functions/scalar/builtin/string/case.rs | 54 ++-- 4 files changed, 352 insertions(+), 40 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs index 7f1c4b5a8..32cd06b56 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -1,2 +1,3 @@ pub mod binary; +pub mod ternary; pub mod unary; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs new file mode 100644 index 000000000..179be7a19 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs @@ -0,0 +1,306 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct TernaryExecutor; + +impl TernaryExecutor { + pub fn execute( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + array3: &Array, + sel3: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + 
S3: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut( + &S1::StorageType, + &S2::StorageType, + &S3::StorageType, + PutBuffer>, + ), + { + if array1.is_dictionary() || array2.is_dictionary() || array3.is_dictionary() { + let flat1 = array1.flat_view()?; + let flat2 = array2.flat_view()?; + let flat3 = array3.flat_view()?; + + return Self::execute_flat::( + flat1, sel1, flat2, sel2, flat3, sel3, out, op, + ); + } + + // TODO: length validation. + + let input1 = S1::get_addressable(array1.data())?; + let input2 = S2::get_addressable(array2.data())?; + let input3 = S3::get_addressable(array3.data())?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + let validity3 = array3.validity(); + + if validity1.all_valid() && validity2.all_valid() && validity3.all_valid() { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + let val3 = input3.get(input3_idx).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + if validity1.is_valid(input1_idx) + && validity2.is_valid(input2_idx) + && validity3.is_valid(input3_idx) + { + let val1 = input1.get(input1_idx).unwrap(); + let val2 = input2.get(input2_idx).unwrap(); + let val3 = input3.get(input3_idx).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat<'a, S1, S2, S3, O, Op>( + array1: FlatArrayView<'a>, + sel1: impl IntoExactSizeIterator, + array2: FlatArrayView<'a>, + sel2: impl 
IntoExactSizeIterator, + array3: FlatArrayView<'a>, + sel3: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + S3: PhysicalStorage, + O: MutablePhysicalStorage, + for<'b> Op: FnMut( + &S1::StorageType, + &S2::StorageType, + &S3::StorageType, + PutBuffer>, + ), + { + // TODO: length validation. + + let input1 = S1::get_addressable(array1.array_buffer)?; + let input2 = S2::get_addressable(array2.array_buffer)?; + let input3 = S3::get_addressable(array3.array_buffer)?; + + let mut output = O::get_addressable_mut(out.buffer)?; + + let validity1 = &array1.validity; + let validity2 = &array2.validity; + let validity3 = &array3.validity; + + if validity1.all_valid() && validity2.all_valid() && validity3.all_valid() { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + let sel3 = array3.selection.get(input3_idx).unwrap(); + + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + let val3 = input3.get(sel3).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, (input1_idx, (input2_idx, input3_idx))) in sel1 + .into_iter() + .zip(sel2.into_iter().zip(sel3.into_iter())) + .enumerate() + { + let sel1 = array1.selection.get(input1_idx).unwrap(); + let sel2 = array2.selection.get(input2_idx).unwrap(); + let sel3 = array3.selection.get(input3_idx).unwrap(); + + if validity1.is_valid(sel1) && validity2.is_valid(sel2) && validity3.is_valid(sel3) + { + let val1 = input1.get(sel1).unwrap(); + let val2 = input2.get(sel2).unwrap(); + let val3 = input3.get(sel3).unwrap(); + + op( + val1, + val2, + val3, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + 
out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalI32, PhysicalUtf8}; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn ternary_left_prepend_simple() { + let strings = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let count = Array::try_from_iter([1, 2, 3]).unwrap(); + let pad = Array::try_from_iter(["<", ".", "!"]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + TernaryExecutor::execute::( + &strings, + 0..3, + &count, + 0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["( + &strings, + 0..3, + &count, + 0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([None, None, Some("!!!c")]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn ternary_left_prepend_dictionary() { + let strings = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let count = Array::try_from_iter([1, 2, 3]).unwrap(); + let mut pad = Array::try_from_iter(["<", ".", "!"]).unwrap(); + // '[".", ".", "<"]' + pad.select(&NopBufferManager, [1, 1, 0]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + TernaryExecutor::execute::( + &strings, + 0..3, + &count, + 0..3, + &pad, + 0..3, + OutBuffer::from_array(&mut 
out).unwrap(), + |s, &count, pad, buf| { + str_buf.clear(); + for _ in 0..count { + str_buf.push_str(pad); + } + str_buf.push_str(s); + + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([".a", "..b", "<< Result { - let input = inputs[0]; - let builder = ArrayBuilder { - datatype: DataType::Int32, - buffer: PrimitiveBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; - UnaryExecutor2::execute::(input, builder, |v, buf| { - let v = v.chars().next().map(|c| c as i32).unwrap_or(0); - buf.put(&v) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| { + let v = v.chars().next().map(|c| c as i32).unwrap_or(0); + buf.put(&v) + }, + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index 297ec9749..016a7ad79 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -1,10 +1,12 @@ -use rayexec_error::{RayexecError, Result}; +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, 
ScalarFunction, ScalarFunctionImpl}; @@ -60,9 +62,10 @@ impl ScalarFunction for Lower { pub struct LowerImpl; impl ScalarFunctionImpl for LowerImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - case_convert_execute(input, str::to_lowercase) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + case_convert_execute(input, sel, str::to_lowercase, output) } } @@ -115,28 +118,27 @@ impl ScalarFunction for Upper { pub struct UpperImpl; impl ScalarFunctionImpl for UpperImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - case_convert_execute(input, str::to_uppercase) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + case_convert_execute(input, sel, str::to_uppercase, output) } } -fn case_convert_execute(input: &Array2, case_fn: F) -> Result +// TODO: Reusable string buffer. +fn case_convert_execute( + input: &Array, + sel: impl IntoExactSizeIterator, + case_fn: F, + output: &mut Array, +) -> Result<()> where F: Fn(&str) -> String, { - let cap = match input.array_data() { - ArrayData2::Binary(bin) => bin.binary_data_size_bytes(), - _ => return Err(RayexecError::new("Unexpected array data type")), - }; - - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len_and_data_capacity(input.logical_len(), cap), - }; - - UnaryExecutor2::execute::(input, builder, |v, buf| { - // TODO: Non-allocating variant. 
- buf.put(&case_fn(v)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(&case_fn(v)), + ) } From c6a41108bb96f027722c7d8b03ef333fc0c0b0f1 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 18:22:23 -0500 Subject: [PATCH 24/59] uniform --- .../src/arrays/executor_exp/scalar/mod.rs | 1 + .../src/arrays/executor_exp/scalar/uniform.rs | 272 ++++++++++++++++++ 2 files changed, 273 insertions(+) create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs index 32cd06b56..9e14a0695 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -1,3 +1,4 @@ pub mod binary; pub mod ternary; pub mod unary; +pub mod uniform; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs new file mode 100644 index 000000000..08092741a --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs @@ -0,0 +1,272 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::Result; + +use crate::arrays::array::exp::Array; +use crate::arrays::array::flat::FlatArrayView; +use crate::arrays::buffer::physical_type::{Addressable, MutablePhysicalStorage, PhysicalStorage}; +use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; + +#[derive(Debug, Clone)] +pub struct UniformExecutor; + +impl UniformExecutor { + /// Executes an operation across uniform array types. + /// + /// The selection applies to all arrays. 
+ pub fn execute( + arrays: &[Array], + sel: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&[&S::StorageType], PutBuffer>), + { + if arrays.iter().any(|arr| arr.is_dictionary()) { + let flats = arrays + .iter() + .map(|arr| arr.flat_view()) + .collect::>>()?; + + return Self::execute_flat::(&flats, sel, out, op); + } + + let inputs = arrays + .iter() + .map(|arr| S::get_addressable(arr.data())) + .collect::>>()?; + + let all_valid = arrays.iter().all(|arr| arr.validity().all_valid()); + + let mut output = O::get_addressable_mut(out.buffer)?; + + let mut op_inputs = Vec::with_capacity(arrays.len()); + + if all_valid { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + op_inputs.clear(); + for input in &inputs { + op_inputs.push(input.get(input_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + let validities: Vec<_> = arrays.iter().map(|arr| arr.validity()).collect(); + + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let all_valid = validities.iter().all(|v| v.is_valid(input_idx)); + + if all_valid { + op_inputs.clear(); + for input in &inputs { + op_inputs.push(input.get(input_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } + + pub fn execute_flat( + arrays: &[FlatArrayView], + sel: impl IntoExactSizeIterator, + out: OutBuffer, + mut op: Op, + ) -> Result<()> + where + S: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> Op: FnMut(&[&S::StorageType], PutBuffer>), + { + // TODO: length check + + let inputs = arrays + .iter() + .map(|arr| S::get_addressable(&arr.array_buffer)) + .collect::>>()?; + + let all_valid = arrays.iter().all(|arr| arr.validity.all_valid()); + + let mut output = O::get_addressable_mut(out.buffer)?; 
+ + let mut op_inputs = Vec::with_capacity(arrays.len()); + + if all_valid { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + op_inputs.clear(); + for (input, array) in inputs.iter().zip(arrays) { + let sel_idx = array.selection.get(input_idx).unwrap(); + op_inputs.push(input.get(sel_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } + } else { + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let mut all_valid = true; + + for array in arrays { + let sel_idx = array.selection.get(input_idx).unwrap(); + all_valid = all_valid && array.validity.is_valid(sel_idx); + } + + if all_valid { + op_inputs.clear(); + for (input, array) in inputs.iter().zip(arrays) { + let sel_idx = array.selection.get(input_idx).unwrap(); + op_inputs.push(input.get(sel_idx).unwrap()); + } + + op( + &op_inputs, + PutBuffer::new(output_idx, &mut output, out.validity), + ); + } else { + out.validity.set_invalid(output_idx); + } + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn uniform_and_simple() { + let a = Array::try_from_iter([true, true, true]).unwrap(); + let b = Array::try_from_iter([true, true, false]).unwrap(); + let c = Array::try_from_iter([true, false, false]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |bools, buf| { + let v = bools.iter().all(|b| **b); + buf.put(&v); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([true, false, false]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise() { + let a = 
Array::try_from_iter(["a", "b", "c"]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let c = Array::try_from_iter(["dog", "cat", "horse"]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["a1dog", "b2cat", "c3horse"]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise_with_invalid() { + let a = Array::try_from_iter([Some("a"), Some("b"), None]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let c = Array::try_from_iter([Some("dog"), None, Some("horse")]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter([Some("a1dog"), None, None]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn uniform_string_concat_row_wise_with_dictionary() { + let a = Array::try_from_iter(["a", "b", "c"]).unwrap(); + let b = Array::try_from_iter(["1", "2", "3"]).unwrap(); + let mut c = Array::try_from_iter(["dog", "cat", "horse"]).unwrap(); + // '["horse", "horse", "dog"] + c.select(&NopBufferManager, [2, 2, 0]).unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 3).unwrap(); + + let mut str_buf = String::new(); + + UniformExecutor::execute::( + &[a, b, c], + 0..3, + OutBuffer::from_array(&mut out).unwrap(), + |strings, buf| { + str_buf.clear(); + for s in strings { + 
str_buf.push_str(s); + } + buf.put(&str_buf); + }, + ) + .unwrap(); + + let expected = Array::try_from_iter(["a1horse", "b2horse", "c3dog"]).unwrap(); + + assert_arrays_eq(&expected, &out); + } +} From 9178c78532d6ccbe75b4540f87e9456595936d62 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 20:04:19 -0500 Subject: [PATCH 25/59] some string funcs --- .../rayexec_execution/src/arrays/array/exp.rs | 4 + .../src/arrays/buffer/mod.rs | 43 +++++++++ .../src/arrays/buffer/physical_type.rs | 28 ++++++ .../src/arrays/buffer/string_view.rs | 43 +++++++++ .../src/arrays/executor/scalar/uniform.rs | 10 +- .../src/functions/scalar/builtin/boolean.rs | 6 +- .../functions/scalar/builtin/string/concat.rs | 96 +++++++++++-------- .../scalar/builtin/string/contains.rs | 54 ++++++----- .../scalar/builtin/string/ends_with.rs | 58 ++++++----- .../functions/scalar/builtin/string/length.rs | 76 ++++++++------- .../functions/scalar/builtin/string/like.rs | 58 ++++++----- 11 files changed, 320 insertions(+), 156 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 51b50ea66..0838fd003 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -122,6 +122,10 @@ where &self.data } + pub fn data_mut(&mut self) -> &mut ArrayData { + &mut self.data + } + pub fn validity(&self) -> &Validity { &self.validity } diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index f3e73bea7..e30de93c1 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -5,10 +5,13 @@ pub mod string_view; mod raw; use buffer_manager::{BufferManager, NopBufferManager}; +use fmtutil::IntoDisplayableSlice; use physical_type::{PhysicalStorage, PhysicalType}; use raw::RawBufferParts; use rayexec_error::{RayexecError, Result}; use string_view::{ + 
BinaryViewAddressable, + BinaryViewAddressableMut, StringViewAddressable, StringViewAddressableMut, StringViewHeap, @@ -110,6 +113,34 @@ where Ok(StringViewAddressableMut { metadata, heap }) } + pub fn try_as_binary_view_addressable(&self) -> Result { + self.check_type_one_of(&[PhysicalType::Utf8, PhysicalType::Binary])?; + + let metadata = unsafe { self.primary.as_slice::() }; + let heap = match self.secondary.as_ref() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressable { metadata, heap }) + } + + pub fn try_as_binary_view_addressable_mut(&mut self) -> Result { + // Note that unlike the non-mut version of this function, we only allow + // physical binary types here. For reads, treating strings as binary is + // completely fine, but allowing writing raw binary to a logical string + // array could lead to invalid utf8. + self.check_type(PhysicalType::Binary)?; + + let metadata = unsafe { self.primary.as_slice_mut::() }; + let heap = match self.secondary.as_mut() { + SecondaryBuffer::StringViewHeap(heap) => heap, + _ => return Err(RayexecError::new("Missing string heap")), + }; + + Ok(BinaryViewAddressableMut { metadata, heap }) + } + fn check_type(&self, want: PhysicalType) -> Result<()> { if want != self.physical_type { return Err(RayexecError::new("Physical types don't match") @@ -119,6 +150,18 @@ where Ok(()) } + + fn check_type_one_of(&self, oneof: &[PhysicalType]) -> Result<()> { + if !oneof.contains(&self.physical_type) { + return Err( + RayexecError::new("Physical type not one of requested types") + .with_field("have", self.physical_type) + .with_field("oneof", oneof.display_as_list().to_string()), + ); + } + + Ok(()) + } } impl Drop for ArrayBuffer { diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 57aecab24..00a96090b 100644 --- 
a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -5,6 +5,8 @@ use rayexec_error::Result; use super::buffer_manager::BufferManager; use super::string_view::{ + BinaryViewAddressable, + BinaryViewAddressableMut, StringViewAddressable, StringViewAddressableMut, StringViewMetadataUnion, @@ -273,6 +275,32 @@ impl MutablePhysicalStorage for PhysicalUtf8 { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalBinary; + +impl PhysicalStorage for PhysicalBinary { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::Binary; + + type PrimaryBufferType = StringViewMetadataUnion; + type StorageType = [u8]; + + type Addressable<'a> = BinaryViewAddressable<'a>; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_binary_view_addressable() + } +} + +impl MutablePhysicalStorage for PhysicalBinary { + type AddressableMut<'a> = BinaryViewAddressableMut<'a>; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_binary_view_addressable_mut() + } +} + /// Dictionary arrays have the selection vector as the primary data buffer. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct PhysicalDictionary; diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index 31968b618..2d32f57f4 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -48,6 +48,49 @@ impl<'a> AddressableMut for StringViewAddressableMut<'a> { } } +#[derive(Debug)] +pub struct BinaryViewAddressable<'a> { + pub(crate) metadata: &'a [StringViewMetadataUnion], + pub(crate) heap: &'a StringViewHeap, +} + +impl<'a> Addressable for BinaryViewAddressable<'a> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + let m = self.metadata.get(idx)?; + self.heap.get(m) + } +} + +#[derive(Debug)] +pub struct BinaryViewAddressableMut<'a> { + pub(crate) metadata: &'a mut [StringViewMetadataUnion], + pub(crate) heap: &'a mut StringViewHeap, +} + +impl<'a> AddressableMut for BinaryViewAddressableMut<'a> { + type T = [u8]; + + fn len(&self) -> usize { + self.metadata.len() + } + + fn get_mut(&mut self, idx: usize) -> Option<&mut Self::T> { + let m = self.metadata.get_mut(idx)?; + self.heap.get_mut(m) + } + + fn put(&mut self, idx: usize, val: &Self::T) { + let new_m = self.heap.push_bytes(val); + self.metadata[idx] = new_m; + } +} + /// Metadata for small (<= 12 bytes) varlen data. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(C)] diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs index 466aafb3b..3b0362b6a 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/uniform.rs @@ -10,9 +10,9 @@ use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct UniformExecutor; +pub struct UniformExecutor2; -impl UniformExecutor { +impl UniformExecutor2 { pub fn execute<'a, S, B, Op>( arrays: &[&'a Array2], builder: ArrayBuilder, @@ -127,7 +127,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -162,7 +162,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { @@ -195,7 +195,7 @@ mod tests { let mut string_buffer = String::new(); - let got = UniformExecutor::execute::( + let got = UniformExecutor2::execute::( &[&first, &second, &third], builder, |inputs, buf| { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index af673e6fd..8b12bb776 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -8,7 +8,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalBool_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UniformExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, 
TernaryExecutor, UniformExecutor2}; use crate::arrays::storage::BooleanStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -111,7 +111,7 @@ impl ScalarFunctionImpl for AndImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor2::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, @@ -204,7 +204,7 @@ impl ScalarFunctionImpl for OrImpl { } _ => { let len = inputs[0].logical_len(); - UniformExecutor::execute::( + UniformExecutor2::execute::( inputs, ArrayBuilder { datatype: DataType::Boolean, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs index 55be516e2..20dac7387 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/concat.rs @@ -1,10 +1,13 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UniformExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::scalar::uniform::UniformExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -68,55 +71,70 @@ impl ScalarFunction for Concat { pub struct StringConcatImpl; impl ScalarFunctionImpl for 
StringConcatImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array2::from_iter([""]); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Zero args should actually error during planning. + // Currently this just sets everything to an empty string. + let mut addressable = output + .data_mut() + .try_as_mut()? + .try_as_string_view_addressable_mut()?; + + for idx in 0..addressable.len() { + addressable.put(idx, ""); + } } - 1 => Ok(inputs[0].clone()), - 2 => { - let a = inputs[0]; - let b = inputs[1]; + 1 => { + let input = &input.arrays()[0]; - let mut string_buf = String::new(); + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| buf.put(s), + )?; + } + 2 => { + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; - // TODO: Compute data capacity. 
+ let mut str_buf = String::new(); - BinaryExecutor2::execute::( + BinaryExecutor::execute::( a, + sel, b, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(a.logical_len()), - }, - |a, b, buf| { - string_buf.clear(); - string_buf.push_str(a); - string_buf.push_str(b); - buf.put(string_buf.as_str()); + sel, + OutBuffer::from_array(output)?, + |s1, s2, buf| { + str_buf.clear(); + str_buf.push_str(s1); + str_buf.push_str(s2); + buf.put(&str_buf); }, - ) + )?; } _ => { - let mut string_buf = String::new(); + let mut str_buf = String::new(); - UniformExecutor::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(inputs[0].logical_len()), - }, - |strings, buf| { - string_buf.clear(); - for s in strings { - string_buf.push_str(s); + UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |ss, buf| { + str_buf.clear(); + for s in ss { + str_buf.push_str(s); } - buf.put(string_buf.as_str()); + buf.put(&str_buf); }, - ) + )?; } } + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs index 3ea1f64a9..2197a7ea0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/contains.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use 
crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -82,15 +84,19 @@ pub struct StringContainsConstantImpl { } impl ScalarFunctionImpl for StringContainsConstantImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.contains(&self.constant)) - }) + UnaryExecutor::execute::( + haystack, + sel, + OutBuffer::from_array(output)?, + |haystack, buf| { + let v = haystack.contains(&self.constant); + buf.put(&v); + }, + ) } } @@ -98,17 +104,21 @@ impl ScalarFunctionImpl for StringContainsConstantImpl { pub struct StringContainsImpl; impl ScalarFunctionImpl for StringContainsImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let haystack = &input.arrays()[0]; + let needle = &input.arrays()[1]; - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.contains(c)), + BinaryExecutor::execute::( + haystack, + sel, + needle, + sel, + OutBuffer::from_array(output)?, + |haystack, needle, buf| { + let v = haystack.contains(needle); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs index 
09a975667..102b594fe 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/ends_with.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -86,15 +88,19 @@ pub struct EndsWithConstantImpl { } impl ScalarFunctionImpl for EndsWithConstantImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.ends_with(&self.constant)) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let v = s.ends_with(&self.constant); + buf.put(&v); + }, + ) } } @@ -102,17 +108,21 @@ impl ScalarFunctionImpl for EndsWithConstantImpl { pub struct EndsWithImpl; impl ScalarFunctionImpl for EndsWithImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: 
BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.ends_with(c)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let suffix = &input.arrays()[1]; + + BinaryExecutor::execute::( + strings, + sel, + suffix, + sel, + OutBuffer::from_array(output)?, + |s, suffix, buf| { + let v = s.ends_with(&suffix); + buf.put(&v); + }, ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs index 6b76bc9f0..d471afb60 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/length.rs @@ -1,10 +1,11 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBinary, PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalBinary_2, PhysicalUtf8_2}; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -64,18 +65,19 @@ impl ScalarFunction for Length { pub struct StrLengthImpl; impl ScalarFunctionImpl for StrLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - - 
UnaryExecutor2::execute::(input, builder, |v, buf| { - let len = v.chars().count() as i64; - buf.put(&len) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let len = s.chars().count() as i64; + buf.put(&len) + }, + ) } } @@ -145,18 +147,17 @@ impl ScalarFunction for ByteLength { pub struct ByteLengthImpl; impl ScalarFunctionImpl for ByteLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. - UnaryExecutor2::execute::(input, builder, |v, buf| { - buf.put(&(v.len() as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(&(v.len() as i64)), + ) } } @@ -222,18 +223,19 @@ impl ScalarFunction for BitLength { pub struct BitLengthImpl; impl ScalarFunctionImpl for BitLengthImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; // Binary applicable to both str and [u8]. 
- UnaryExecutor2::execute::(input, builder, |v, buf| { - let bit_len = v.len() * 8; - buf.put(&(bit_len as i64)) - }) + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| { + let bit_len = v.len() * 8; + buf.put(&(bit_len as i64)) + }, + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs index 3d626f36b..2452725e0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/like.rs @@ -1,11 +1,13 @@ use rayexec_error::{Result, ResultExt}; use regex::{escape, Regex}; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,16 +86,19 @@ pub struct LikeConstImpl { } impl ScalarFunctionImpl for LikeConstImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - let b = self.constant.is_match(s); - buf.put(&b); - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = 
input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let b = self.constant.is_match(s); + buf.put(&b); + }, + ) } } @@ -101,22 +106,23 @@ impl ScalarFunctionImpl for LikeConstImpl { pub struct LikeImpl; impl ScalarFunctionImpl for LikeImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let patterns = &input.arrays()[1]; let mut s_buf = String::new(); - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |a, b, buf| { - match like_pattern_to_regex(&mut s_buf, b, Some('\\')) { + BinaryExecutor::execute::( + strings, + sel, + patterns, + sel, + OutBuffer::from_array(output)?, + |s, pattern, buf| { + match like_pattern_to_regex(&mut s_buf, pattern, Some('\\')) { Ok(pat) => { - let b = pat.is_match(a); + let b = pat.is_match(s); buf.put(&b); } Err(_) => { From 3fe5e6539882a91ec76c8ddd97be563426e28dc2 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 20:32:40 -0500 Subject: [PATCH 26/59] more string --- .../src/arrays/executor/scalar/ternary.rs | 4 +- .../src/functions/scalar/builtin/boolean.rs | 4 +- .../functions/scalar/builtin/string/pad.rs | 104 ++++++++++-------- .../scalar/builtin/string/regexp_replace.rs | 66 ++++++----- .../functions/scalar/builtin/string/repeat.rs | 39 ++++--- .../scalar/builtin/string/starts_with.rs | 40 +++---- .../scalar/builtin/string/substring.rs | 51 +++++---- .../functions/scalar/builtin/string/trim.rs | 49 +++++---- 8 files changed, 198 insertions(+), 159 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs index 9e2fd283c..69fb87d77 100644 --- 
a/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/ternary.rs @@ -12,9 +12,9 @@ use crate::arrays::selection; use crate::arrays::storage::AddressableStorage; #[derive(Debug, Clone, Copy)] -pub struct TernaryExecutor; +pub struct TernaryExecutor2; -impl TernaryExecutor { +impl TernaryExecutor2 { pub fn execute<'a, S1, S2, S3, B, Op>( array1: &'a Array2, array2: &'a Array2, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index 8b12bb776..68da99e5f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -8,7 +8,7 @@ use crate::arrays::bitmap::Bitmap; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::PhysicalBool_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UniformExecutor2}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor2, UniformExecutor2}; use crate::arrays::storage::BooleanStorage; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; @@ -98,7 +98,7 @@ impl ScalarFunctionImpl for AndImpl { let a = inputs[0]; let b = inputs[1]; let c = inputs[2]; - TernaryExecutor::execute::( + TernaryExecutor2::execute::( a, b, c, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs index 49436ccf1..6a6cf572a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/pad.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use 
crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -96,33 +98,38 @@ impl ScalarFunction for LeftPad { pub struct LeftPadImpl; impl ScalarFunctionImpl for LeftPadImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let mut string_buf = String::new(); - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - match inputs.len() { - 2 => BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, count, buf| { + match input.arrays().len() { + 2 => BinaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &count, buf| { lpad(s, count, " ", &mut string_buf); buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, - |s, count, pad, buf| { - lpad(s, count, pad, &mut string_buf); - buf.put(&string_buf); - }, - ), + 3 => { + TernaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &count, pad, buf| { + lpad(s, count, pad, &mut string_buf); + buf.put(&string_buf); + }, + ) 
+ } other => unreachable!("num inputs checked, got {other}"), } } @@ -209,33 +216,38 @@ impl ScalarFunction for RightPad { pub struct RightPadImpl; impl ScalarFunctionImpl for RightPadImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let mut string_buf = String::new(); - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - match inputs.len() { - 2 => BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, count, buf| { + match input.arrays().len() { + 2 => BinaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &count, buf| { rpad(s, count, " ", &mut string_buf); buf.put(&string_buf); }, ), - 3 => TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, - |s, count, pad, buf| { - rpad(s, count, pad, &mut string_buf); - buf.put(&string_buf); - }, - ), + 3 => { + TernaryExecutor::execute::( + &input.arrays[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &count, pad, buf| { + rpad(s, count, pad, &mut string_buf); + buf.put(&string_buf); + }, + ) + } other => unreachable!("num inputs checked, got {other}"), } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs index 35b1ac9f8..cd1037423 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/regexp_replace.rs @@ -1,11 +1,14 @@ use rayexec_error::{Result, ResultExt}; use regex::Regex; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use 
crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -98,25 +101,29 @@ pub struct RegexpReplaceImpl { } impl ScalarFunctionImpl for RegexpReplaceImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); match (self.pattern.as_ref(), self.replacement.as_ref()) { (Some(pattern), Some(replacement)) => { - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - // TODO: Flags to more many. - let out = pattern.replace(s, replacement); - buf.put(out.as_ref()); - }) + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| { + // TODO: Flags to more many. 
+ let out = pattern.replace(s, replacement); + buf.put(out.as_ref()); + }, + ) } (Some(pattern), None) => { - BinaryExecutor2::execute::( - inputs[0], - inputs[2], - builder, + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, |s, replacement, buf| { let out = pattern.replace(s, replacement); buf.put(out.as_ref()); @@ -124,10 +131,12 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { ) } (None, Some(replacement)) => { - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, |s, pattern, buf| { let pattern = match Regex::new(pattern) { Ok(pattern) => pattern, @@ -143,11 +152,14 @@ impl ScalarFunctionImpl for RegexpReplaceImpl { ) } (None, None) => { - TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - builder, + TernaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, |s, pattern, replacement, buf| { let pattern = match Regex::new(pattern) { Ok(pattern) => pattern, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs index 338971205..a84e1fe54 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/repeat.rs @@ -2,11 +2,12 @@ use std::fmt::Debug; use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; -use 
crate::arrays::executor::scalar::BinaryExecutor2; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -65,27 +66,25 @@ impl ScalarFunction for Repeat { pub struct RepeatUtf8Impl; impl ScalarFunctionImpl for RepeatUtf8Impl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let strings = inputs[0]; - let nums = inputs[1]; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let strings = &input.arrays()[0]; + let counts = &input.arrays()[1]; - // TODO: Capacity + let mut str_buf = String::new(); - let mut string_buf = String::new(); - - BinaryExecutor2::execute::( + BinaryExecutor::execute::( strings, - nums, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(strings.logical_len()), - }, - |s, num, buf| { - string_buf.clear(); + sel, + counts, + sel, + OutBuffer::from_array(output)?, + |s, &num, buf| { + str_buf.clear(); for _ in 0..num { - string_buf.push_str(s); + str_buf.push_str(s); } - buf.put(string_buf.as_str()) + buf.put(str_buf.as_str()) }, ) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs index 6034a7a33..af5684316 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/starts_with.rs @@ -1,10 +1,12 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use 
crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -84,23 +86,23 @@ pub struct StartsWithImpl { } impl ScalarFunctionImpl for StartsWithImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); match self.constant.as_ref() { - Some(constant) => { - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - buf.put(&s.starts_with(constant)) - }) - } - None => BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, - |s, c, buf| buf.put(&s.starts_with(c)), + Some(prefix) => UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| buf.put(&s.starts_with(prefix)), + ), + None => BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, prefix, buf| buf.put(&s.starts_with(prefix)), ), } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index fb1c7de24..894abfdcf 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -1,10 +1,16 @@ use rayexec_error::Result; +use 
crate::arrays::array::exp::Array; use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -105,16 +111,16 @@ impl ScalarFunction for Substring { pub struct SubstringFromImpl; impl ScalarFunctionImpl for SubstringFromImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let len = inputs[0].logical_len(); - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(len), - }, - |s, from, buf| buf.put(substring_from(s, from)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, + |s, &from, buf| buf.put(substring_from(s, from)), ) } } @@ -123,17 +129,18 @@ impl ScalarFunctionImpl for SubstringFromImpl { pub struct SubstringFromToImpl; impl ScalarFunctionImpl for SubstringFromToImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let len = inputs[0].logical_len(); - TernaryExecutor::execute::( - inputs[0], - inputs[1], - inputs[2], - ArrayBuilder { - datatype: DataType::Utf8, - buffer: 
GermanVarlenBuffer::with_len(len), - }, - |s, from, count, buf| buf.put(substring_from_count(s, from, count)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + TernaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + &input.arrays()[2], + sel, + OutBuffer::from_array(output)?, + |s, &from, &count, buf| buf.put(substring_from_count(s, from, count)), ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index 2fc8507d2..02ba39e98 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -3,11 +3,17 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::array::exp::Array; use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -222,16 +228,18 @@ impl TrimWhitespaceImpl { } impl ScalarFunctionImpl for TrimWhitespaceImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - - UnaryExecutor2::execute::(inputs[0], builder, |s, buf| { - 
let trimmed = F::trim_func(s, " "); - buf.put(trimmed) - }) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |s, buf| { + let trimmed = F::trim_func(s, " "); + buf.put(trimmed); + }, + ) } } @@ -247,19 +255,18 @@ impl TrimPatternImpl { } impl ScalarFunctionImpl for TrimPatternImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(inputs[0].logical_len()), - }; - - BinaryExecutor2::execute::( - inputs[0], - inputs[1], - builder, + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + BinaryExecutor::execute::( + &input.arrays()[0], + sel, + &input.arrays()[1], + sel, + OutBuffer::from_array(output)?, |s, pattern, buf| { let trimmed = F::trim_func(s, pattern); - buf.put(trimmed) + buf.put(trimmed); }, ) } From bfe09e96a9f2ca60eaabae01e5f52839be5bd732 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 29 Dec 2024 20:45:06 -0500 Subject: [PATCH 27/59] negate --- .../src/arrays/executor_exp/scalar/unary.rs | 15 ++- .../src/functions/scalar/builtin/negate.rs | 97 +++++++++---------- .../src/functions/scalar/builtin/random.rs | 18 ++-- 3 files changed, 64 insertions(+), 66 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs index 4e93b53cb..67f6a39cc 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -108,7 +108,11 @@ impl UnaryExecutor { /// Note that changing the lengths for variable length data is not yet /// supported, as the length change won't persist since the metadata isn't /// being changed. 
- pub fn execute_in_place(array: &mut Array, mut op: Op) -> Result<()> + pub fn execute_in_place( + array: &mut Array, + selection: impl IntoExactSizeIterator, + mut op: Op, + ) -> Result<()> where S: MutablePhysicalStorage, Op: FnMut(&mut S::StorageType), @@ -117,11 +121,11 @@ impl UnaryExecutor { let mut input = S::get_addressable_mut(array.data.try_as_mut()?)?; if validity.all_valid() { - for idx in 0..input.len() { + for idx in selection.into_iter() { op(input.get_mut(idx).unwrap()); } } else { - for idx in 0..input.len() { + for idx in selection.into_iter() { if validity.is_valid(idx) { op(input.get_mut(idx).unwrap()); } @@ -238,7 +242,8 @@ mod tests { fn int32_inc_by_2_in_place() { let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); - UnaryExecutor::execute_in_place::(&mut array, |v| *v = *v + 2).unwrap(); + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v = *v + 2) + .unwrap(); let arr_slice = array.data().try_as_slice::().unwrap(); assert_eq!(&[3, 4, 5], arr_slice); @@ -351,7 +356,7 @@ mod tests { fn string_uppercase_in_place() { let mut array = Array::try_from_iter(["a", "bb", "ccc"]).unwrap(); - UnaryExecutor::execute_in_place::(&mut array, |v| { + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| { v.make_ascii_uppercase() }) .unwrap(); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs index 996c3cdeb..02c851ab3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/negate.rs @@ -2,23 +2,23 @@ use std::marker::PhantomData; use rayexec_error::Result; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalBool_2, - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - 
PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, }; -use crate::arrays::executor::scalar::UnaryExecutor2; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -61,14 +61,14 @@ impl ScalarFunction for Negate { // TODO: Interval let function_impl: Box = match dt.clone() { - dt @ DataType::Int8 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int64 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Int128 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float16 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float32 => Box::new(NegateImpl::::new(dt)), - dt @ DataType::Float64 => Box::new(NegateImpl::::new(dt)), + DataType::Int8 => Box::new(NegateImpl::::new()), + DataType::Int16 => Box::new(NegateImpl::::new()), + DataType::Int32 => Box::new(NegateImpl::::new()), + DataType::Int64 => Box::new(NegateImpl::::new()), + DataType::Int128 => Box::new(NegateImpl::::new()), + DataType::Float16 => Box::new(NegateImpl::::new()), + DataType::Float32 => Box::new(NegateImpl::::new()), + DataType::Float64 => Box::new(NegateImpl::::new()), other => return Err(invalid_input_types_error(self, &[other])), }; @@ -83,36 +83,29 @@ impl ScalarFunction for Negate { #[derive(Debug, 
Clone)] pub struct NegateImpl { - datatype: DataType, // TODO: Would be nice not needing to store this. _s: PhantomData, } impl NegateImpl { - fn new(datatype: DataType) -> Self { - NegateImpl { - datatype, - _s: PhantomData, - } + const fn new() -> Self { + NegateImpl { _s: PhantomData } } } impl ScalarFunctionImpl for NegateImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: std::ops::Neg> + Default + Copy, - ArrayData2: From>>, + S: MutablePhysicalStorage, + S::StorageType: std::ops::Neg + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - use std::ops::Neg; - - let a = inputs[0]; - let datatype = self.datatype.clone(); - let builder = ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - UnaryExecutor2::execute::(a, builder, |a, buf| buf.put(&(a.neg()))) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |&a, buf| buf.put(&(-a)), + ) } } @@ -165,14 +158,14 @@ impl ScalarFunction for Not { pub struct NotImpl; impl ScalarFunctionImpl for NotImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - UnaryExecutor2::execute::( - inputs[0], - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(inputs[0].logical_len()), - }, - |b, buf| buf.put(&(!b)), + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + UnaryExecutor::execute::( + &input.arrays()[0], + sel, + OutBuffer::from_array(output)?, + |&b, buf| buf.put(&(!b)), ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs index abefb8879..8c40f2692 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/random.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/random.rs @@ -1,9 +1,11 @@ use rayexec_error::Result; use 
serde::{Deserialize, Serialize}; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalF64; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation}; use crate::functions::scalar::{ @@ -62,12 +64,10 @@ impl ScalarFunction for Random { pub struct RandomImpl; impl ScalarFunctionImpl for RandomImpl { - fn execute2(&self, _inputs: &[&Array2]) -> Result { - // TODO: Need to pass in dummy input to produce all unique values. - let val = rand::random::(); - Ok(Array2::new_with_array_data( - DataType::Float64, - PrimitiveStorage::from(vec![val]), - )) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + UnaryExecutor::execute_in_place::(output, sel, |v| { + *v = rand::random::() + }) } } From c5e64ae5a8c43ada0cc6ed73453848715f7bd2bb Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 30 Dec 2024 15:32:10 -0500 Subject: [PATCH 28/59] and/or --- .../src/functions/scalar/builtin/boolean.rs | 213 ++++++++++-------- 1 file changed, 114 insertions(+), 99 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index 68da99e5f..4ec43590c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -3,13 +3,14 @@ use std::fmt::Debug; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array2; -use crate::arrays::bitmap::Bitmap; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::PhysicalBool; use crate::arrays::datatype::{DataType, 
DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::PhysicalBool_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor2, UniformExecutor2}; -use crate::arrays::storage::BooleanStorage; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::scalar::uniform::UniformExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -70,57 +71,52 @@ impl ScalarFunction for And { pub struct AndImpl; impl ScalarFunctionImpl for AndImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array2::new_with_array_data( - DataType::Boolean, - BooleanStorage::from(Bitmap::new_with_val(false, 1)), - ); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Default to false? + let vals = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + for v in vals { + *v = false; + } } - 1 => Ok(inputs[0].clone()), - 2 => { - let a = inputs[0]; - let b = inputs[1]; - BinaryExecutor2::execute::( - a, - b, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, buf| buf.put(&(a && b)), - ) + 1 => { + let input = &input.arrays()[0]; + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(v), + )?; } - 3 => { - let a = inputs[0]; - let b = inputs[1]; - let c = inputs[2]; - TernaryExecutor2::execute::( + 2 => { + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( a, + sel, b, - c, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, c, buf| buf.put(&(a && b && c)), - ) - } - _ => { - let len = inputs[0].logical_len(); - UniformExecutor2::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(len), - }, - |bools, buf| buf.put(&(bools.iter().all(|b| *b))), - ) + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a && b)), + )?; } + _ => UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |bools, buf| buf.put(&(bools.iter().all(|b| **b))), + )?, } + + Ok(()) } } @@ -178,55 +174,69 @@ impl ScalarFunction for Or { pub struct OrImpl; impl ScalarFunctionImpl for OrImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - match inputs.len() { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + + match input.arrays().len() { 0 => { - let mut array = Array2::new_with_array_data( - DataType::Boolean, - BooleanStorage::from(Bitmap::new_with_val(false, 1)), - ); - array.set_physical_validity(0, false); - Ok(array) + // TODO: Default to false? + let vals = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + for v in vals { + *v = false; + } + } + 1 => { + let input = &input.arrays()[0]; + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |v, buf| buf.put(v), + )?; } - 1 => Ok(inputs[0].clone()), 2 => { - let a = inputs[0]; - let b = inputs[1]; - BinaryExecutor2::execute::( + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryExecutor::execute::( a, + sel, b, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(a.logical_len()), - }, - |a, b, buf| buf.put(&(a || b)), - ) - } - _ => { - let len = inputs[0].logical_len(); - UniformExecutor2::execute::( - inputs, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(len), - }, - |bools, buf| buf.put(&(bools.iter().any(|b| *b))), - ) + sel, + OutBuffer::from_array(output)?, + |&a, &b, buf| buf.put(&(a || b)), + )?; } + _ => UniformExecutor::execute::( + input.arrays(), + sel, + OutBuffer::from_array(output)?, + |bools, buf| buf.put(&(bools.iter().any(|b| **b))), + )?, } + + Ok(()) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; - use crate::arrays::scalar::ScalarValue; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; #[test] fn and_bool_2() { - let a = Array2::from_iter([true, false, false]); - let b = Array2::from_iter([true, true, false]); + let a = Array::try_from_iter([true, false, false]).unwrap(); + let b = Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -244,18 +254,20 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([true, 
false, false]).unwrap(); - assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } #[test] fn and_bool_3() { - let a = Array2::from_iter([true, true, true]); - let b = Array2::from_iter([false, true, true]); - let c = Array2::from_iter([true, true, false]); + let a = Array::try_from_iter([true, true, true]).unwrap(); + let b = Array::try_from_iter([false, true, true]).unwrap(); + let c = Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::from_arrays([a, b, c], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -277,17 +289,19 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b, &c]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); - assert_eq!(ScalarValue::from(false), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(true), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + let expected = Array::try_from_iter([false, true, false]).unwrap(); + + assert_arrays_eq(&expected, &out); } #[test] fn or_bool_2() { - let a = Array2::from_iter([true, false, false]); - let b = Array2::from_iter([true, true, false]); + let a = Array::try_from_iter([true, false, false]).unwrap(); + let b = Array::try_from_iter([true, true, false]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -305,10 +319,11 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = 
Array::try_from_iter([true, true, false]).unwrap(); - assert_eq!(ScalarValue::from(true), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(true), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::from(false), out.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } } From 3a0de151edb699b76d5416e854cf18b59337a9af Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 30 Dec 2024 18:29:48 -0500 Subject: [PATCH 29/59] reduce --- .../rayexec_execution/src/arrays/array/exp.rs | 25 ++- .../src/arrays/buffer/any.rs | 35 ++++ .../src/arrays/buffer/mod.rs | 28 +++ .../src/arrays/buffer/physical_type.rs | 53 ++++- .../arrays/executor_exp/scalar/list_reduce.rs | 150 +++++++++++++ .../src/arrays/executor_exp/scalar/mod.rs | 1 + .../rayexec_execution/src/arrays/testutil.rs | 3 + .../functions/scalar/builtin/comparison.rs | 16 ++ .../src/functions/scalar/builtin/is.rs | 198 ++++++++++++++---- .../scalar/builtin/list/list_values.rs | 9 + 10 files changed, 472 insertions(+), 46 deletions(-) create mode 100644 crates/rayexec_execution/src/arrays/buffer/any.rs create mode 100644 crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 0838fd003..eb45d62e2 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -21,6 +21,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, + PhysicalList, PhysicalType, PhysicalU128, PhysicalU16, @@ -30,7 +31,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalUtf8, }; use crate::arrays::buffer::string_view::StringViewHeap; -use crate::arrays::buffer::{ArrayBuffer, DictionaryBuffer, SecondaryBuffer}; +use crate::arrays::buffer::{ArrayBuffer, DictionaryBuffer, ListBuffer, SecondaryBuffer}; use crate::arrays::datatype::DataType; use crate::arrays::scalar::interval::Interval; @@ 
-102,6 +103,24 @@ where buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); buffer } + PhysicalType::List => { + let inner_type = match &datatype { + DataType::List(m) => m.datatype.as_ref().clone(), + other => { + return Err(RayexecError::new(format!( + "Expected list datatype, got {other}" + ))) + } + }; + + let child = Self::new(manager, inner_type, capacity)?; + + let mut buffer = + ArrayBuffer::with_primary_capacity::(manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::List(ListBuffer::new(child))); + + buffer + } _ => unimplemented!(), }; @@ -118,6 +137,10 @@ where &self.datatype } + pub fn physical_type(&self) -> PhysicalType { + self.data().physical_type() + } + pub fn data(&self) -> &ArrayData { &self.data } diff --git a/crates/rayexec_execution/src/arrays/buffer/any.rs b/crates/rayexec_execution/src/arrays/buffer/any.rs new file mode 100644 index 000000000..3da75fb1d --- /dev/null +++ b/crates/rayexec_execution/src/arrays/buffer/any.rs @@ -0,0 +1,35 @@ +use super::buffer_manager::BufferManager; +use super::physical_type::Addressable; +use super::ArrayBuffer; + +/// Representation of the existence of a value. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AnyValue; + +/// Wrapper around an array buffer for providing `AddressableStorage` +/// functionality for any array buffer type. +/// +/// This is used when the values themselves don't matter, only that they exist. 
+#[derive(Debug)] +pub struct AnyAddressable<'a, B: BufferManager> { + pub(crate) buffer: &'a ArrayBuffer, +} + +impl<'a, B> Addressable for AnyAddressable<'a, B> +where + B: BufferManager, +{ + type T = AnyValue; + + fn len(&self) -> usize { + self.buffer.capacity() + } + + fn get(&self, idx: usize) -> Option<&Self::T> { + if idx < self.buffer.capacity() { + Some(&AnyValue) + } else { + None + } + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index e30de93c1..55321c132 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -1,9 +1,11 @@ +pub mod any; pub mod buffer_manager; pub mod physical_type; pub mod string_view; mod raw; +use any::AnyAddressable; use buffer_manager::{BufferManager, NopBufferManager}; use fmtutil::IntoDisplayableSlice; use physical_type::{PhysicalStorage, PhysicalType}; @@ -19,6 +21,7 @@ use string_view::{ }; use super::array::array_data::ArrayData; +use super::array::exp::Array; use super::array::validity::Validity; #[derive(Debug)] @@ -89,6 +92,10 @@ where &mut self.secondary } + pub fn as_any_addressable(&self) -> AnyAddressable { + AnyAddressable { buffer: self } + } + pub fn try_as_string_view_addressable(&self) -> Result { self.check_type(PhysicalType::Utf8)?; @@ -182,6 +189,7 @@ impl Drop for ArrayBuffer { pub enum SecondaryBuffer { StringViewHeap(StringViewHeap), Dictionary(DictionaryBuffer), + List(ListBuffer), None, } @@ -200,3 +208,23 @@ where DictionaryBuffer { buffer, validity } } } + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct ListItemMetadata { + pub offset: i32, + pub len: i32, +} + +#[derive(Debug)] +pub struct ListBuffer { + pub(crate) child: Array, +} + +impl ListBuffer +where + B: BufferManager, +{ + pub fn new(child: Array) -> Self { + ListBuffer { child } + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs 
b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 00a96090b..6dc29de20 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -11,7 +11,7 @@ use super::string_view::{ StringViewAddressableMut, StringViewMetadataUnion, }; -use super::ArrayBuffer; +use super::{ArrayBuffer, ListItemMetadata}; use crate::arrays::scalar::interval::Interval; #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -42,6 +42,7 @@ pub enum PhysicalType { impl PhysicalType { pub const fn primary_buffer_mem_size(&self) -> usize { match self { + Self::UntypedNull => PhysicalUntypedNull::PRIMARY_BUFFER_TYPE_SIZE, Self::Boolean => PhysicalBool::PRIMARY_BUFFER_TYPE_SIZE, Self::Int8 => PhysicalI8::PRIMARY_BUFFER_TYPE_SIZE, Self::Int16 => PhysicalI16::PRIMARY_BUFFER_TYPE_SIZE, @@ -58,6 +59,7 @@ impl PhysicalType { Self::Float64 => PhysicalF64::PRIMARY_BUFFER_TYPE_SIZE, Self::Interval => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, Self::Utf8 => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, + Self::List => PhysicalList::PRIMARY_BUFFER_TYPE_SIZE, Self::Dictionary => PhysicalInterval::PRIMARY_BUFFER_TYPE_SIZE, _ => unimplemented!(), @@ -249,6 +251,29 @@ generate_primitive!(f64, PhysicalF64, Float64); generate_primitive!(Interval, PhysicalInterval, Interval); +/// Marker type representing a null value without an associated type. +/// +/// This will be the type we use for queries like `SELECT NULL` where there's no +/// additional type information in the query. 
+#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +pub struct UntypedNull; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalUntypedNull; + +impl PhysicalStorage for PhysicalUntypedNull { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::UntypedNull; + + type PrimaryBufferType = UntypedNull; + type StorageType = UntypedNull; + + type Addressable<'a> = &'a [UntypedNull]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct PhysicalUtf8; @@ -317,3 +342,29 @@ impl PhysicalStorage for PhysicalDictionary { buffer.try_as_slice::() } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PhysicalList; + +impl PhysicalStorage for PhysicalList { + const PHYSICAL_TYPE: PhysicalType = PhysicalType::List; + + type PrimaryBufferType = ListItemMetadata; + type StorageType = Self::PrimaryBufferType; + + type Addressable<'a> = &'a [Self::StorageType]; + + fn get_addressable(buffer: &ArrayBuffer) -> Result> { + buffer.try_as_slice::() + } +} + +impl MutablePhysicalStorage for PhysicalList { + type AddressableMut<'a> = &'a mut [Self::StorageType]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs new file mode 100644 index 000000000..00292c744 --- /dev/null +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs @@ -0,0 +1,150 @@ +use iterutil::IntoExactSizeIterator; +use rayexec_error::{RayexecError, Result}; + +use crate::arrays::array::exp::Array; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalList, + PhysicalStorage, +}; +use crate::arrays::buffer::SecondaryBuffer; +use crate::arrays::executor_exp::OutBuffer; + +pub trait BinaryReducer: Default { + /// 
Put two values from each list into the reducer. + fn put_values(&mut self, v1: T1, v2: T2); + /// Produce the final value from the reducer. + fn finish(self) -> O; +} + +#[derive(Debug, Clone)] +pub struct BinaryListReducer; + +impl BinaryListReducer { + /// Iterate two list arrays, reducing lists from each array. + /// + /// List reduction requires that if both lists for a given row are non-null, + /// then both lists must be the same length and not contain nulls. + /// + /// If either list is null, the output row will be set to null (same as + /// other executor logic). + /// + /// `R` is used to create a new reducer for each pair of lists. + /// + /// `S1` and `S2` should be for the inner type within the list. + pub fn reduce( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, + out: OutBuffer, + ) -> Result<()> + where + S1: PhysicalStorage, + S2: PhysicalStorage, + O: MutablePhysicalStorage, + for<'a> R: BinaryReducer<&'a S1::StorageType, &'a S2::StorageType, &'a O::StorageType>, + { + if array1.is_dictionary() || array2.is_dictionary() { + // TODO + } + + let inner1 = match array1.data().get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => return Err(RayexecError::new("Array 1 not a list array")), + }; + + let inner2 = match array2.data().get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => return Err(RayexecError::new("Array 2 not a list array")), + }; + + if !inner1.validity().all_valid() || inner2.validity().all_valid() { + // TODO: This can be more selective. Rows that don't conform + // could be skipped with the selections. 
+ return Err(RayexecError::new( + "List reduction requires all values be non-null", + )); + } + + let metadata1 = PhysicalList::get_addressable(array1.data())?; + let metadata2 = PhysicalList::get_addressable(array2.data())?; + + let validity1 = array1.validity(); + let validity2 = array2.validity(); + + let mut output = O::get_addressable_mut(out.buffer)?; + + let input1 = S1::get_addressable(inner1.data())?; + let input2 = S2::get_addressable(inner2.data())?; + + if validity1.all_valid() && validity2.all_valid() { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + let meta1 = metadata1.get(input1_idx).unwrap(); + let meta2 = metadata2.get(input2_idx).unwrap(); + + if meta1.len != meta2.len { + return Err(RayexecError::new( + "List reduction requires lists be the same length", + ) + .with_field("len1", meta1.len) + .with_field("len2", meta2.len)); + } + + let mut reducer = R::default(); + + for offset in 0..meta1.len { + let idx1 = meta1.offset + offset; + let idx2 = meta2.offset + offset; + + let v1 = input1.get(idx1 as usize).unwrap(); + let v2 = input2.get(idx2 as usize).unwrap(); + + reducer.put_values(v1, v2); + } + + output.put(output_idx, reducer.finish()); + } + } else { + for (output_idx, (input1_idx, input2_idx)) in + sel1.into_iter().zip(sel2.into_iter()).enumerate() + { + if !validity1.is_valid(input1_idx) || !validity2.is_valid(input2_idx) { + out.validity.set_invalid(output_idx); + continue; + } + + let meta1 = metadata1.get(input1_idx).unwrap(); + let meta2 = metadata2.get(input2_idx).unwrap(); + + if meta1.len != meta2.len { + return Err(RayexecError::new( + "List reduction requires lists be the same length", + ) + .with_field("len1", meta1.len) + .with_field("len2", meta2.len)); + } + + let mut reducer = R::default(); + + for offset in 0..meta1.len { + let idx1 = meta1.offset + offset; + let idx2 = meta2.offset + offset; + + let v1 = input1.get(idx1 as usize).unwrap(); + let v2 = 
input2.get(idx2 as usize).unwrap(); + + reducer.put_values(v1, v2); + } + + output.put(output_idx, reducer.finish()); + } + } + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs index 9e14a0695..286006da0 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/mod.rs @@ -1,4 +1,5 @@ pub mod binary; +pub mod list_reduce; pub mod ternary; pub mod unary; pub mod uniform; diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index 11443a546..a135c255e 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -35,6 +35,7 @@ use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; /// Assert two arrays are logically equal. /// /// This will assume that the array's capacity is the array's logical length. +#[track_caller] pub fn assert_arrays_eq(array1: &Array, array2: &Array) { assert_eq!( array1.capacity(), @@ -48,6 +49,7 @@ pub fn assert_arrays_eq(array1: &Array, array2: &Array) { /// /// This will check valid and invalid values. Assertion error messages will /// print out Some/None to represent valid/invalid. +#[track_caller] pub fn assert_arrays_eq_count(array1: &Array, array2: &Array, count: usize) { assert_eq!(array1.datatype, array2.datatype); @@ -102,6 +104,7 @@ pub fn assert_arrays_eq_count(array1: &Array, array2: &Array, count: usize) { } /// Asserts two batches are logically equal. 
+#[track_caller] pub fn assert_batches_eq(batch1: &Batch, batch2: &Batch) { let arrays1 = batch1.arrays(); let arrays2 = batch2.arrays(); diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 0395a985d..b1876989e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -5,6 +5,22 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, +}; use crate::arrays::compute::cast::array::decimal_rescale; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index 5d8e096c4..9aab1b382 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -1,10 +1,9 @@ use rayexec_error::Result; -use crate::arrays::array::Array2; +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{PhysicalBool, PhysicalType}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{PhysicalAny, PhysicalBool_2}; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use 
crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -101,29 +100,34 @@ impl ScalarFunction for IsNotNull { pub struct CheckNullImpl; impl ScalarFunctionImpl for CheckNullImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let (initial, updated) = if IS_NULL { - // Executor will only execute on non-null inputs, so we can assume - // everything is null first then selectively set false for things - // that the executor executes. - (true, false) - } else { - (false, true) - }; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len_and_default_value(input.logical_len(), initial), - }; - let array = UnaryExecutor2::execute::(input, builder, |_, buf| { - buf.put(&updated) - })?; - - // Drop validity. - let data = array.into_array_data(); - Ok(Array2::new_with_array_data(DataType::Boolean, data)) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + let out = output + .data_mut() + .try_as_mut()? + .try_as_slice_mut::()?; + + if input.physical_type() == PhysicalType::UntypedNull { + // Everything null, just set to default value. 
+ out.iter_mut().for_each(|v| *v = IS_NULL); + + return Ok(()); + } + + let flat = input.flat_view()?; + + for (output_idx, idx) in sel.into_iter().enumerate() { + let is_valid = flat.validity.is_valid(idx); + if is_valid { + out[output_idx] = !IS_NULL; + } else { + out[output_idx] = IS_NULL; + } + } + + Ok(()) } } @@ -303,22 +307,128 @@ impl ScalarFunction for IsNotFalse { pub struct CheckBoolImpl; impl ScalarFunctionImpl for CheckBoolImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - - let initial = NOT; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len_and_default_value(input.logical_len(), initial), - }; - let array = UnaryExecutor2::execute::(input, builder, |val, buf| { - let b = if NOT { val != BOOL } else { val == BOOL }; - buf.put(&b) - })?; - - // Drop validity. - let data = array.into_array_data(); - Ok(Array2::new_with_array_data(DataType::Boolean, data)) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + + let out = output + .data_mut() + .try_as_mut()? 
+ .try_as_slice_mut::()?; + + let flat = input.flat_view()?; + let input = flat.array_buffer.try_as_slice::()?; + + for (output_idx, idx) in sel.into_iter().enumerate() { + let is_valid = flat.validity.is_valid(idx); + if is_valid { + let val = input[idx]; + out[output_idx] = if NOT { val != BOOL } else { val == BOOL } + } else { + // 'IS TRUE', 'IS FALSE' => false + // 'IS NOT TRUE', 'IS NOT FALSE' => true + out[output_idx] = NOT; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr; + + #[test] + fn is_null_all_valid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let batch = Batch::from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNull + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([false, false, false]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn is_null_some_invalid() { + let a = Array::try_from_iter([Some(1), None, None]).unwrap(); + let batch = Batch::from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNull + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([false, true, true]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + 
#[test] + fn is_true() { + let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); + let batch = Batch::from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsTrue + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([Some(true), Some(false), Some(false)]).unwrap(); + + assert_arrays_eq(&expected, &out); + } + + #[test] + fn is_not_true() { + let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); + let batch = Batch::from_arrays([a], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table(None, vec![DataType::Boolean], vec!["a".to_string()]) + .unwrap(); + + let planned = IsNotTrue + .plan(&table_list, vec![expr::col_ref(table_ref, 0)]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([Some(false), Some(true), Some(true)]).unwrap(); + + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index 13844e2cb..8da65c461 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,6 +1,8 @@ use rayexec_error::{RayexecError, Result}; +use crate::arrays::array::exp::Array; use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; use crate::arrays::executor::scalar::concat; 
use crate::arrays::storage::ListStorage; @@ -90,6 +92,13 @@ pub struct ListValuesImpl { } impl ScalarFunctionImpl for ListValuesImpl { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let arrays = input.arrays(); + + unimplemented!() + } + fn execute2(&self, inputs: &[&Array2]) -> Result { if inputs.is_empty() { let inner_type = match &self.list_datatype { From 04d6d381e29805dc7b0a42073e71cc9e22a3c108 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 30 Dec 2024 19:54:09 -0500 Subject: [PATCH 30/59] list values --- .../rayexec_execution/src/arrays/array/exp.rs | 4 + .../src/arrays/buffer/mod.rs | 87 +++++++ .../src/arrays/buffer/physical_type.rs | 10 + .../rayexec_execution/src/arrays/testutil.rs | 160 +++++++++---- .../scalar/builtin/list/list_values.rs | 213 +++++++++++++++--- 5 files changed, 396 insertions(+), 78 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index eb45d62e2..e06fa914e 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -153,6 +153,10 @@ where &self.validity } + pub fn validity_mut(&mut self) -> &mut Validity { + &mut self.validity + } + pub fn put_validity(&mut self, validity: Validity) -> Result<()> { if validity.len() != self.data().capacity() { return Err(RayexecError::new("Invalid validity length") diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index 55321c132..342c31712 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -24,6 +24,12 @@ use super::array::array_data::ArrayData; use super::array::exp::Array; use super::array::validity::Validity; +/// Buffer for arrays. +/// +/// Buffers are able to hold a fixed number of elements in the primary buffer. 
+/// Some types make use of secondary buffers for additional data. In such cases, +/// the primary buffer may hold things like metadata or offsets depending on the +/// type. #[derive(Debug)] pub struct ArrayBuffer { /// Physical type of the buffer. @@ -148,6 +154,38 @@ where Ok(BinaryViewAddressableMut { metadata, heap }) } + /// Resize the primary buffer to be able to hold `capacity` elements. + pub fn resize_primary( + &mut self, + manager: &B, + capacity: usize, + ) -> Result<()> { + self.check_type(S::PHYSICAL_TYPE)?; + + unsafe { + self.primary + .resize::(manager, capacity) + } + } + + /// Ensure the primary buffer can hold `capacity` elements. + /// + /// Does nothing if the primary buffer already has enough capacity. + pub fn reserve_primary( + &mut self, + manager: &B, + capacity: usize, + ) -> Result<()> { + self.check_type(S::PHYSICAL_TYPE)?; + + if self.capacity() >= capacity { + return Ok(()); + } + + self.resize_primary::(manager, capacity) + } + + /// Checks that the physical type of this buffer matches `want`. fn check_type(&self, want: PhysicalType) -> Result<()> { if want != self.physical_type { return Err(RayexecError::new("Physical types don't match") @@ -228,3 +266,52 @@ where ListBuffer { child } } } + +#[cfg(test)] +mod tests { + use physical_type::PhysicalI32; + + use super::*; + + #[test] + fn resize_primitive_increase_size() { + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 4).unwrap(); + + let s = buffer.try_as_slice::().unwrap(); + assert_eq!(4, s.len()); + + buffer + .resize_primary::(&NopBufferManager, 8) + .unwrap(); + + let s = buffer.try_as_slice_mut::().unwrap(); + assert_eq!(8, s.len()); + + // Sanity check, make sure we can write to it. 
+ s.iter_mut().for_each(|v| *v = 12); + + assert_eq!(vec![12; 8].as_slice(), s); + } + + #[test] + fn resize_primitive_decrease_size() { + let mut buffer = + ArrayBuffer::with_primary_capacity::(&NopBufferManager, 4).unwrap(); + + let s = buffer.try_as_slice::().unwrap(); + assert_eq!(4, s.len()); + + buffer + .resize_primary::(&NopBufferManager, 2) + .unwrap(); + + let s = buffer.try_as_slice_mut::().unwrap(); + assert_eq!(2, s.len()); + + // Sanity check, make sure we can write to it. + s.iter_mut().for_each(|v| *v = 12); + + assert_eq!(vec![12; 2].as_slice(), s); + } +} diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index 6dc29de20..f03063aff 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -274,6 +274,16 @@ impl PhysicalStorage for PhysicalUntypedNull { } } +impl MutablePhysicalStorage for PhysicalUntypedNull { + type AddressableMut<'a> = &'a mut [UntypedNull]; + + fn get_addressable_mut( + buffer: &mut ArrayBuffer, + ) -> Result> { + buffer.try_as_slice_mut::() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct PhysicalUtf8; diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index a135c255e..8492dc81f 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -8,6 +8,8 @@ use std::collections::BTreeMap; use std::fmt::Debug; +use iterutil::IntoExactSizeIterator; + use super::array::exp::Array; use super::batch_exp::Batch; use crate::arrays::array::flat::FlatArrayView; @@ -21,6 +23,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalI32, PhysicalI64, PhysicalI8, + PhysicalList, PhysicalStorage, PhysicalType, PhysicalU128, @@ -30,6 +33,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalU8, PhysicalUtf8, }; +use 
crate::arrays::buffer::SecondaryBuffer; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; /// Assert two arrays are logically equal. @@ -42,7 +46,10 @@ pub fn assert_arrays_eq(array1: &Array, array2: &Array) { array2.capacity(), "array capacities differ" ); - assert_arrays_eq_count(array1, array2, array1.capacity()) + + let sel = 0..array1.capacity(); + + assert_arrays_eq_sel(array1, sel.clone(), array2, sel) } /// Asserts that two arrays are logically equal for the first `count` rows. @@ -50,56 +57,122 @@ pub fn assert_arrays_eq(array1: &Array, array2: &Array) { /// This will check valid and invalid values. Assertion error messages will /// print out Some/None to represent valid/invalid. #[track_caller] -pub fn assert_arrays_eq_count(array1: &Array, array2: &Array, count: usize) { +pub fn assert_arrays_eq_sel( + array1: &Array, + sel1: impl IntoExactSizeIterator, + array2: &Array, + sel2: impl IntoExactSizeIterator, +) { assert_eq!(array1.datatype, array2.datatype); let flat1 = array1.flat_view().unwrap(); let flat2 = array2.flat_view().unwrap(); - fn assert_eq_inner(flat1: FlatArrayView, flat2: FlatArrayView, count: usize) - where - S: PhysicalStorage, - S::StorageType: ToOwned, - { - let mut out = BTreeMap::new(); - let sel = 0..count; - - UnaryExecutor::for_each_flat::(flat1, sel.clone(), |idx, v| { - out.insert(idx, v.map(|v| v.to_owned())); - }) - .unwrap(); + match array1.datatype.physical_type() { + PhysicalType::Boolean => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Int8 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int16 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int32 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int64 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::Int128 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::UInt8 => 
assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt16 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt32 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt64 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::UInt128 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float16 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float32 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Float64 => { + assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2) + } + PhysicalType::Utf8 => assert_arrays_eq_sel_inner::(flat1, sel1, flat2, sel2), + PhysicalType::List => { + assert_arrays_eq_sel_list_inner(flat1, sel1, flat2, sel2); + } + other => unimplemented!("{other:?}"), + } +} - UnaryExecutor::for_each_flat::(flat2, sel, |idx, v| match out.remove(&idx) { - Some(existing) => { - let v = v.map(|v| v.to_owned()); - assert_eq!(existing, v, "values differ at index {idx}"); - } - None => panic!("missing value for index in array 1 {idx}"), - }) - .unwrap(); +fn assert_arrays_eq_sel_list_inner( + flat1: FlatArrayView, + sel1: impl IntoExactSizeIterator, + flat2: FlatArrayView, + sel2: impl IntoExactSizeIterator, +) { + let inner1 = match flat1.array_buffer.get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => panic!("Missing child for array 1"), + }; - if !out.is_empty() { - panic!("extra entries in array 1: {:?}", out); - } + let inner2 = match flat2.array_buffer.get_secondary() { + SecondaryBuffer::List(list) => &list.child, + _ => panic!("Missing child for array 2"), + }; + + let metas1 = PhysicalList::get_addressable(&flat1.array_buffer).unwrap(); + let metas2 = PhysicalList::get_addressable(&flat2.array_buffer).unwrap(); + + let sel1 = sel1.into_iter(); + let sel2 = sel2.into_iter(); + assert_eq!(sel1.len(), sel2.len()); + + for (row_idx, (idx1, idx2)) in 
sel1.zip(sel2).enumerate() { + let idx1 = flat1.selection.get(idx1).unwrap(); + let idx2 = flat2.selection.get(idx2).unwrap(); + + assert_eq!( + flat1.validity.is_valid(idx1), + flat2.validity.is_valid(idx2), + "validity mismatch for row {row_idx}" + ); + + let m1 = metas1.get(idx1).unwrap(); + let m2 = metas2.get(idx2).unwrap(); + + let sel1 = (m1.offset as usize)..((m1.offset + m1.len) as usize); + let sel2 = (m2.offset as usize)..((m2.offset + m2.len) as usize); + + assert_arrays_eq_sel(inner1, sel1, inner2, sel2); } +} - match array1.datatype.physical_type() { - PhysicalType::Boolean => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Int8 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Int16 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Int32 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Int64 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Int128 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::UInt8 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::UInt16 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::UInt32 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::UInt64 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::UInt128 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Float16 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Float32 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Float64 => assert_eq_inner::(flat1, flat2, count), - PhysicalType::Utf8 => assert_eq_inner::(flat1, flat2, count), - other => unimplemented!("{other:?}"), +fn assert_arrays_eq_sel_inner( + flat1: FlatArrayView, + sel1: impl IntoExactSizeIterator, + flat2: FlatArrayView, + sel2: impl IntoExactSizeIterator, +) where + S: PhysicalStorage, + S::StorageType: ToOwned, +{ + let mut out = BTreeMap::new(); + + UnaryExecutor::for_each_flat::(flat1, sel1, |idx, v| { + out.insert(idx, v.map(|v| v.to_owned())); + }) + .unwrap(); + + 
UnaryExecutor::for_each_flat::(flat2, sel2, |idx, v| match out.remove(&idx) { + Some(existing) => { + let v = v.map(|v| v.to_owned()); + assert_eq!(existing, v, "values differ at index {idx}"); + } + None => panic!("missing value for index in array 1 {idx}"), + }) + .unwrap(); + + if !out.is_empty() { + panic!("extra entries in array 1: {:?}", out); } } @@ -121,7 +194,8 @@ pub fn assert_batches_eq(batch1: &Batch, batch2: &Batch) { ); for (array1, array2) in arrays1.iter().zip(arrays2) { - assert_arrays_eq_count(array1, array2, batch1.num_rows()); + let sel = 0..batch1.num_rows(); + assert_arrays_eq_sel(array1, sel.clone(), array2, sel); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index 8da65c461..0b5f04ae3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,11 +1,35 @@ -use rayexec_error::{RayexecError, Result}; +use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::exp::Array; -use crate::arrays::array::Array2; +use crate::arrays::array::validity::Validity; use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalList, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; +use crate::arrays::buffer::{ListItemMetadata, SecondaryBuffer}; use crate::arrays::datatype::{DataType, DataTypeId, ListTypeMeta}; -use crate::arrays::executor::scalar::concat; -use crate::arrays::storage::ListStorage; use 
crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -54,9 +78,7 @@ impl ScalarFunction for ListValues { function: Box::new(*self), return_type: return_type.clone(), inputs, - function_impl: Box::new(ListValuesImpl { - list_datatype: return_type, - }), + function_impl: Box::new(ListValuesImpl), }); } }; @@ -79,47 +101,168 @@ impl ScalarFunction for ListValues { function: Box::new(*self), return_type: return_type.clone(), inputs, - function_impl: Box::new(ListValuesImpl { - list_datatype: return_type, - }), + function_impl: Box::new(ListValuesImpl), }) } } #[derive(Debug, Clone)] -pub struct ListValuesImpl { - list_datatype: DataType, -} +pub struct ListValuesImpl; impl ScalarFunctionImpl for ListValuesImpl { fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { - let sel = input.selection(); - let arrays = input.arrays(); + let inner_type = match output.datatype() { + DataType::List(m) => m.datatype.physical_type(), + other => { + return Err(RayexecError::new(format!( + "Expected output to be list datatype, got {other}", + ))) + } + }; - unimplemented!() + match inner_type { + PhysicalType::UntypedNull => execute_list_values::(input, output), + PhysicalType::Boolean => execute_list_values::(input, output), + PhysicalType::Int8 => execute_list_values::(input, output), + PhysicalType::Int16 => execute_list_values::(input, output), + PhysicalType::Int32 => execute_list_values::(input, output), + PhysicalType::Int64 => execute_list_values::(input, output), + PhysicalType::Int128 => execute_list_values::(input, output), + PhysicalType::UInt8 => execute_list_values::(input, output), + PhysicalType::UInt16 => execute_list_values::(input, output), + PhysicalType::UInt32 => execute_list_values::(input, output), + PhysicalType::UInt64 => execute_list_values::(input, output), + PhysicalType::UInt128 => 
execute_list_values::(input, output), + PhysicalType::Float16 => execute_list_values::(input, output), + PhysicalType::Float32 => execute_list_values::(input, output), + PhysicalType::Float64 => execute_list_values::(input, output), + PhysicalType::Utf8 => execute_list_values::(input, output), + PhysicalType::Binary => execute_list_values::(input, output), + other => not_implemented!("list values for physical type {other}"), + } } +} + +/// Helper for constructing the list values and writing them to `output`. +/// +/// `S` should be the inner type. +fn execute_list_values(input: &Batch, output: &mut Array) -> Result<()> { + // TODO: Dictionary - fn execute2(&self, inputs: &[&Array2]) -> Result { - if inputs.is_empty() { - let inner_type = match &self.list_datatype { - DataType::List(l) => l.datatype.as_ref(), - other => panic!("invalid data type: {other}"), - }; - - let data = - ListStorage::empty_list(Array2::new_typed_null_array(inner_type.clone(), 1)?); - return Ok(Array2::new_with_array_data( - self.list_datatype.clone(), - data, - )); + let sel = input.selection(); + let inputs = input + .arrays() + .iter() + .map(|arr| S::get_addressable(arr.data())) + .collect::>>()?; + + let capacity = sel.len() * inputs.len(); + + let list_buf = match output.data_mut().try_as_mut()?.get_secondary_mut() { + SecondaryBuffer::List(list) => list, + _ => return Err(RayexecError::new("Expected list buffer")), + }; + + // Resize secondary buffer (and validity) to hold everything. + // + // TODO: Need to store buffer manager somewhere else. + list_buf + .child + .data_mut() + .try_as_mut()? + .reserve_primary::(&NopBufferManager, capacity)?; + + // Replace validity with properly sized one. + list_buf + .child + .put_validity(Validity::new_all_valid(capacity))?; + + let mut child_outputs = S::get_addressable_mut(list_buf.child.data.try_as_mut()?)?; + let child_validity = &mut list_buf.child.validity; + + // Write the list values from the input batch. 
+ let mut output_idx = 0; + for row_idx in sel.into_iter() { + for (col, validity) in inputs + .iter() + .zip(input.arrays().iter().map(|arr| arr.validity())) + { + if validity.is_valid(row_idx) { + child_outputs.put(output_idx, col.get(row_idx).unwrap()); + } else { + child_validity.set_invalid(output_idx); + } + + output_idx += 1; } + } + std::mem::drop(child_outputs); + + // Now generate and set the metadatas. + let mut out = PhysicalList::get_addressable_mut(output.data_mut().try_as_mut()?)?; + + let len = inputs.len() as i32; + for output_idx in 0..sel.len() { + // Note top-level not possible if we're provided a batch. + out.put( + output_idx, + &ListItemMetadata { + offset: (output_idx as i32) * len, + len, + }, + ); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::physical_type::PhysicalStorage; + use crate::expr; + + #[test] + fn list_values_primitive() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![DataType::Int32, DataType::Int32], + vec!["a".to_string(), "b".to_string()], + ) + .unwrap(); + + let planned = ListValues + .plan( + &table_list, + vec![expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)], + ) + .unwrap(); + + let mut out = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + // TODO: Assert list equality. 
- let out = concat(inputs)?; - let data = ListStorage::single_list(out); + let expected_metas = &[ + ListItemMetadata { offset: 0, len: 2 }, + ListItemMetadata { offset: 2, len: 2 }, + ListItemMetadata { offset: 4, len: 2 }, + ]; - Ok(Array2::new_with_array_data( - self.list_datatype.clone(), - data, - )) + let s = PhysicalList::get_addressable(&out.data).unwrap(); + assert_eq!(expected_metas, s); } } From 3b9ea901c04a5c8713a2c2cbbfec67886692a576 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 11:26:15 -0500 Subject: [PATCH 31/59] numeric cleanup --- .../functions/scalar/builtin/numeric/abs.rs | 13 ----- .../functions/scalar/builtin/numeric/acos.rs | 13 ----- .../functions/scalar/builtin/numeric/asin.rs | 13 ----- .../functions/scalar/builtin/numeric/atan.rs | 13 ----- .../functions/scalar/builtin/numeric/cbrt.rs | 13 ----- .../functions/scalar/builtin/numeric/ceil.rs | 13 ----- .../functions/scalar/builtin/numeric/cos.rs | 13 ----- .../scalar/builtin/numeric/degrees.rs | 13 ----- .../functions/scalar/builtin/numeric/exp.rs | 13 ----- .../functions/scalar/builtin/numeric/floor.rs | 13 ----- .../functions/scalar/builtin/numeric/isnan.rs | 54 +++++++++---------- .../functions/scalar/builtin/numeric/ln.rs | 13 ----- .../functions/scalar/builtin/numeric/log.rs | 26 --------- .../functions/scalar/builtin/numeric/mod.rs | 32 +---------- .../scalar/builtin/numeric/radians.rs | 13 ----- .../functions/scalar/builtin/numeric/sin.rs | 13 ----- .../functions/scalar/builtin/numeric/sqrt.rs | 13 ----- .../functions/scalar/builtin/numeric/tan.rs | 19 ------- 18 files changed, 27 insertions(+), 286 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index 0c3438743..d2ffb9b58 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -23,19 +23,6 @@ 
impl UnaryInputNumericOperation for AbsOp { const NAME: &'static str = "abs"; const DESCRIPTION: &'static str = "Compute the absolute value of a number"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.abs())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index 2a01e8966..cd3f9f61e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for AcosOp { const NAME: &'static str = "acos"; const DESCRIPTION: &'static str = "Compute the arccosine of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.acos())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index d56b539d7..48aec0004 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for AsinOp { const NAME: &'static str = "asin"; const DESCRIPTION: &'static str = "Compute the arcsine 
of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.asin())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 324f823ba..57b043d97 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for AtanOp { const NAME: &'static str = "atan"; const DESCRIPTION: &'static str = "Compute the arctangent of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.atan())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 105b82dc3..6869f7fc4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for CbrtOp { const NAME: &'static str = "cbrt"; const DESCRIPTION: &'static str = "Compute the cube root of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + 
Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cbrt())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index a50f6d282..5a2c5bd99 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for CeilOp { const NAME: &'static str = "ceil"; const DESCRIPTION: &'static str = "Round number up"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ceil())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 3bd5d5826..7be1dbf36 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for CosOp { const NAME: &'static str = "cos"; const DESCRIPTION: &'static str = "Compute the cosine of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - 
UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.cos())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 7a445054a..1024e2db1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for DegreesOp { const NAME: &'static str = "degrees"; const DESCRIPTION: &'static str = "Converts radians to degrees"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_degrees())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index f8a1df45f..7d3ff3fb5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for ExpOp { const NAME: &'static str = "exp"; const DESCRIPTION: &'static str = "Compute `e ^ val`"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.exp())) - } - fn execute_float( input: &Array, selection: impl 
IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index 6d932f661..b1b7d9611 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for FloorOp { const NAME: &'static str = "floor"; const DESCRIPTION: &'static str = "Round number down"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.floor())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs index f3b7e1e49..031462ce4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/isnan.rs @@ -5,17 +5,17 @@ use rayexec_error::Result; use super::ScalarFunction; use crate::arrays::array::exp::Array; -use crate::arrays::array::Array2; use crate::arrays::batch_exp::Batch; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalStorage2, +use crate::arrays::buffer::physical_type::{ + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalStorage, }; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::datatype::{DataType, DataTypeId}; +use 
crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunctionImpl}; @@ -73,9 +73,9 @@ impl ScalarFunction for IsNan { plan_check_num_args(self, &inputs, 1)?; let function_impl: Box = match inputs[0].datatype(table_list)? { - DataType::Float16 => Box::new(IsNanImpl::::new()), - DataType::Float32 => Box::new(IsNanImpl::::new()), - DataType::Float64 => Box::new(IsNanImpl::::new()), + DataType::Float16 => Box::new(IsNanImpl::::new()), + DataType::Float32 => Box::new(IsNanImpl::::new()), + DataType::Float64 => Box::new(IsNanImpl::::new()), other => return Err(invalid_input_types_error(self, &[other])), }; @@ -89,32 +89,30 @@ impl ScalarFunction for IsNan { } #[derive(Debug, Clone, Copy)] -pub struct IsNanImpl { +pub struct IsNanImpl { _s: PhantomData, } -impl IsNanImpl { - fn new() -> Self { +impl IsNanImpl { + const fn new() -> Self { IsNanImpl { _s: PhantomData } } } impl ScalarFunctionImpl for IsNanImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: Float, + S: PhysicalStorage, + S::StorageType: Float, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(input.logical_len()), - }; - - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.is_nan())) - } - fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { - unimplemented!() + let sel = input.selection(); + let input = &input.arrays()[0]; + + UnaryExecutor::execute::( + input, + sel, + OutBuffer::from_array(output)?, + |&v, buf| buf.put(&v.is_nan()), + ) } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 3945a8d89..38ab5ba16 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for LnOp { const NAME: &'static str = "ln"; const DESCRIPTION: &'static str = "Compute natural log of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.ln())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index 9c2c312c9..9c4cbdeb0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for LogOp { const NAME: &'static str = "log"; const DESCRIPTION: &'static str = "Compute base-10 log of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log10())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, @@ -63,19 +50,6 @@ impl UnaryInputNumericOperation for LogOp2 { const NAME: &'static str = "log2"; const DESCRIPTION: &'static str = "Compute base-2 log of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = 
ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.log2())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 3436227a1..95310d2ce 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -40,7 +40,6 @@ pub use sqrt::*; pub use tan::*; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::physical_type::{ MutablePhysicalStorage, @@ -49,14 +48,6 @@ use crate::arrays::buffer::physical_type::{ PhysicalF64, }; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalStorage2, - PhysicalType2, -}; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; use crate::functions::{invalid_input_types_error, plan_check_num_args, FunctionInfo, Signature}; @@ -90,12 +81,6 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat const NAME: &'static str; const DESCRIPTION: &'static str; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>; - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, @@ -103,10 +88,7 @@ pub trait UnaryInputNumericOperation: Debug + Clone + Copy + Sync + Send + 'stat ) -> Result<()> where S: MutablePhysicalStorage, - S::StorageType: Float, - { - unimplemented!() - } + S::StorageType: Float; } /// Helper struct 
for creating functions that accept and produce a single @@ -166,18 +148,6 @@ pub(crate) struct UnaryInputNumericScalarImpl { } impl ScalarFunctionImpl for UnaryInputNumericScalarImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - match input.physical_type() { - PhysicalType2::Float16 => O::execute_float2::(input, self.ret.clone()), - PhysicalType2::Float32 => O::execute_float2::(input, self.ret.clone()), - PhysicalType2::Float64 => O::execute_float2::(input, self.ret.clone()), - other => Err(RayexecError::new(format!( - "Invalid physical type: {other:?}" - ))), - } - } - fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { let sel = input.selection(); let input = &input.arrays()[0]; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 2bd34f2c6..41305b084 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for RadiansOp { const NAME: &'static str = "radians"; const DESCRIPTION: &'static str = "Converts degrees to radians"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.to_radians())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index d18bdc42b..1c6aefbe3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for SinOp { const NAME: &'static str = "sin"; const DESCRIPTION: &'static str = "Compute the sin of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sin())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index bf4eb2b67..b2b33cc7c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -23,19 +23,6 @@ impl UnaryInputNumericOperation for SqrtOp { const NAME: &'static str = "sqrt"; const DESCRIPTION: &'static str = "Compute the square root of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.sqrt())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 9566eecb9..0d92567d2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use 
super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Tan = UnaryInputNumericScalar; @@ -23,19 +17,6 @@ impl UnaryInputNumericOperation for TanOp { const NAME: &'static str = "tan"; const DESCRIPTION: &'static str = "Compute the tangent of value"; - fn execute_float2<'a, S>(input: &'a Array2, ret: DataType) -> Result - where - S: PhysicalStorage2, - S::Type<'a>: Float + Default, - ArrayData2: From>>, - { - let builder = ArrayBuilder { - datatype: ret, - buffer: PrimitiveBuffer::with_len(input.logical_len()), - }; - UnaryExecutor2::execute::(input, builder, |v, buf| buf.put(&v.tan())) - } - fn execute_float( input: &Array, selection: impl IntoExactSizeIterator, From 85cd55dc4aa3ebe2412095c8fcc49ffd34acf9f1 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 11:47:08 -0500 Subject: [PATCH 32/59] list extract --- .../scalar/builtin/list/list_extract.rs | 312 +++++++----------- 1 file changed, 111 insertions(+), 201 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index e1a067120..75ebfcf67 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -1,41 +1,36 @@ -use std::borrow::Borrow; - -use half::f16; +use iterutil::IntoExactSizeIterator; use 
rayexec_error::{not_implemented, RayexecError, Result}; -use serde::{Deserialize, Serialize}; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ - ArrayBuilder, - ArrayDataBuffer, - BooleanBuffer, - GermanVarlenBuffer, - PrimitiveBuffer, +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + Addressable, + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, }; -use crate::arrays::executor::physical_type::{ - PhysicalBinary_2, - PhysicalBool_2, - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalList_2, - PhysicalStorage2, - PhysicalType2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, - PhysicalUtf8_2, -}; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::buffer::SecondaryBuffer; +use crate::arrays::datatype::{DataType, DataTypeId}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -105,194 +100,109 @@ impl ScalarFunction for ListExtract { function: Box::new(*self), return_type: inner_datatype.clone(), inputs, - function_impl: Box::new(ListExtractImpl { - index, - inner_datatype, - }), + function_impl: Box::new(ListExtractImpl { index }), }) } } -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq)] pub 
struct ListExtractImpl { - inner_datatype: DataType, index: usize, } impl ScalarFunctionImpl for ListExtractImpl { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let input = inputs[0]; - extract(input, self.index) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let input = &input.arrays()[0]; + list_extract(input, sel, output, self.index) } } -fn extract(array: &Array2, idx: usize) -> Result { - let data = match array.array_data() { - ArrayData2::List(list) => list.as_ref(), - _other => return Err(RayexecError::new("Unexpected storage type")), - }; - - match data.inner_array().physical_type() { - PhysicalType2::UntypedNull => not_implemented!("NULL list extract"), - PhysicalType2::Boolean => { - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Int8 => { - let builder = ArrayBuilder { - datatype: DataType::Int8, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Int16 => { - let builder = ArrayBuilder { - datatype: DataType::Int16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Int32 => { - let builder = ArrayBuilder { - datatype: DataType::Int32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Int64 => { - let builder = ArrayBuilder { - datatype: DataType::Int64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Int128 => { - let builder = ArrayBuilder { - datatype: DataType::Int128, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, 
array, data.inner_array(), idx) - } - PhysicalType2::UInt8 => { - let builder = ArrayBuilder { - datatype: DataType::UInt8, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::UInt16 => { - let builder = ArrayBuilder { - datatype: DataType::UInt16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::UInt32 => { - let builder = ArrayBuilder { - datatype: DataType::UInt32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::UInt64 => { - let builder = ArrayBuilder { - datatype: DataType::UInt64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::UInt128 => { - let builder = ArrayBuilder { - datatype: DataType::UInt128, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Float16 => { - let builder = ArrayBuilder { - datatype: DataType::Float16, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Float32 => { - let builder = ArrayBuilder { - datatype: DataType::Float32, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Float64 => { - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - PhysicalType2::Utf8 => { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) 
- } - PhysicalType2::Binary => { - let builder = ArrayBuilder { - datatype: DataType::Binary, - buffer: GermanVarlenBuffer::<[u8]>::with_len(array.logical_len()), - }; - extract_inner::(builder, array, data.inner_array(), idx) - } - other => not_implemented!("List extract for physical type {other:?}"), +/// Extract an element from each list within a list array. +/// +/// If the element index falls outside the bounds of a list, the result for that +/// row will be NULL. +pub fn list_extract( + array: &Array, + sel: impl IntoExactSizeIterator, + output: &mut Array, + element_idx: usize, +) -> Result<()> { + match output.datatype().physical_type() { + PhysicalType::UntypedNull => { + extract_inner::(array, sel, output, element_idx) + } + PhysicalType::Boolean => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Int128 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::UInt128 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float16 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float32 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Float64 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Interval => { + extract_inner::(array, sel, output, element_idx) + } + PhysicalType::Utf8 => extract_inner::(array, sel, output, element_idx), + PhysicalType::Binary => 
extract_inner::(array, sel, output, element_idx), + other => not_implemented!("List extract for datatype {other}"), } } -fn extract_inner<'a, S, B>( - mut builder: ArrayBuilder, - outer: &Array2, - inner: &'a Array2, - el_idx: usize, -) -> Result +fn extract_inner( + array: &Array, + sel: impl IntoExactSizeIterator, + output: &mut Array, + element_idx: usize, +) -> Result<()> where - S: PhysicalStorage2, - B: ArrayDataBuffer, - S::Type<'a>: Borrow<::Type>, + S: MutablePhysicalStorage, { - let el_idx = el_idx as i32; + let flat = array.flat_view()?; - let mut validity = Bitmap::new_with_all_true(builder.buffer.len()); + let metas = PhysicalList::get_addressable(&flat.array_buffer)?; + let child = match flat.array_buffer.get_secondary() { + SecondaryBuffer::List(l) => &l.child, + _ => return Err(RayexecError::new("Missing secondary buffer for list")), + }; + + let child_buf = S::get_addressable(child.data())?; + let child_validity = child.validity(); + + let mut out_buffer = S::get_addressable_mut(output.data.try_as_mut()?)?; + let out_validity = &mut output.validity; + + for (output_idx, input_idx) in sel.into_iter().enumerate() { + let sel_idx = flat.selection.get(input_idx).unwrap(); - UnaryExecutor2::for_each::(outer, |idx, metadata| { - if let Some(metadata) = metadata { - if el_idx >= metadata.len { - // Indexing outside of the list. Mark null - validity.set_unchecked(idx, false); - return; + if flat.validity.is_valid(sel_idx) { + let meta = metas.get(sel_idx).unwrap(); + if element_idx >= meta.len as usize { + // Indexing outside of the list. User is allowed to do that, set + // the value to null. + out_validity.set_invalid(output_idx); + continue; } - // Otherwise put the element into the builder. - let inner_el_idx = metadata.offset + el_idx; - match UnaryExecutor2::value_at::(inner, inner_el_idx as usize) { - Ok(Some(el)) => { - builder.buffer.put(idx, el.borrow()); - return; - } - _ => { - // TODO: Do something if Err, just fall through right now. 
- } + let offset = meta.offset as usize + element_idx; + if !child_validity.is_valid(offset) { + // Element inside list is null. + out_validity.set_invalid(output_idx); + continue; } - } - // Metadata null, tried to extract from null array, mark null. - validity.set_unchecked(idx, false); - })?; + let val = child_buf.get(offset).unwrap(); + out_buffer.put(output_idx, val); + } else { + out_validity.set_invalid(output_idx); + } + } - Ok(Array2::new_with_validity_and_array_data( - builder.datatype, - validity, - builder.buffer.into_data(), - )) + Ok(()) } From d57cf93c8fcb694d08994979b002008262246b72 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 12:09:26 -0500 Subject: [PATCH 33/59] tests --- .../scalar/builtin/list/list_extract.rs | 103 ++++++++++++++++++ .../scalar/builtin/list/list_values.rs | 93 +++++++++------- 2 files changed, 155 insertions(+), 41 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 75ebfcf67..2e846f6b4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -206,3 +206,106 @@ where Ok(()) } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::ListTypeMeta; + use crate::arrays::testutil::assert_arrays_eq; + use crate::functions::scalar::builtin::list::list_values; + + #[test] + fn list_extract_primitive() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut second_elements = Array::new(&NopBufferManager, 
DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([4, 5, 6]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + } + + #[test] + fn list_extract_out_of_bounds() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut extracted_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut extracted_elements, 2).unwrap(); + + let expected = Array::try_from_iter([None as Option, None, None]).unwrap(); + assert_arrays_eq(&expected, &extracted_elements); + } + + #[test] + fn list_extract_child_invalid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + + let mut second_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + + // Elements as index 0 should still be all non-null. 
+ let mut first_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut first_elements, 0).unwrap(); + + let expected = Array::try_from_iter([1, 2, 3]).unwrap(); + assert_arrays_eq(&expected, &first_elements); + } + + #[test] + fn list_extract_parent_invalid() { + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([4, 5, 6]).unwrap(); + + let mut lists = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Int32)), + 3, + ) + .unwrap(); + + list_values(&[a, b], 0..3, &mut lists).unwrap(); + lists.validity.set_invalid(1); // [2, 5] => NULL + + let mut second_elements = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + list_extract(&lists, 0..3, &mut second_elements, 1).unwrap(); + + let expected = Array::try_from_iter([Some(4), None, Some(6)]).unwrap(); + assert_arrays_eq(&expected, &second_elements); + } +} diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index 0b5f04ae3..2917027dc 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,3 +1,4 @@ +use iterutil::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::exp::Array; @@ -111,51 +112,58 @@ pub struct ListValuesImpl; impl ScalarFunctionImpl for ListValuesImpl { fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { - let inner_type = match output.datatype() { - DataType::List(m) => m.datatype.physical_type(), - other => { - return Err(RayexecError::new(format!( - "Expected output to be list datatype, got {other}", - ))) - } - }; + list_values(input.arrays(), input.selection(), output) + } +} - match inner_type { - PhysicalType::UntypedNull => execute_list_values::(input, output), - 
PhysicalType::Boolean => execute_list_values::(input, output), - PhysicalType::Int8 => execute_list_values::(input, output), - PhysicalType::Int16 => execute_list_values::(input, output), - PhysicalType::Int32 => execute_list_values::(input, output), - PhysicalType::Int64 => execute_list_values::(input, output), - PhysicalType::Int128 => execute_list_values::(input, output), - PhysicalType::UInt8 => execute_list_values::(input, output), - PhysicalType::UInt16 => execute_list_values::(input, output), - PhysicalType::UInt32 => execute_list_values::(input, output), - PhysicalType::UInt64 => execute_list_values::(input, output), - PhysicalType::UInt128 => execute_list_values::(input, output), - PhysicalType::Float16 => execute_list_values::(input, output), - PhysicalType::Float32 => execute_list_values::(input, output), - PhysicalType::Float64 => execute_list_values::(input, output), - PhysicalType::Utf8 => execute_list_values::(input, output), - PhysicalType::Binary => execute_list_values::(input, output), - other => not_implemented!("list values for physical type {other}"), +pub fn list_values( + inputs: &[Array], + sel: impl IntoExactSizeIterator, + output: &mut Array, +) -> Result<()> { + let inner_type = match output.datatype() { + DataType::List(m) => m.datatype.physical_type(), + other => { + return Err(RayexecError::new(format!( + "Expected output to be list datatype, got {other}", + ))) } + }; + + match inner_type { + PhysicalType::UntypedNull => list_values_inner::(inputs, sel, output), + PhysicalType::Boolean => list_values_inner::(inputs, sel, output), + PhysicalType::Int8 => list_values_inner::(inputs, sel, output), + PhysicalType::Int16 => list_values_inner::(inputs, sel, output), + PhysicalType::Int32 => list_values_inner::(inputs, sel, output), + PhysicalType::Int64 => list_values_inner::(inputs, sel, output), + PhysicalType::Int128 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt8 => list_values_inner::(inputs, sel, output), + 
PhysicalType::UInt16 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt32 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt64 => list_values_inner::(inputs, sel, output), + PhysicalType::UInt128 => list_values_inner::(inputs, sel, output), + PhysicalType::Float16 => list_values_inner::(inputs, sel, output), + PhysicalType::Float32 => list_values_inner::(inputs, sel, output), + PhysicalType::Float64 => list_values_inner::(inputs, sel, output), + PhysicalType::Utf8 => list_values_inner::(inputs, sel, output), + PhysicalType::Binary => list_values_inner::(inputs, sel, output), + other => not_implemented!("list values for physical type {other}"), } } /// Helper for constructing the list values and writing them to `output`. /// /// `S` should be the inner type. -fn execute_list_values(input: &Batch, output: &mut Array) -> Result<()> { +fn list_values_inner( + inputs: &[Array], + sel: impl IntoExactSizeIterator, + output: &mut Array, +) -> Result<()> { // TODO: Dictionary - let sel = input.selection(); - let inputs = input - .arrays() - .iter() - .map(|arr| S::get_addressable(arr.data())) - .collect::>>()?; - + let sel = sel.into_iter(); + let sel_len = sel.len(); let capacity = sel.len() * inputs.len(); let list_buf = match output.data_mut().try_as_mut()?.get_secondary_mut() { @@ -180,13 +188,16 @@ fn execute_list_values(input: &Batch, output: &mut Ar let mut child_outputs = S::get_addressable_mut(list_buf.child.data.try_as_mut()?)?; let child_validity = &mut list_buf.child.validity; + // TODO: Possibly avoid allocating here? + let col_bufs = inputs + .iter() + .map(|arr| S::get_addressable(arr.data())) + .collect::>>()?; + // Write the list values from the input batch. 
let mut output_idx = 0; - for row_idx in sel.into_iter() { - for (col, validity) in inputs - .iter() - .zip(input.arrays().iter().map(|arr| arr.validity())) - { + for row_idx in sel { + for (col, validity) in col_bufs.iter().zip(inputs.iter().map(|arr| arr.validity())) { if validity.is_valid(row_idx) { child_outputs.put(output_idx, col.get(row_idx).unwrap()); } else { @@ -202,7 +213,7 @@ fn execute_list_values(input: &Batch, output: &mut Ar let mut out = PhysicalList::get_addressable_mut(output.data_mut().try_as_mut()?)?; let len = inputs.len() as i32; - for output_idx in 0..sel.len() { + for output_idx in 0..sel_len { // Note top-level not possible if we're provided a batch. out.put( output_idx, From ba4091c9446de921dae912720b084c08f35d815d Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 13:01:28 -0500 Subject: [PATCH 34/59] l2 --- .../src/arrays/executor/scalar/list.rs | 4 +- .../arrays/executor_exp/scalar/list_reduce.rs | 9 +- .../functions/scalar/builtin/comparison.rs | 4 +- .../scalar/builtin/similarity/l2_distance.rs | 142 ++++++++++++++---- 4 files changed, 118 insertions(+), 41 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs index c749c75d5..ec53a50de 100644 --- a/crates/rayexec_execution/src/arrays/executor/scalar/list.rs +++ b/crates/rayexec_execution/src/arrays/executor/scalar/list.rs @@ -12,7 +12,7 @@ use crate::arrays::executor::scalar::{ use crate::arrays::selection::{self, SelectionVector}; use crate::arrays::storage::{AddressableStorage, ListItemMetadata}; -pub trait BinaryListReducer { +pub trait BinaryListReducer2 { fn new(left_len: i32, right_len: i32) -> Self; fn put_values(&mut self, v1: T, v2: T); fn finish(self) -> O; @@ -41,7 +41,7 @@ impl mut builder: ArrayBuilder, ) -> Result where - R: BinaryListReducer, B::Type>, + R: BinaryListReducer2, B::Type>, S: PhysicalStorage2, B: ArrayDataBuffer, ::Type: Sized, diff --git 
a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs index 00292c744..a1860e6e2 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs @@ -45,7 +45,8 @@ impl BinaryListReducer { S1: PhysicalStorage, S2: PhysicalStorage, O: MutablePhysicalStorage, - for<'a> R: BinaryReducer<&'a S1::StorageType, &'a S2::StorageType, &'a O::StorageType>, + O::StorageType: Sized, + for<'a> R: BinaryReducer<&'a S1::StorageType, &'a S2::StorageType, O::StorageType>, { if array1.is_dictionary() || array2.is_dictionary() { // TODO @@ -61,7 +62,7 @@ impl BinaryListReducer { _ => return Err(RayexecError::new("Array 2 not a list array")), }; - if !inner1.validity().all_valid() || inner2.validity().all_valid() { + if !inner1.validity().all_valid() || !inner2.validity().all_valid() { // TODO: This can be more selective. Rows that don't conform // could be skipped with the selections. 
return Err(RayexecError::new( @@ -107,7 +108,7 @@ impl BinaryListReducer { reducer.put_values(v1, v2); } - output.put(output_idx, reducer.finish()); + output.put(output_idx, &reducer.finish()); } } else { for (output_idx, (input1_idx, input2_idx)) in @@ -141,7 +142,7 @@ impl BinaryListReducer { reducer.put_values(v1, v2); } - output.put(output_idx, reducer.finish()); + output.put(output_idx, &reducer.finish()); } } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index b1876989e..8fbb96487 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -47,7 +47,7 @@ use crate::arrays::executor::physical_type::{ PhysicalUntypedNull_2, PhysicalUtf8_2, }; -use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer, FlexibleListExecutor}; +use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer2, FlexibleListExecutor}; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; @@ -618,7 +618,7 @@ struct ListComparisonReducer { _op: PhantomData, } -impl BinaryListReducer for ListComparisonReducer +impl BinaryListReducer2 for ListComparisonReducer where T: PartialEq + PartialOrd, O: ComparisonOperation, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 0adae6425..14f62a52a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -4,16 +4,19 @@ use std::ops::AddAssign; use num_traits::{AsPrimitive, Float}; use rayexec_error::Result; +use crate::arrays::array::exp::Array; use 
crate::arrays::array::Array2; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalStorage2, +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalStorage, }; -use crate::arrays::executor::scalar::{BinaryListReducer, ListExecutor}; +use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::executor_exp::scalar::list_reduce::{BinaryListReducer, BinaryReducer}; +use crate::arrays::executor_exp::OutBuffer; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -68,13 +71,13 @@ impl ScalarFunction for L2Distance { (DataType::List(a), DataType::List(b)) => { match (a.datatype.as_ref(), b.datatype.as_ref()) { (DataType::Float16, DataType::Float16) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } (DataType::Float32, DataType::Float32) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } (DataType::Float64, DataType::Float64) => { - Box::new(L2DistanceImpl::::new()) + Box::new(L2DistanceImpl::::new()) } (a, b) => return Err(invalid_input_types_error(self, &[a, b])), } @@ -92,13 +95,13 @@ impl ScalarFunction for L2Distance { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct L2DistanceImpl { +pub struct L2DistanceImpl { _s: PhantomData, } impl L2DistanceImpl where - S: PhysicalStorage2, + S: PhysicalStorage, { fn new() -> Self { L2DistanceImpl { _s: PhantomData } @@ -107,19 +110,21 @@ where impl ScalarFunctionImpl for L2DistanceImpl where - S: PhysicalStorage2, - for<'a> S::Type<'a>: Float + AddAssign + AsPrimitive + Default + Copy, + S: MutablePhysicalStorage, + 
S::StorageType: Float + AddAssign + AsPrimitive + Default + Copy, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let a = inputs[0]; - let b = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(a.logical_len()), - }; - - ListExecutor::::binary_reduce::>(a, b, builder) + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let a = &input.arrays()[0]; + let b = &input.arrays()[1]; + + BinaryListReducer::reduce::, PhysicalF64>( + a, + sel, + b, + sel, + OutBuffer::from_array(output)?, + ) } } @@ -128,16 +133,11 @@ pub(crate) struct L2DistanceReducer { pub distance: F, } -impl BinaryListReducer for L2DistanceReducer +impl BinaryReducer<&F, &F, f64> for L2DistanceReducer where - F: Float + AddAssign + AsPrimitive + Default, + F: Float + AddAssign + AsPrimitive + Default + Copy, { - fn new(left_len: i32, right_len: i32) -> Self { - debug_assert_eq!(left_len, right_len); - Self::default() - } - - fn put_values(&mut self, v1: F, v2: F) { + fn put_values(&mut self, &v1: &F, &v2: &F) { let diff = v1 - v2; self.distance += diff * diff; } @@ -146,3 +146,79 @@ where self.distance.as_().sqrt() } } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::ListTypeMeta; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr; + use crate::functions::scalar::builtin::list::list_values; + + #[test] + fn l2_distance_ok() { + let mut a = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Float64)), + 1, + ) + .unwrap(); + list_values( + &[ + Array::try_from_iter([1.0]).unwrap(), + Array::try_from_iter([2.0]).unwrap(), + Array::try_from_iter([3.0]).unwrap(), + ], + 0..1, + &mut a, + ) + .unwrap(); + + let mut b = Array::new( + &NopBufferManager, + DataType::List(ListTypeMeta::new(DataType::Float64)), + 1, + ) 
+ .unwrap(); + list_values( + &[ + Array::try_from_iter([1.0]).unwrap(), + Array::try_from_iter([2.0]).unwrap(), + Array::try_from_iter([4.0]).unwrap(), + ], + 0..1, + &mut b, + ) + .unwrap(); + + let batch = Batch::from_arrays([a, b], true).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![ + DataType::List(ListTypeMeta::new(DataType::Float64)), + DataType::List(ListTypeMeta::new(DataType::Float64)), + ], + vec!["a".to_string(), "b".to_string()], + ) + .unwrap(); + + let planned = L2Distance + .plan( + &table_list, + vec![expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)], + ) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Float64, 1).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + + let expected = Array::try_from_iter([1.0]).unwrap(); + assert_arrays_eq(&expected, &out); + } +} From f71f7ace901f0242e8c36c97f5bfea21b2ffce5d Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 13:39:44 -0500 Subject: [PATCH 35/59] most comparisons --- .../functions/scalar/builtin/comparison.rs | 462 ++++++++---------- 1 file changed, 208 insertions(+), 254 deletions(-) diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 8fbb96487..40e8da1cc 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -4,9 +4,12 @@ use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::Array2; +use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::physical_type::{ - MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, PhysicalF16, PhysicalF32, PhysicalF64, @@ -15,15 +18,16 @@ use crate::arrays::buffer::physical_type::{ 
PhysicalI32, PhysicalI64, PhysicalI8, + PhysicalInterval, + PhysicalStorage, PhysicalU128, PhysicalU16, PhysicalU32, PhysicalU64, PhysicalU8, + PhysicalUtf8, }; -use crate::arrays::compute::cast::array::decimal_rescale; -use crate::arrays::compute::cast::behavior::CastFailBehavior; -use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; +use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer}; use crate::arrays::executor::physical_type::{ PhysicalBinary_2, @@ -48,8 +52,9 @@ use crate::arrays::executor::physical_type::{ PhysicalUtf8_2, }; use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer2, FlexibleListExecutor}; -use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; +use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; +use crate::arrays::executor_exp::OutBuffer; +use crate::expr::cast_expr::CastExpr; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation, Example}; use crate::functions::scalar::{PlannedScalarFunction, ScalarFunction, ScalarFunctionImpl}; @@ -223,12 +228,7 @@ impl ScalarFunction for Eq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, EqOperation>(*self, inputs, table_list) } } @@ -268,12 +268,7 @@ impl ScalarFunction for Neq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, NotEqOperation>(*self, inputs, table_list) } } @@ -309,12 +304,7 @@ impl ScalarFunction for Lt { table_list: &TableList, inputs: 
Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, LtOperation>(*self, inputs, table_list) } } @@ -350,12 +340,7 @@ impl ScalarFunction for LtEq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, LtEqOperation>(*self, inputs, table_list) } } @@ -391,12 +376,7 @@ impl ScalarFunction for Gt { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, GtOperation>(*self, inputs, table_list) } } @@ -432,12 +412,7 @@ impl ScalarFunction for GtEq { table_list: &TableList, inputs: Vec, ) -> Result { - Ok(PlannedScalarFunction { - function: Box::new(*self), - return_type: DataType::Boolean, - function_impl: new_comparison_impl::(self, &inputs, table_list)?, - inputs, - }) + new_planned_comparison_function::<_, GtEqOperation>(*self, inputs, table_list) } } @@ -520,92 +495,136 @@ impl ComparisonOperation for GtEqOperation { } } -/// Creates a new scalar function implementation based on input types. -fn new_comparison_impl( - func: &impl FunctionInfo, - inputs: &[Expression], +/// Create new planned scalar function for some comparison operation. +/// +/// This will normalize input expressions as required. 
+fn new_planned_comparison_function( + func: F, + mut inputs: Vec, table_list: &TableList, -) -> Result> { - plan_check_num_args(func, inputs, 2)?; - Ok( - match ( - inputs[0].datatype(table_list)?, - inputs[1].datatype(table_list)?, - ) { - (DataType::Boolean, DataType::Boolean) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int8, DataType::Int8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int16, DataType::Int16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int32, DataType::Int32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Int64, DataType::Int64) => { - Box::new(BaseComparisonImpl::::new()) +) -> Result +where + F: ScalarFunction + 'static, + O: ComparisonOperation, +{ + plan_check_num_args(&func, &inputs, 2)?; + + let function_impl: Box = match ( + inputs[0].datatype(table_list)?, + inputs[1].datatype(table_list)?, + ) { + (DataType::Boolean, DataType::Boolean) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int8, DataType::Int8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int16, DataType::Int16) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int32, DataType::Int32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int64, DataType::Int64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Int128, DataType::Int128) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt8, DataType::UInt8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt16, DataType::UInt16) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt32, DataType::UInt32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt64, DataType::UInt64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::UInt128, DataType::UInt128) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Float16, DataType::Float16) => { + Box::new(UnnestedComparisonImpl::::new()) + 
} + (DataType::Float32, DataType::Float32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Float64, DataType::Float64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Decimal64(left), DataType::Decimal64(right)) => { + // Normalize decimals. + match left.scale.cmp(&right.scale) { + Ordering::Less => { + // Scale up left. + inputs[0] = Expression::Cast(CastExpr { + to: DataType::Decimal64(right), + expr: Box::new(inputs[0].clone()), + }) + } + Ordering::Greater => { + // Scale up right. + inputs[1] = Expression::Cast(CastExpr { + to: DataType::Decimal64(left), + expr: Box::new(inputs[1].clone()), + }) + } + Ordering::Equal => (), // Nothing to do } - (DataType::Int128, DataType::Int128) => { - Box::new(BaseComparisonImpl::::new()) + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Decimal128(left), DataType::Decimal128(right)) => { + // Normalize decimals. + match left.scale.cmp(&right.scale) { + Ordering::Less => { + // Scale up left. + inputs[0] = Expression::Cast(CastExpr { + to: DataType::Decimal128(right), + expr: Box::new(inputs[0].clone()), + }) + } + Ordering::Greater => { + // Scale up right. 
+ inputs[1] = Expression::Cast(CastExpr { + to: DataType::Decimal128(left), + expr: Box::new(inputs[1].clone()), + }) + } + Ordering::Equal => (), // Nothing to do } + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Timestamp(_), DataType::Timestamp(_)) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Interval, DataType::Interval) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Date32, DataType::Date32) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Date64, DataType::Date64) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Utf8, DataType::Utf8) => { + Box::new(UnnestedComparisonImpl::::new()) + } + (DataType::Binary, DataType::Binary) => { + Box::new(UnnestedComparisonImpl::::new()) + } - (DataType::UInt8, DataType::UInt8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt16, DataType::UInt16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt32, DataType::UInt32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt64, DataType::UInt64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::UInt128, DataType::UInt128) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float16, DataType::Float16) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float32, DataType::Float32) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Float64, DataType::Float64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Decimal64(left), DataType::Decimal64(right)) => Box::new( - RescalingComparisionImpl::::new(left, right), - ), - (DataType::Decimal128(left), DataType::Decimal128(right)) => Box::new( - RescalingComparisionImpl::::new(left, right), - ), - (DataType::Timestamp(_), DataType::Timestamp(_)) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Interval, DataType::Interval) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Date32, DataType::Date32) => { - 
Box::new(BaseComparisonImpl::::new()) - } - (DataType::Date64, DataType::Date64) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Utf8, DataType::Utf8) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::Binary, DataType::Binary) => { - Box::new(BaseComparisonImpl::::new()) - } - (DataType::List(m1), DataType::List(m2)) if m1 == m2 => { - // TODO: We'll want to figure out casting for lists. - Box::new(ListComparisonImpl::::new(m1.datatype.physical_type2()?)) - } - (a, b) => return Err(invalid_input_types_error(func, &[a, b])), - }, - ) + (a, b) => return Err(invalid_input_types_error(&func, &[a, b])), + }; + + Ok(PlannedScalarFunction { + function: Box::new(func), + return_type: DataType::Boolean, + inputs, + function_impl, + }) } #[derive(Debug)] @@ -788,137 +807,61 @@ where } #[derive(Debug, Clone)] -struct BaseComparisonImpl { +struct UnnestedComparisonImpl { _op: PhantomData, _s: PhantomData, } -impl BaseComparisonImpl +impl UnnestedComparisonImpl where O: ComparisonOperation, - S: PhysicalStorage2, - for<'a> S::Type<'a>: PartialEq + PartialOrd, + S: PhysicalStorage, { - fn new() -> Self { - BaseComparisonImpl { + const fn new() -> Self { + UnnestedComparisonImpl { _op: PhantomData, _s: PhantomData, } } } -impl ScalarFunctionImpl for BaseComparisonImpl +impl ScalarFunctionImpl for UnnestedComparisonImpl where O: ComparisonOperation, - S: PhysicalStorage2, - for<'a> S::Type<'a>: PartialEq + PartialOrd, + S: PhysicalStorage, + S::StorageType: PartialEq + PartialOrd, { - fn execute2(&self, inputs: &[&Array2]) -> Result { - let left = inputs[0]; - let right = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(left.logical_len()), - }; + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + let sel = input.selection(); + let left = &input.arrays()[0]; + let right = &input.arrays()[1]; - BinaryExecutor2::execute::(left, right, builder, |a, b, buf| { - 
buf.put(&O::compare(a, b)) - }) - } -} - -// TODO: Determine if this is still needed. Ideally scaling happens prior to -// calling the comparison function. -#[derive(Debug, Clone)] -struct RescalingComparisionImpl { - _op: PhantomData, - _t: PhantomData, - - left: DecimalTypeMeta, - right: DecimalTypeMeta, -} - -impl RescalingComparisionImpl -where - O: ComparisonOperation, - T: DecimalType, - ArrayData2: From>, -{ - fn new(left: DecimalTypeMeta, right: DecimalTypeMeta) -> Self { - RescalingComparisionImpl { - _op: PhantomData, - _t: PhantomData, + BinaryExecutor::execute::( left, + sel, right, - } - } -} - -impl ScalarFunctionImpl for RescalingComparisionImpl -where - O: ComparisonOperation, - T: DecimalType, - ArrayData2: From>, -{ - fn execute2(&self, inputs: &[&Array2]) -> Result { - let left = inputs[0]; - let right = inputs[1]; - - let builder = ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(left.logical_len()), - }; - - match self.left.scale.cmp(&self.right.scale) { - Ordering::Greater => { - let scaled_right = decimal_rescale::( - right, - left.datatype().clone(), - CastFailBehavior::Error, - )?; - - BinaryExecutor2::execute::( - left, - &scaled_right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ) - } - Ordering::Less => { - let scaled_left = decimal_rescale::( - left, - right.datatype().clone(), - CastFailBehavior::Error, - )?; - - BinaryExecutor2::execute::( - &scaled_left, - right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ) - } - Ordering::Equal => BinaryExecutor2::execute::( - left, - right, - builder, - |a, b, buf| buf.put(&O::compare(a, b)), - ), - } + sel, + OutBuffer::from_array(output)?, + |left, right, buf| buf.put(&O::compare(left, right)), + ) } } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq; use crate::expr; #[test] fn eq_i32() { - let a = 
Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -936,16 +879,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([false, true, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, true, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn neq_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -963,16 +908,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([true, false, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, false, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn lt_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -990,16 +937,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = 
Array2::from_iter([true, false, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, false, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn lt_eq_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1017,16 +966,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([true, true, true]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([true, true, true]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn gt_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1044,16 +995,18 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([false, false, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, false, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } #[test] fn gt_eq_i32() { - let a = Array2::from_iter([1, 2, 3]); - let b = Array2::from_iter([2, 2, 6]); + let a = 
Array::try_from_iter([1, 2, 3]).unwrap(); + let b = Array::try_from_iter([2, 2, 6]).unwrap(); + let batch = Batch::from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1071,9 +1024,10 @@ mod tests { ) .unwrap(); - let out = planned.function_impl.execute2(&[&a, &b]).unwrap(); - let expected = Array2::from_iter([false, true, false]); + let mut out = Array::new(&NopBufferManager, DataType::Boolean, 3).unwrap(); + planned.function_impl.execute(&batch, &mut out).unwrap(); + let expected = Array::try_from_iter([false, true, false]).unwrap(); - assert_eq!(expected, out); + assert_arrays_eq(&expected, &out); } } From 90470e2653c31230f116b6b9e51712ad5627bc53 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 16:50:48 -0500 Subject: [PATCH 36/59] some manage --- .../src/arrays/array/array_data.rs | 39 ++++++--- .../rayexec_execution/src/arrays/array/exp.rs | 30 +++++++ .../src/arrays/buffer/buffer_manager.rs | 2 + .../intermediate/planner/plan_scan.rs | 2 +- .../operators/hash_join/condition.rs | 4 +- .../src/execution/operators/hash_join/mod.rs | 4 +- .../src/execution/operators/project.rs | 2 +- .../src/execution/operators/table_inout.rs | 4 +- .../src/execution/operators/unnest.rs | 4 +- .../src/expr/physical/case_expr.rs | 10 +-- .../src/expr/physical/cast_expr.rs | 4 +- .../src/expr/physical/column_expr.rs | 16 +++- .../src/expr/physical/evaluator.rs | 84 +++++++++++++++++++ .../src/expr/physical/literal_expr.rs | 2 +- .../src/expr/physical/mod.rs | 15 ++-- .../src/expr/physical/scalar_function_expr.rs | 4 +- .../functions/scalar/builtin/comparison.rs | 4 + .../src/functions/scalar/mod.rs | 13 +-- .../src/optimizer/expr_rewrite/const_fold.rs | 2 +- 19 files changed, 200 insertions(+), 45 deletions(-) create mode 100644 crates/rayexec_execution/src/expr/physical/evaluator.rs diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs 
index 7a3e60c8e..1e06632da 100644 --- a/crates/rayexec_execution/src/arrays/array/array_data.rs +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -13,7 +13,8 @@ pub struct ArrayData { #[derive(Debug)] enum ArrayDataInner { - Managed(B::CowPtr>), + /// Array buffer is being managed and is behind a shared pointer. + Managed(B::CowPtr>, Option>), Owned(ArrayBuffer), Uninit, } @@ -22,20 +23,36 @@ impl ArrayData where B: BufferManager, { - pub fn owned(buffer: ArrayBuffer) -> Self { + pub(crate) fn owned(buffer: ArrayBuffer) -> Self { ArrayData { inner: ArrayDataInner::Owned(buffer), } } - pub fn managed(buffer: B::CowPtr>) -> Self { - ArrayData { - inner: ArrayDataInner::Managed(buffer), + /// Set this array data to point to a buffer that's being managed. + /// + /// If this array data was previously holding onto an owned buffer, we store + /// that so we can quickly reset back to it as needed without needing to + /// allocate an additional buffer. + pub(crate) fn set_managed(&mut self, managed: B::CowPtr>) -> Result<()> { + match std::mem::replace(&mut self.inner, ArrayDataInner::Uninit) { + ArrayDataInner::Managed(_, cached) => { + // Nothing fancy, just update the managed array. + self.inner = ArrayDataInner::Managed(managed, cached); + } + ArrayDataInner::Owned(owned) => { + // Cache our owned version so we can reset the data to a mutable + // variant as needed. + self.inner = ArrayDataInner::Managed(managed, Some(owned)) + } + ArrayDataInner::Uninit => panic!("Array data in invalid state"), } + + Ok(()) } pub fn is_managed(&self) -> bool { - matches!(self.inner, ArrayDataInner::Managed(_)) + matches!(self.inner, ArrayDataInner::Managed(_, _)) } pub fn is_owned(&self) -> bool { @@ -52,7 +69,7 @@ where /// A cloned pointer to the newly managed array will be returned. pub fn make_managed(&mut self, manager: &B) -> Result>> { match &mut self.inner { - ArrayDataInner::Managed(m) => Ok(m.clone()), // Already managed. 
+ ArrayDataInner::Managed(m, _) => Ok(m.clone()), // Already managed. ArrayDataInner::Owned(_) => { let orig = std::mem::replace(&mut self.inner, ArrayDataInner::Uninit); let array = match orig { @@ -62,9 +79,9 @@ where match manager.make_cow(array) { Ok(managed) => { - self.inner = ArrayDataInner::Managed(managed); + self.inner = ArrayDataInner::Managed(managed, None); // Manager took ownership, nothing to cache. match &self.inner { - ArrayDataInner::Managed(m) => Ok(m.clone()), + ArrayDataInner::Managed(m, _) => Ok(m.clone()), _ => unreachable!("variant just set"), } } @@ -82,7 +99,7 @@ where pub fn try_as_mut(&mut self) -> Result<&mut ArrayBuffer> { match &mut self.inner { - ArrayDataInner::Managed(_) => Err(RayexecError::new( + ArrayDataInner::Managed(_, _) => Err(RayexecError::new( "Mut references from managed arrays not yet supported", )), ArrayDataInner::Owned(array) => Ok(array), @@ -97,7 +114,7 @@ where { fn as_ref(&self) -> &ArrayBuffer { match &self.inner { - ArrayDataInner::Managed(m) => m.as_ref(), + ArrayDataInner::Managed(m, _) => m.as_ref(), ArrayDataInner::Owned(array) => array, ArrayDataInner::Uninit => panic!("array in uninit state"), } diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index e06fa914e..059365559 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -278,6 +278,36 @@ where Ok(()) } + + /// "Clones" some other array into this array. + /// + /// This will try to make the buffer from the other array managed to make it + /// cheaply cloneable and shared with this array. + /// + /// Array capacities and datatypes must be the same for both arrays. 
+ pub fn clone_from(&mut self, manager: &B, other: &mut Self) -> Result<()> { + if self.datatype != other.datatype { + return Err(RayexecError::new( + "Attempted clone array from other array with different data types", + ) + .with_field("own_datatype", self.datatype.clone()) + .with_field("other_datatype", other.datatype.clone())); + } + + if self.capacity() != other.capacity() { + return Err(RayexecError::new( + "Attempted to clone into array from other array with different capacity", + ) + .with_field("own_capacity", self.capacity()) + .with_field("other_capacity", other.capacity())); + } + + let managed = other.data.make_managed(manager)?; + self.data.set_managed(managed)?; + self.validity = other.validity.clone(); + + Ok(()) + } } /// Helper for copying rows. diff --git a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs index b564c0c12..cdbb0651f 100644 --- a/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs +++ b/crates/rayexec_execution/src/arrays/buffer/buffer_manager.rs @@ -16,6 +16,8 @@ pub trait BufferManager: Debug + Sync + Send + Clone { fn make_cow(&self, item: T) -> Result, T>; } +// TODO: Probably rename, I don't think we want the 'cow' logic on this. Instead +// that'll probably be on ArrayData. pub trait CowPtr: Debug + Clone + AsRef + Deref { // TODO: Clone on write. 
// diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs index a0ad5281a..c5b968492 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_scan.rs @@ -96,7 +96,7 @@ impl IntermediatePipelineBuildState<'_> { let arrs = exprs .into_iter() .map(|expr| { - let arr = expr.eval(&dummy_batch)?; + let arr = expr.eval2(&dummy_batch)?; Ok(arr.into_owned()) }) .collect::>>()?; diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs index 1c40a69ae..b16ef7cea 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/condition.rs @@ -79,7 +79,7 @@ impl LeftPrecomputedJoinConditions { /// input. pub fn precompute_for_left_batch(&mut self, left: &Batch2) -> Result<()> { for condition in &mut self.conditions { - let precomputed = condition.left.eval(left)?; + let precomputed = condition.left.eval2(left)?; condition.left_precomputed.push(precomputed.into_owned()) } @@ -121,7 +121,7 @@ impl LeftPrecomputedJoinConditions { left_precomputed.select_mut(left_row_sel.clone()); // Eval the right side. - let right_arr = condition.right.eval(&selected_right)?; + let right_arr = condition.right.eval2(&selected_right)?; // Compute join condition result. 
let result = condition diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs index 717e307be..11e7a9241 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs @@ -314,7 +314,7 @@ impl ExecutableOperator for PhysicalHashJoin { state.hash_buf.resize(batch.num_rows(), 0); for (idx, equality) in self.equalities.iter().enumerate() { - let result = equality.right.eval(&batch)?; + let result = equality.right.eval2(&batch)?; if idx == 0 { HashExecutor::hash_no_combine(&result, &mut state.hash_buf)?; @@ -613,7 +613,7 @@ impl PhysicalHashJoin { state.hash_buf.resize(batch.num_rows(), 0); for (idx, equality) in self.equalities.iter().enumerate() { - let result = equality.left.eval(&batch)?; + let result = equality.left.eval2(&batch)?; if idx == 0 { HashExecutor::hash_no_combine(&result, &mut state.hash_buf)?; diff --git a/crates/rayexec_execution/src/execution/operators/project.rs b/crates/rayexec_execution/src/execution/operators/project.rs index 0eab9d78f..40f23557f 100644 --- a/crates/rayexec_execution/src/execution/operators/project.rs +++ b/crates/rayexec_execution/src/execution/operators/project.rs @@ -26,7 +26,7 @@ impl StatelessOperation for ProjectOperation { .exprs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; Ok(arr.into_owned()) }) .collect::>>()?; diff --git a/crates/rayexec_execution/src/execution/operators/table_inout.rs b/crates/rayexec_execution/src/execution/operators/table_inout.rs index 85355dbee..73a47e3fe 100644 --- a/crates/rayexec_execution/src/execution/operators/table_inout.rs +++ b/crates/rayexec_execution/src/execution/operators/table_inout.rs @@ -93,7 +93,7 @@ impl ExecutableOperator for PhysicalTableInOut { .function_inputs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; 
Ok(arr.into_owned()) }) .collect::>>()?; @@ -116,7 +116,7 @@ impl ExecutableOperator for PhysicalTableInOut { .projected_outputs .iter() .map(|expr| { - let arr = expr.eval(&batch)?; + let arr = expr.eval2(&batch)?; Ok(arr.into_owned()) }) .collect::>>()?; diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index dcb08617d..bc84bb236 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -141,11 +141,11 @@ impl ExecutableOperator for PhysicalUnnest { // Compute inputs. These will be stored until we've processed all rows. for (col_idx, expr) in self.project_expressions.iter().enumerate() { - state.project_inputs[col_idx] = expr.eval(&batch)?.into_owned(); + state.project_inputs[col_idx] = expr.eval2(&batch)?.into_owned(); } for (col_idx, expr) in self.unnest_expressions.iter().enumerate() { - state.unnest_inputs[col_idx] = expr.eval(&batch)?.into_owned(); + state.unnest_inputs[col_idx] = expr.eval2(&batch)?.into_owned(); } state.input_num_rows = batch.num_rows(); diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index 9f81d5a6d..f7eaca87e 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -30,7 +30,7 @@ pub struct PhysicalCaseExpr { } impl PhysicalCaseExpr { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { let mut arrays = Vec::new(); let mut indices: Vec<(usize, usize)> = (0..batch.num_rows()).map(|_| (0, 0)).collect(); @@ -49,7 +49,7 @@ impl PhysicalCaseExpr { let selected_batch = batch.select(selection.clone()); // Execute 'when'. 
- let selected = case.when.eval(&selected_batch)?; + let selected = case.when.eval2(&selected_batch)?; // Determine which rows should be executed for 'then', and which we // need to fall through on. @@ -57,7 +57,7 @@ impl PhysicalCaseExpr { // Select rows in batch to execute on based on 'trues'. let execute_batch = selected_batch.select(Arc::new(trues_sel.clone())); - let output = case.then.eval(&execute_batch)?; + let output = case.then.eval2(&execute_batch)?; // Store array for later interleaving. let array_idx = arrays.len(); @@ -83,7 +83,7 @@ impl PhysicalCaseExpr { let selection = Arc::new(SelectionVector::from_iter(remaining.index_iter())); let remaining_batch = batch.select(selection.clone()); - let output = self.else_expr.eval(&remaining_batch)?; + let output = self.else_expr.eval2(&remaining_batch)?; let array_idx = arrays.len(); arrays.push(output.into_owned()); @@ -181,7 +181,7 @@ mod tests { let planner = PhysicalExpressionPlanner::new(&table_list); let physical_case = planner.plan_scalar(&[table_ref], &case_expr).unwrap(); - let got = physical_case.eval(&batch).unwrap(); + let got = physical_case.eval2(&batch).unwrap(); assert_eq!(ScalarValue::from("else"), got.logical_value(0).unwrap()); assert_eq!( diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 59f6067a3..5b3987a2c 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -20,8 +20,8 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { - let input = self.expr.eval(batch)?; + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { + let input = self.expr.eval2(batch)?; let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; Ok(Cow::Owned(out)) } diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs 
b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 42aaff2c8..a4883edab 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -3,8 +3,12 @@ use std::fmt; use rayexec_error::{RayexecError, Result}; +use super::evaluator::ExpressionState; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; +use crate::arrays::batch_exp::Batch; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -14,7 +18,7 @@ pub struct PhysicalColumnExpr { } impl PhysicalColumnExpr { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { let col = batch.column(self.idx).ok_or_else(|| { RayexecError::new(format!( "Tried to get column at index {} in a batch with {} columns", @@ -25,6 +29,16 @@ impl PhysicalColumnExpr { Ok(Cow::Borrowed(col)) } + + pub(crate) fn eval( + &self, + input: &mut Batch, + _: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + unimplemented!() + } } impl fmt::Display for PhysicalColumnExpr { diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs new file mode 100644 index 000000000..988114ed5 --- /dev/null +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -0,0 +1,84 @@ +use rayexec_error::{RayexecError, Result}; + +use super::PhysicalScalarExpression; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; + +/// Evaluate expressions on batch inputs. +#[derive(Debug)] +pub struct ExpressionEvaluator { + expressions: Vec, + states: Vec, +} + +#[derive(Debug)] +pub(crate) struct ExpressionState { + /// Buffer for writing intermediate results. 
+ pub(crate) buffer: Batch, + /// Child states for expressions that contain other input expressions. + pub(crate) inputs: Vec, +} + +impl ExpressionState { + pub(crate) const fn empty() -> Self { + ExpressionState { + buffer: Batch::empty(), + inputs: Vec::new(), + } + } +} + +impl ExpressionEvaluator { + pub fn new(expressions: Vec, batch_size: usize) -> Self { + unimplemented!() + } + + pub fn num_expressions(&self) -> usize { + self.expressions.len() + } + + /// Evaluate the expression on an input batch, writing the results to the + /// output batch. + /// + /// Output batch must contain the same number of arrays as expressions in + /// this evaluator. Arrays will be written to in the same order as the + /// expressions. + /// + /// `input` is mutable only to allow converting arrays from owned to + /// managed. + pub fn eval_batch( + &mut self, + input: &mut Batch, + sel: Selection, + output: &mut Batch, + ) -> Result<()> { + debug_assert_eq!(self.expressions.len(), output.arrays().len()); + + for (idx, expr) in self.expressions.iter().enumerate() { + let output = &mut output.arrays_mut()[idx]; + let state = &mut self.states[idx]; + + Self::eval_expression(expr, input, state, sel, output)?; + } + + Ok(()) + } + + pub(crate) fn eval_expression( + expr: &PhysicalScalarExpression, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // TODO: Reset array for writes. 
+ + match expr { + // PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output), + // PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output), + // PhysicalScalarExpression::Literal(expr) => expr.eval(input, state, sel, output), + _ => unimplemented!(), + } + } +} diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index dfaee9484..f95f8e6e4 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -16,7 +16,7 @@ pub struct PhysicalLiteralExpr { } impl PhysicalLiteralExpr { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { let arr = self.literal.as_array(batch.num_rows())?; Ok(Cow::Owned(arr)) } diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 4d5ae80b8..428fb4b3e 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -1,3 +1,4 @@ +pub mod evaluator; pub mod planner; pub mod case_expr; @@ -34,13 +35,13 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { match self { - Self::Case(e) => e.eval(batch), - Self::Cast(e) => e.eval(batch), - Self::Column(e) => e.eval(batch), - Self::Literal(e) => e.eval(batch), - Self::ScalarFunction(e) => e.eval(batch), + Self::Case(e) => e.eval2(batch), + Self::Cast(e) => e.eval2(batch), + Self::Column(e) => e.eval2(batch), + Self::Literal(e) => e.eval2(batch), + Self::ScalarFunction(e) => e.eval2(batch), } } @@ -49,7 +50,7 @@ impl PhysicalScalarExpression { /// The selection vector will include row indices where the expression /// evaluates to true. 
pub fn select(&self, batch: &Batch2) -> Result { - let selected = self.eval(batch)?; + let selected = self.eval2(batch)?; let mut selection = SelectionVector::with_capacity(selected.logical_len()); SelectExecutor::select(&selected, &mut selection)?; diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 79c9e4f17..642620212 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -18,11 +18,11 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { - pub fn eval<'a>(&self, batch: &'a Batch2) -> Result> { + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { let inputs = self .inputs .iter() - .map(|input| input.eval(batch)) + .map(|input| input.eval2(batch)) .collect::>>()?; let refs: Vec<_> = inputs.iter().map(|a| a.as_ref()).collect(); // Can I not? diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 40e8da1cc..7b6018a0a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -695,6 +695,10 @@ impl ScalarFunctionImpl for ListComparisonImpl where O: ComparisonOperation, { + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { + unimplemented!() + } + fn execute2(&self, inputs: &[&Array2]) -> Result { let left = inputs[0]; let right = inputs[1]; diff --git a/crates/rayexec_execution/src/functions/scalar/mod.rs b/crates/rayexec_execution/src/functions/scalar/mod.rs index dde1ffb40..c68b9283a 100644 --- a/crates/rayexec_execution/src/functions/scalar/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/mod.rs @@ -112,11 +112,14 @@ pub trait ScalarFunctionImpl: Debug + Sync + Send + DynClone { /// Execute the function the 
input batch, writing the output for each row /// into `output` at the same index. /// - /// `output` is guaranteed to be the exact size needed for the output as - /// well as being the correct physical type. - fn execute(&self, input: &Batch, output: &mut Array) -> Result<()> { - unimplemented!() - } + /// `output` has the following guarantees: + /// - Has at least the primary buffer capacity needed to write the results. + /// - All validities are initalized to 'valid'. + /// - Array data can be made mutable via `try_as_mut()`. + /// + /// The batch's `selection` method should be called to determine which rows + /// should be looked at during function eval. + fn execute(&self, input: &Batch, output: &mut Array) -> Result<()>; } impl Clone for Box { diff --git a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs index c614b4acc..e213f074b 100644 --- a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs +++ b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs @@ -27,7 +27,7 @@ fn maybe_fold(table_list: &TableList, expr: &mut Expression) -> Result<()> { let planner = PhysicalExpressionPlanner::new(table_list); let phys_expr = planner.plan_scalar(&[], expr)?; let dummy = Batch2::empty_with_num_rows(1); - let val = phys_expr.eval(&dummy)?; + let val = phys_expr.eval2(&dummy)?; if val.logical_len() != 1 { return Err(RayexecError::new(format!( From 91acb77ab7c94738d3a75fe4f36f25558d38e93a Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 18:09:24 -0500 Subject: [PATCH 37/59] reset for write --- .../src/arrays/array/array_data.rs | 26 ++- .../rayexec_execution/src/arrays/array/exp.rs | 215 ++++++++++++------ 2 files changed, 167 insertions(+), 74 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs index 1e06632da..dd6037fec 100644 --- 
a/crates/rayexec_execution/src/arrays/array/array_data.rs +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -59,6 +59,30 @@ where matches!(self.inner, ArrayDataInner::Owned(_)) } + /// Try to reset the array data for writes. + /// + /// If the buffer is already owned, nothing is done. If the buffer is + /// managed, but we have a cached owned buffer, we use the cached buffer to + /// make this `Owned`. + /// + /// Returns `Ok(())` if the reset was successful, `Err(())` otherwise. If + /// `Err(())` is returned, this remains unchanged. + pub fn try_reset_for_write(&mut self) -> Result<(), ()> { + match &mut self.inner { + ArrayDataInner::Managed(_, cached) => { + if let Some(cached) = cached.take() { + self.inner = ArrayDataInner::Owned(cached); + Ok(()) + } else { + // No cached buffer. + Err(()) + } + } + ArrayDataInner::Owned(_) => Ok(()), // Nothing to do, already writable. + ArrayDataInner::Uninit => panic!("Array data in invalid state"), + } + } + /// Try to make the array managed by the buffer manager. /// /// Does nothing if the array is already managed. @@ -67,7 +91,7 @@ where /// still valid (and remains in the 'owned' state). /// /// A cloned pointer to the newly managed array will be returned. - pub fn make_managed(&mut self, manager: &B) -> Result>> { + pub(crate) fn make_managed(&mut self, manager: &B) -> Result>> { match &mut self.inner { ArrayDataInner::Managed(m, _) => Ok(m.clone()), // Already managed. ArrayDataInner::Owned(_) => { diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 059365559..72d6aff9a 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -51,79 +51,7 @@ where /// This will take care of initalizing the primary and secondary data /// buffers depending on the type. 
pub fn new(manager: &B, datatype: DataType, capacity: usize) -> Result { - let buffer = match datatype.physical_type() { - PhysicalType::Boolean => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Int8 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Int16 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Int32 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Int64 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Int128 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::UInt8 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::UInt16 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::UInt32 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::UInt64 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::UInt128 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Float16 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Float32 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Float64 => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? - } - PhysicalType::Interval => { - ArrayBuffer::with_primary_capacity::(manager, capacity)? 
- } - PhysicalType::Utf8 => { - let mut buffer = - ArrayBuffer::with_primary_capacity::(manager, capacity)?; - buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); - buffer - } - PhysicalType::List => { - let inner_type = match &datatype { - DataType::List(m) => m.datatype.as_ref().clone(), - other => { - return Err(RayexecError::new(format!( - "Expected list datatype, got {other}" - ))) - } - }; - - let child = Self::new(manager, inner_type, capacity)?; - - let mut buffer = - ArrayBuffer::with_primary_capacity::(manager, capacity)?; - buffer.put_secondary_buffer(SecondaryBuffer::List(ListBuffer::new(child))); - - buffer - } - _ => unimplemented!(), - }; - + let buffer = array_buffer_for_datatype(manager, &datatype, capacity)?; let validity = Validity::new_all_valid(capacity); Ok(Array { @@ -308,6 +236,103 @@ where Ok(()) } + + /// Resets self to prepare for writing to the array. + /// + /// This will: + /// - Reset validity to all 'valid'. + /// - Create or reuse a writeable buffer for array data. No guarantees are + /// made about the contents of the buffer. + pub fn reset_for_write(&mut self, manager: &B) -> Result<()> { + self.validity = Validity::new_all_valid(self.capacity()); + + if let Err(()) = self.data.try_reset_for_write() { + // Need to create a new buffer and set that. + let buffer = array_buffer_for_datatype(manager, &self.datatype, self.capacity())?; + self.data = ArrayData::owned(buffer) + } + + Ok(()) + } +} + +/// Create a new array buffer for a datatype. +fn array_buffer_for_datatype( + manager: &B, + datatype: &DataType, + capacity: usize, +) -> Result> +where + B: BufferManager, +{ + let buffer = match datatype.physical_type() { + PhysicalType::Boolean => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int8 => ArrayBuffer::with_primary_capacity::(manager, capacity)?, + PhysicalType::Int16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? 
+ } + PhysicalType::Int32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Int128 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt8 => ArrayBuffer::with_primary_capacity::(manager, capacity)?, + PhysicalType::UInt16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::UInt128 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float16 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float32 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Float64 => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Interval => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } + PhysicalType::Utf8 => { + let mut buffer = ArrayBuffer::with_primary_capacity::(manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::StringViewHeap(StringViewHeap::new())); + buffer + } + PhysicalType::List => { + let inner_type = match &datatype { + DataType::List(m) => m.datatype.as_ref().clone(), + other => { + return Err(RayexecError::new(format!( + "Expected list datatype, got {other}" + ))) + } + }; + + let child = Array::new(manager, inner_type, capacity)?; + + let mut buffer = ArrayBuffer::with_primary_capacity::(manager, capacity)?; + buffer.put_secondary_buffer(SecondaryBuffer::List(ListBuffer::new(child))); + + buffer + } + _ => unimplemented!(), + }; + + Ok(buffer) } /// Helper for copying rows. 
@@ -455,3 +480,47 @@ where Ok(array) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn reset_after_clone_from() { + let mut a1 = Array::try_from_iter(["a", "bb", "ccc"]).unwrap(); + let mut a2 = Array::try_from_iter(["d", "ee", "fff"]).unwrap(); + + a1.clone_from(&NopBufferManager, &mut a2).unwrap(); + + let expected = Array::try_from_iter(["d", "ee", "fff"]).unwrap(); + assert_arrays_eq(&expected, &a1); + assert_arrays_eq(&expected, &a2); + + a1.reset_for_write(&NopBufferManager).unwrap(); + + // Ensure we can write to it. + let mut strings = a1 + .data_mut() + .try_as_mut() + .unwrap() + .try_as_string_view_addressable_mut() + .unwrap(); + + strings.put(0, "hello"); + strings.put(1, "world"); + strings.put(2, "goodbye"); + + let expected = Array::try_from_iter(["hello", "world", "goodbye"]).unwrap(); + assert_arrays_eq(&expected, &a1); + } + + #[test] + fn reset_resets_validity() { + let mut a = Array::try_from_iter([Some("a"), None, Some("c")]).unwrap(); + assert!(!a.validity().all_valid()); + + a.reset_for_write(&NopBufferManager).unwrap(); + assert!(a.validity().all_valid()); + } +} From e95d8c0ee39ec962fbe52b18c9af400bf3ede5a7 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 18:36:19 -0500 Subject: [PATCH 38/59] eval column expr --- .../rayexec_execution/src/arrays/array/exp.rs | 2 + .../src/expr/physical/column_expr.rs | 63 ++++++++++++++++++- .../src/expr/physical/evaluator.rs | 7 ++- 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 72d6aff9a..5dea5080e 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -246,6 +246,8 @@ where pub fn reset_for_write(&mut self, manager: &B) -> Result<()> { self.validity = Validity::new_all_valid(self.capacity()); + // TODO: We should clear some secondary 
buffers (mostly string heap) + if let Err(()) = self.data.try_reset_for_write() { // Need to create a new buffer and set that. let buffer = array_buffer_for_datatype(manager, &self.datatype, self.capacity())?; diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index a4883edab..2fcf62b8d 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -9,6 +9,7 @@ use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -37,7 +38,14 @@ impl PhysicalColumnExpr { sel: Selection, output: &mut Array, ) -> Result<()> { - unimplemented!() + let col = &mut input.arrays_mut()[self.idx]; + output.clone_from(&NopBufferManager, col)?; + + if !sel.is_linear() || sel.len() != input.num_rows() { + output.select(&NopBufferManager, sel.iter())?; + } + + Ok(()) } } @@ -62,3 +70,56 @@ impl DatabaseProtoConv for PhysicalColumnExpr { }) } } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn column_expr_eval() { + let mut input = Batch::from_arrays( + [ + Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), + Array::try_from_iter([1, 2, 3, 4]).unwrap(), + ], + true, + ) + .unwrap(); + + let expr = PhysicalColumnExpr { idx: 1 }; + let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); + let sel = Selection::linear(4); + + expr.eval(&mut input, &mut ExpressionState::empty(), sel, &mut out) + .unwrap(); + + let expected = Array::try_from_iter([1, 2, 3, 4]).unwrap(); + assert_arrays_eq(&expected, &out); + } + + #[test] + fn 
column_expr_eval_with_selection() { + let mut input = Batch::from_arrays( + [ + Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), + Array::try_from_iter([1, 2, 3, 4]).unwrap(), + ], + true, + ) + .unwrap(); + + let expr = PhysicalColumnExpr { idx: 1 }; + let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); + let sel = Selection::selection(&[1, 3]); + + expr.eval(&mut input, &mut ExpressionState::empty(), sel, &mut out) + .unwrap(); + + let expected = Array::try_from_iter([2, 4]).unwrap(); + assert_arrays_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index 988114ed5..c3bfc5a6a 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -4,6 +4,7 @@ use super::PhysicalScalarExpression; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; /// Evaluate expressions on batch inputs. #[derive(Debug)] @@ -72,7 +73,11 @@ impl ExpressionEvaluator { sel: Selection, output: &mut Array, ) -> Result<()> { - // TODO: Reset array for writes. + // TODO: Figure out how the manager will be threaded down. Might just + // keep it on the array/buffer/batch/something else. We might need + // `Arc` here, ideally the buffer reuse prevents us from + // needing to call into it often. 
+ output.reset_for_write(&NopBufferManager)?; match expr { // PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output), From 7768bb7e9cc2c073e833dd11607d8cae65be5e4a Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Tue, 31 Dec 2024 19:45:28 -0500 Subject: [PATCH 39/59] lit --- .../src/arrays/array/array_data.rs | 2 +- .../rayexec_execution/src/arrays/array/exp.rs | 151 +++++++++++++++++- .../src/arrays/buffer/mod.rs | 30 +++- .../src/arrays/buffer/string_view.rs | 4 + .../src/expr/physical/literal_expr.rs | 71 ++++++++ .../scalar/builtin/list/list_values.rs | 5 + 6 files changed, 259 insertions(+), 4 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs index dd6037fec..1d4ef56ca 100644 --- a/crates/rayexec_execution/src/arrays/array/array_data.rs +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -67,7 +67,7 @@ where /// /// Returns `Ok(())` if the reset was successful, `Err(())` otherwise. If /// `Err(())` is returned, this remains unchanged. 
- pub fn try_reset_for_write(&mut self) -> Result<(), ()> { + pub(crate) fn try_reset_for_write(&mut self) -> Result<(), ()> { match &mut self.inner { ArrayDataInner::Managed(_, cached) => { if let Some(cached) = cached.take() { diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 5dea5080e..2b2b630c5 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -1,6 +1,6 @@ use half::f16; use iterutil::{IntoExactSizeIterator, TryFromExactSizeIterator}; -use rayexec_error::{RayexecError, Result}; +use rayexec_error::{not_implemented, RayexecError, Result}; use super::array_data::ArrayData; use super::flat::FlatArrayView; @@ -10,6 +10,7 @@ use crate::arrays::buffer::physical_type::{ Addressable, AddressableMut, MutablePhysicalStorage, + PhysicalBinary, PhysicalBool, PhysicalDictionary, PhysicalF16, @@ -31,9 +32,16 @@ use crate::arrays::buffer::physical_type::{ PhysicalUtf8, }; use crate::arrays::buffer::string_view::StringViewHeap; -use crate::arrays::buffer::{ArrayBuffer, DictionaryBuffer, ListBuffer, SecondaryBuffer}; +use crate::arrays::buffer::{ + ArrayBuffer, + DictionaryBuffer, + ListBuffer, + ListItemMetadata, + SecondaryBuffer, +}; use crate::arrays::datatype::DataType; use crate::arrays::scalar::interval::Interval; +use crate::arrays::scalar::ScalarValue; #[derive(Debug)] pub struct Array { @@ -243,6 +251,10 @@ where /// - Reset validity to all 'valid'. /// - Create or reuse a writeable buffer for array data. No guarantees are /// made about the contents of the buffer. + /// + /// Bfuffer values _must_ be written for a row before attempting to read a + /// value for that row after calling this function. Underlying storage may + /// be cleared resulting in stale metadata (and thus invalid reads). 
pub fn reset_for_write(&mut self, manager: &B) -> Result<()> { self.validity = Validity::new_all_valid(self.capacity()); @@ -254,6 +266,141 @@ where self.data = ArrayData::owned(buffer) } + // Reset secondary buffers. + match self.data.try_as_mut()?.get_secondary_mut() { + SecondaryBuffer::StringViewHeap(heap) => { + heap.clear(); + // All metadata is stale. Panics may occur if attempting to read + // prior to writing new values for a row. + } + SecondaryBuffer::List(list) => { + list.entries = 0; + // Child array keeps its capacity, it'll be overwritten. List + // item metadata will become stale, but technically won't error. + } + SecondaryBuffer::Dictionary(_) => (), + SecondaryBuffer::None => (), + } + + Ok(()) + } + + /// Set a scalar value at a given index. + pub fn set_value(&mut self, idx: usize, val: &ScalarValue) -> Result<()> { + if idx >= self.capacity() { + return Err(RayexecError::new("Index out of bounds") + .with_field("idx", idx) + .with_field("capacity", self.capacity())); + } + + self.validity.set_valid(idx); + let data = self.data.try_as_mut()?; + + match val { + ScalarValue::Null => { + self.validity.set_invalid(idx); + } + ScalarValue::Boolean(val) => { + PhysicalBool::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int8(val) => { + PhysicalI8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int16(val) => { + PhysicalI16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int32(val) => { + PhysicalI32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Int128(val) => { + PhysicalI128::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt8(val) => { + PhysicalU8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt16(val) => { + PhysicalU16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt32(val) => { + PhysicalU32::get_addressable_mut(data)?.put(idx, val); + } + 
ScalarValue::UInt64(val) => { + PhysicalU64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::UInt128(val) => { + PhysicalU128::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float16(val) => { + PhysicalF16::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float32(val) => { + PhysicalF32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Float64(val) => { + PhysicalF64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Decimal64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Decimal128(val) => { + PhysicalI128::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Date32(val) => { + PhysicalI32::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Date64(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Timestamp(val) => { + PhysicalI64::get_addressable_mut(data)?.put(idx, &val.value); + } + ScalarValue::Interval(val) => { + PhysicalInterval::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Utf8(val) => { + PhysicalUtf8::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::Binary(val) => { + PhysicalBinary::get_addressable_mut(data)?.put(idx, val); + } + ScalarValue::List(list) => { + let secondary = self.data.try_as_mut()?.get_secondary_mut().get_list_mut()?; + + // Ensure we have space to push. + let rem_cap = secondary.child.capacity() - secondary.entries; + if rem_cap < list.len() { + // TODO: Just resize secondary. + return Err(RayexecError::new( + "Secondary list buffer does not have required capacity", + ) + .with_field("remaining", rem_cap) + .with_field("need", list.len())); + } + + for (child_idx, val) in (secondary.entries..).zip(list) { + secondary.child.set_value(child_idx, val)?; + } + + // Now update entry count in child. Original value is our offset + // index. 
+ let start_offset = secondary.entries; + secondary.entries += list.len(); + + // Set metadata pointing to new list. + PhysicalList::get_addressable_mut(self.data.try_as_mut()?)?.put( + idx, + &ListItemMetadata { + offset: start_offset as i32, + len: list.len() as i32, + }, + ); + } + ScalarValue::Struct(_) => not_implemented!("set value for struct"), + } + Ok(()) } } diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index 342c31712..3686517bb 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -23,6 +23,7 @@ use string_view::{ use super::array::array_data::ArrayData; use super::array::exp::Array; use super::array::validity::Validity; +use super::scalar::ScalarValue; /// Buffer for arrays. /// @@ -231,6 +232,25 @@ pub enum SecondaryBuffer { None, } +impl SecondaryBuffer +where + B: BufferManager, +{ + pub fn get_list(&self) -> Result<&ListBuffer> { + match self { + Self::List(l) => Ok(l), + _ => Err(RayexecError::new("Expected list buffer")), + } + } + + pub fn get_list_mut(&mut self) -> Result<&mut ListBuffer> { + match self { + Self::List(l) => Ok(l), + _ => Err(RayexecError::new("Expected list buffer")), + } + } +} + #[derive(Debug)] pub struct DictionaryBuffer { pub(crate) validity: Validity, @@ -255,6 +275,14 @@ pub struct ListItemMetadata { #[derive(Debug)] pub struct ListBuffer { + /// Number of "filled" entries in the child array. + /// + /// This differs from the child's capacity as we need to be able + /// incrementally push back values. + /// + /// This is only looked at when writing values to the child array. Reads can + /// ignore this as all required info is in the entry metadata. 
+ pub(crate) entries: usize, pub(crate) child: Array, } @@ -263,7 +291,7 @@ where B: BufferManager, { pub fn new(child: Array) -> Self { - ListBuffer { child } + ListBuffer { entries: 0, child } } } diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index 2d32f57f4..6240c148d 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -210,6 +210,10 @@ impl StringViewHeap { StringViewHeap { buffer: Vec::new() } } + pub fn clear(&mut self) { + self.buffer.clear(); + } + pub fn push_bytes(&mut self, value: &[u8]) -> StringViewMetadataUnion { if value.len() as i32 <= 12 { // Store completely inline. diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index f95f8e6e4..3179cf94a 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -4,8 +4,13 @@ use std::fmt; use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; +use super::evaluator::ExpressionState; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::scalar::OwnedScalarValue; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -20,6 +25,21 @@ impl PhysicalLiteralExpr { let arr = self.literal.as_array(batch.num_rows())?; Ok(Cow::Owned(arr)) } + + pub(crate) fn eval( + &self, + _: &mut Batch, + _: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + output.set_value(0, &self.literal)?; + + // TODO: Need to be able to provide "constant" selection here. 
+ output.select(&NopBufferManager, std::iter::repeat(0).take(sel.len()))?; + + Ok(()) + } } impl fmt::Display for PhysicalLiteralExpr { @@ -43,3 +63,54 @@ impl DatabaseProtoConv for PhysicalLiteralExpr { }) } } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_arrays_eq; + + #[test] + fn literal_eval() { + let mut input = Batch::empty_with_num_rows(4); + + let expr = PhysicalLiteralExpr { + literal: "catdog".into(), + }; + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(); + expr.eval( + &mut input, + &mut ExpressionState::empty(), + Selection::linear(4), + &mut out, + ) + .unwrap(); + + let expected = Array::try_from_iter(["catdog", "catdog", "catdog", "catdog"]).unwrap(); + assert_arrays_eq(&expected, &out); + } + + #[test] + fn literal_eval_with_selection() { + let mut input = Batch::empty_with_num_rows(4); + + let expr = PhysicalLiteralExpr { + literal: "catdog".into(), + }; + + let mut out = Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(); + expr.eval( + &mut input, + &mut ExpressionState::empty(), + Selection::selection(&[2, 3]), + &mut out, + ) + .unwrap(); + + let expected = Array::try_from_iter(["catdog", "catdog"]).unwrap(); + assert_arrays_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index 2917027dc..13e3bc258 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -185,6 +185,11 @@ fn list_values_inner( .child .put_validity(Validity::new_all_valid(capacity))?; + // Update metadata on the list buffer itself. Note that this can be less + // than the buffer's actual capacity. 
This only matters during writes to + // know if we still have room to push to the child array. + list_buf.entries = capacity; + let mut child_outputs = S::get_addressable_mut(list_buf.child.data.try_as_mut()?)?; let child_validity = &mut list_buf.child.validity; From 577573520b5dbc8d8418850720ab5d668de2b016 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 2 Jan 2025 13:17:49 -0500 Subject: [PATCH 40/59] casting --- .../rayexec_execution/src/arrays/array/exp.rs | 6 +- .../src/arrays/array/validity.rs | 35 +- .../src/arrays/compute/cast/array.rs | 859 ++++++++++-------- .../src/arrays/compute/cast/behavior.rs | 92 +- .../src/arrays/executor_exp/mod.rs | 6 +- .../src/arrays/scalar/decimal.rs | 7 +- .../src/expr/physical/cast_expr.rs | 5 +- .../rayexec_parquet/src/reader/primitive.rs | 7 +- 8 files changed, 546 insertions(+), 471 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 2b2b630c5..3ff693e8c 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -29,6 +29,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalU32, PhysicalU64, PhysicalU8, + PhysicalUntypedNull, PhysicalUtf8, }; use crate::arrays::buffer::string_view::StringViewHeap; @@ -415,6 +416,9 @@ where B: BufferManager, { let buffer = match datatype.physical_type() { + PhysicalType::UntypedNull => { + ArrayBuffer::with_primary_capacity::(manager, capacity)? + } PhysicalType::Boolean => { ArrayBuffer::with_primary_capacity::(manager, capacity)? 
} @@ -478,7 +482,7 @@ where buffer } - _ => unimplemented!(), + other => not_implemented!("create array buffer for physical type {other}"), }; Ok(buffer) diff --git a/crates/rayexec_execution/src/arrays/array/validity.rs b/crates/rayexec_execution/src/arrays/array/validity.rs index a14f15a38..13464a7c4 100644 --- a/crates/rayexec_execution/src/arrays/array/validity.rs +++ b/crates/rayexec_execution/src/arrays/array/validity.rs @@ -8,7 +8,9 @@ pub struct Validity { #[derive(Debug, Clone)] enum ValidityInner { /// No mask has been set, assume all entries valid. - NoMask { len: usize }, + AllValid { len: usize }, + /// All entries invalid. + AllInvalid { len: usize }, /// Mask has been set. Bitmap indicates which entries are valid or invalid. Mask { bitmap: Bitmap }, } @@ -16,13 +18,20 @@ enum ValidityInner { impl Validity { pub fn new_all_valid(len: usize) -> Self { Validity { - inner: ValidityInner::NoMask { len }, + inner: ValidityInner::AllValid { len }, + } + } + + pub fn new_all_invalid(len: usize) -> Self { + Validity { + inner: ValidityInner::AllInvalid { len }, } } pub fn len(&self) -> usize { match &self.inner { - ValidityInner::NoMask { len } => *len, + ValidityInner::AllValid { len } => *len, + ValidityInner::AllInvalid { len } => *len, ValidityInner::Mask { bitmap } => bitmap.len(), } } @@ -33,32 +42,40 @@ impl Validity { pub fn all_valid(&self) -> bool { match &self.inner { - ValidityInner::NoMask { .. } => true, + ValidityInner::AllValid { .. } => true, + ValidityInner::AllInvalid { .. } => false, ValidityInner::Mask { bitmap } => bitmap.is_all_true(), } } pub fn is_valid(&self, idx: usize) -> bool { match &self.inner { - ValidityInner::NoMask { .. } => true, + ValidityInner::AllValid { .. } => true, + ValidityInner::AllInvalid { .. 
} => false, ValidityInner::Mask { bitmap } => bitmap.value(idx), } } pub fn set_valid(&mut self, idx: usize) { - if let ValidityInner::Mask { bitmap } = &mut self.inner { - bitmap.set_unchecked(idx, true) + match &mut self.inner { + ValidityInner::AllValid { .. } => (), // Already valid, + ValidityInner::AllInvalid { len } => { + let mut bitmap = Bitmap::new_with_all_false(*len); + bitmap.set_unchecked(idx, true); + self.inner = ValidityInner::Mask { bitmap } + } + ValidityInner::Mask { bitmap } => bitmap.set_unchecked(idx, true), } - // Otherwise we already assume everything is valid. } pub fn set_invalid(&mut self, idx: usize) { match &mut self.inner { - ValidityInner::NoMask { len } => { + ValidityInner::AllValid { len } => { let mut bitmap = Bitmap::new_with_all_true(*len); bitmap.set_unchecked(idx, false); self.inner = ValidityInner::Mask { bitmap } } + ValidityInner::AllInvalid { .. } => (), // Nothing to do, already invalid. ValidityInner::Mask { bitmap } => bitmap.set_unchecked(idx, false), } } diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index b0532fd30..2de9f9731 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -1,6 +1,6 @@ use std::ops::Mul; -use half::f16; +use iterutil::IntoExactSizeIterator; use num::{CheckedDiv, CheckedMul, Float, NumCast, PrimInt, ToPrimitive}; use rayexec_error::{RayexecError, Result}; @@ -48,233 +48,257 @@ use super::parse::{ UInt64Parser, UInt8Parser, }; -use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, TimeUnit}; -use crate::arrays::executor::builder::{ - ArrayBuilder, - BooleanBuffer, - GermanVarlenBuffer, - PrimitiveBuffer, -}; -use crate::arrays::executor::physical_type::{ - PhysicalBool_2, - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - 
PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalStorage2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, - PhysicalUtf8_2, +use crate::arrays::array::exp::Array; +use crate::arrays::array::validity::Validity; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalStorage, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUtf8, }; -use crate::arrays::executor::scalar::UnaryExecutor2; +use crate::arrays::datatype::{DataType, TimeUnit}; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::arrays::executor_exp::OutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; -pub fn cast_array(arr: &Array2, to: DataType, behavior: CastFailBehavior) -> Result { - if arr.datatype() == &to { - // TODO: Cow? - return Ok(arr.clone()); +/// Casts an array to another array. +/// +/// The datatype of `out` determines what we're casting values to. +/// +/// `behavior` determines what happens if casting results in an overflow or some +/// other precision/accuracy error. Note that if we don't have an implementation +/// of casting from one type to another, this will always error. +pub fn cast_array( + arr: &mut Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, + behavior: CastFailBehavior, +) -> Result<()> { + if arr.datatype() == out.datatype() { + out.clone_from(&NopBufferManager, arr)?; + out.select(&NopBufferManager, sel)?; + + return Ok(()); } - let arr = match arr.datatype() { + let to = out.datatype(); + + match arr.datatype() { DataType::Null => { - // Can cast NULL to anything else. 
- let data = to.physical_type2()?.zeroed_array_data(arr.logical_len()); - let validity = Bitmap::new_with_all_false(arr.logical_len()); - Array2::new_with_validity_and_array_data(to, validity, data) + // Can cast NULL to anything else. Just set the valid mask to all + // invalid. + out.put_validity(Validity::new_all_invalid(out.capacity()))?; + Ok(()) } // String to anything else. - DataType::Utf8 => cast_from_utf8(arr, to, behavior)?, + DataType::Utf8 => cast_from_utf8(arr, sel, out, behavior), // Primitive numerics to other primitive numerics. DataType::Int8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Int128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt8 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? 
+ cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::UInt128 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float16 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float32 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } DataType::Float64 if to.is_primitive_numeric() => { - cast_primitive_numeric_helper::(arr, to, behavior)? + cast_primitive_numeric_helper::(arr, sel, out, behavior) } // Int to date32 - DataType::Int8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::Int8 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::Int16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::Int16 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::Int32 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::Int32 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::UInt8 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::UInt8 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } - DataType::UInt16 if to == DataType::Date32 => { - cast_primitive_numeric::(arr, to, behavior)? + DataType::UInt16 if to == &DataType::Date32 => { + cast_primitive_numeric::(arr, sel, out, behavior) } // Int to decimal. DataType::Int8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? 
+ cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::Int128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt8 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt16 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt32 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt64 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } DataType::UInt128 if to.is_decimal() => { - cast_int_to_decimal_helper::(arr, to, behavior)? + cast_int_to_decimal_helper::(arr, sel, out, behavior) } // Float to decimal. + DataType::Float16 if to.is_decimal() => { + cast_float_to_decimal_helper::(arr, sel, out, behavior) + } DataType::Float32 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? + cast_float_to_decimal_helper::(arr, sel, out, behavior) } DataType::Float64 if to.is_decimal() => { - cast_float_to_decimal_helper::(arr, to, behavior)? + cast_float_to_decimal_helper::(arr, sel, out, behavior) } // Decimal to decimal DataType::Decimal64(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? 
+ decimal_rescale_helper::(arr, sel, out, behavior) } DataType::Decimal128(_) if to.is_decimal() => { - decimal_rescale_helper::(arr, to, behavior)? + decimal_rescale_helper::(arr, sel, out, behavior) } // Decimal to float. DataType::Decimal64(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, - other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), + DataType::Float16 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float32 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float64 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, DataType::Decimal128(_) => match to { - DataType::Float32 => cast_decimal_to_float::(arr, to, behavior)?, - DataType::Float64 => cast_decimal_to_float::(arr, to, behavior)?, - other => return Err(RayexecError::new(format!("Unhandled data type: {other}"))), + DataType::Float16 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float32 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + DataType::Float64 => { + cast_decimal_to_float::(arr, sel, out, behavior) + } + other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), }, // Anything to string. 
- _ if to.is_utf8() => cast_to_utf8(arr, behavior)?, + _ if to.is_utf8() => cast_to_utf8(arr, sel, out, behavior), - other => { - return Err(RayexecError::new(format!( - "Casting from {other} to {to} not implemented" - ))) - } - }; - - Ok(arr) + other => Err(RayexecError::new(format!( + "Casting from {other} to {to} not implemented", + ))), + } } -fn decimal_rescale_helper<'a, S>( - arr: &'a Array2, - to: DataType, +fn decimal_rescale_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - S::Type<'a>: PrimInt, + D1: DecimalType, { - match to { - DataType::Decimal64(_) => decimal_rescale::(arr, to, behavior), - DataType::Decimal128(_) => decimal_rescale::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => decimal_rescale::(arr, sel, out, behavior), + DataType::Decimal128(_) => decimal_rescale::(arr, sel, out, behavior), other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -pub fn decimal_rescale<'a, S, D>( - arr: &'a Array2, - to: DataType, +pub fn decimal_rescale( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - D: DecimalType, - S::Type<'a>: PrimInt, - ArrayData2: From>, + D1: DecimalType, + D2: DecimalType, { - let new_meta = to.try_get_decimal_type_meta()?; + let new_meta = out.datatype().try_get_decimal_type_meta()?; let arr_meta = arr.datatype().try_get_decimal_type_meta()?; - let scale_amount = ::from( + let scale_amount = ::from( 10.pow((arr_meta.scale - new_meta.scale).unsigned_abs() as u32), ) .expect("to be in range"); - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, 
buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // Convert to decimal primitive. - let v = match ::from(v) { + let v = match ::from(v) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); return; } }; @@ -282,169 +306,186 @@ where if arr_meta.scale < new_meta.scale { match v.checked_mul(&scale_amount) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + } } } else { match v.checked_div(&scale_amount) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + } } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_float_to_decimal_helper<'a, S>( - arr: &'a Array2, - to: DataType, +fn cast_float_to_decimal_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - S::Type<'a>: Float, + S: PhysicalStorage, + S::StorageType: Float, { - match to { - DataType::Decimal64(_) => cast_float_to_decimal::(arr, to, behavior), - DataType::Decimal128(_) => cast_float_to_decimal::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => { + cast_float_to_decimal::(arr, sel, out, behavior) + } + DataType::Decimal128(_) => { + cast_float_to_decimal::(arr, sel, out, behavior) + } other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -fn cast_float_to_decimal<'a, S, D>( - arr: &'a Array2, - to: DataType, +fn cast_float_to_decimal( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, + S: PhysicalStorage, + S::StorageType: Float, D: DecimalType, - S::Type<'a>: Float, - 
ArrayData2: From>, { - let decimal_meta = to.try_get_decimal_type_meta()?; + let decimal_meta = out.datatype().try_get_decimal_type_meta()?; let scale = decimal_meta.scale; let precision = decimal_meta.precision; - let scale = < as AddressableStorage>::T as NumCast>::from( - 10.pow(scale.unsigned_abs() as u32), - ) - .ok_or_else(|| RayexecError::new(format!("Failed to cast scale {scale} to float")))?; + let scale = ::from(10.pow(scale.unsigned_abs() as u32)) + .ok_or_else(|| RayexecError::new(format!("Failed to cast scale {scale} to float")))?; - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // TODO: Properly handle negative scale. let scaled_value = v.mul(scale).round(); match ::from(scaled_value) { Some(v) => { if let Err(err) = D::validate_precision(v, precision) { - fail_state.set_did_fail_with_error(buf.idx, err); + fail_state.set_error(|| err); + buf.put_null(); return; } buf.put(&v) } - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed cast decimal")); + buf.put_null(); + return; + } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -// TODO: Weird to specify both the float generic and datatype. 
-pub fn cast_decimal_to_float<'a, S, F>( - arr: &'a Array2, - to: DataType, +pub fn cast_decimal_to_float( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - F: Float + Default + Copy, - <::Storage<'a> as AddressableStorage>::T: ToPrimitive, - ArrayData2: From>, + D: DecimalType, + S: MutablePhysicalStorage, + S::StorageType: Float + Copy, { let decimal_meta = arr.datatype().try_get_decimal_type_meta()?; - let scale = ::from((10.0).powi(decimal_meta.scale as i32)).ok_or_else(|| { - RayexecError::new(format!( - "Failed to cast scale {} to float", - decimal_meta.scale - )) - })?; - - let builder = ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::::with_len(arr.logical_len()), - }; + let scale = ::from((10.0).powi(decimal_meta.scale as i32)) + .ok_or_else(|| { + RayexecError::new(format!( + "Failed to cast scale {} to float", + decimal_meta.scale + )) + })?; - let mut fail_state = behavior.new_state_for_array(arr); - let output = - UnaryExecutor2::execute::(arr, builder, |v, buf| match ::from(v) { + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( + arr, + sel, + OutBuffer::from_array(out)?, + |&v, buf| match ::from(v) { Some(v) => { - let scaled = v.div(scale); + let scaled = v / scale; buf.put(&scaled); } - None => fail_state.set_did_fail(buf.idx), - })?; + None => { + fail_state.set_error(|| RayexecError::new("Failed to cast float to decimal")); + buf.put_null(); + return; + } + }, + )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_int_to_decimal_helper<'a, S>( - arr: &'a Array2, - to: DataType, +fn cast_int_to_decimal_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - S::Type<'a>: PrimInt, + S: PhysicalStorage, + S::StorageType: PrimInt, { - match to { - DataType::Decimal64(_) => 
cast_int_to_decimal::(arr, to, behavior), - DataType::Decimal128(_) => cast_int_to_decimal::(arr, to, behavior), + match out.datatype() { + DataType::Decimal64(_) => cast_int_to_decimal::(arr, sel, out, behavior), + DataType::Decimal128(_) => { + cast_int_to_decimal::(arr, sel, out, behavior) + } other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -fn cast_int_to_decimal<'a, S, D>( - arr: &'a Array2, - to: DataType, +fn cast_int_to_decimal( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, + S: PhysicalStorage, D: DecimalType, - S::Type<'a>: PrimInt, - ArrayData2: From>, + S::StorageType: PrimInt, { - let decimal_meta = to.try_get_decimal_type_meta()?; + let decimal_meta = out.datatype().try_get_decimal_type_meta()?; let scale = decimal_meta.scale; let precision = decimal_meta.precision; let scale_amount = ::from(10.pow(scale.unsigned_abs() as u32)) .expect("to be in range"); - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: to, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| { + sel, + OutBuffer::from_array(out)?, + |&v, buf| { // Convert to decimal primitive. 
let v = match ::from(v) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } }; @@ -454,7 +495,8 @@ where match v.checked_mul(&scale_amount) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } } @@ -462,14 +504,16 @@ where match v.checked_div(&scale_amount) { Some(v) => v, None => { - fail_state.set_did_fail(buf.idx); + fail_state.set_error(|| RayexecError::new("Failed to cast int to decimal")); + buf.put_null(); return; } } }; if let Err(err) = D::validate_precision(val, precision) { - fail_state.set_did_fail_with_error(buf.idx, err); + fail_state.set_error(|| err); + buf.put_null(); return; } @@ -477,176 +521,249 @@ where }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_primitive_numeric_helper<'a, S>( - arr: &'a Array2, - to: DataType, +fn cast_primitive_numeric_helper( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - S::Type<'a>: ToPrimitive, + S: PhysicalStorage, + S::StorageType: ToPrimitive + Sized + Copy, { - match to { - DataType::Int8 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int16 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int32 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int64 => cast_primitive_numeric::(arr, to, behavior), - DataType::Int128 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt8 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt16 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt32 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt64 => cast_primitive_numeric::(arr, to, behavior), - DataType::UInt128 => cast_primitive_numeric::(arr, to, behavior), - 
DataType::Float16 => cast_primitive_numeric::(arr, to, behavior), - DataType::Float32 => cast_primitive_numeric::(arr, to, behavior), - DataType::Float64 => cast_primitive_numeric::(arr, to, behavior), + match out.datatype() { + DataType::Int8 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int64 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Int128 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt8 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt64 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::UInt128 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float16 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float32 => cast_primitive_numeric::(arr, sel, out, behavior), + DataType::Float64 => cast_primitive_numeric::(arr, sel, out, behavior), other => Err(RayexecError::new(format!("Unhandled data type: {other}"))), } } -pub fn cast_primitive_numeric<'a, S, T>( - arr: &'a Array2, - datatype: DataType, +/// Cast a primitive number to some other primitive numeric. 
+fn cast_primitive_numeric( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - S::Type<'a>: ToPrimitive, - T: NumCast + Default + Copy, - ArrayData2: From>, + S1: PhysicalStorage, + S1::StorageType: ToPrimitive + Sized + Copy, + S2: MutablePhysicalStorage, + S2::StorageType: NumCast + Copy, { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( - arr, - ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(arr.logical_len()), - }, - |v, buf| match T::from(v) { + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::(arr, sel, OutBuffer::from_array(out)?, |&v, buf| { + match NumCast::from(v) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), - }, - )?; + None => { + fail_state.set_error(|| RayexecError::new("Failed to cast primitive numeric")); + buf.put_null(); + } + } + })?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } pub fn cast_from_utf8( - arr: &Array2, - datatype: DataType, + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, -) -> Result { - match datatype { - DataType::Boolean => cast_parse_bool(arr, behavior), - DataType::Int8 => cast_parse_primitive(arr, datatype, behavior, Int8Parser::default()), - DataType::Int16 => cast_parse_primitive(arr, datatype, behavior, Int16Parser::default()), - DataType::Int32 => cast_parse_primitive(arr, datatype, behavior, Int32Parser::default()), - DataType::Int64 => cast_parse_primitive(arr, datatype, behavior, Int64Parser::default()), - DataType::Int128 => cast_parse_primitive(arr, datatype, behavior, Int128Parser::default()), - DataType::UInt8 => cast_parse_primitive(arr, datatype, behavior, UInt8Parser::default()), - DataType::UInt16 => cast_parse_primitive(arr, datatype, behavior, UInt16Parser::default()), - DataType::UInt32 => cast_parse_primitive(arr, 
datatype, behavior, UInt32Parser::default()), - DataType::UInt64 => cast_parse_primitive(arr, datatype, behavior, UInt64Parser::default()), - DataType::UInt128 => { - cast_parse_primitive(arr, datatype, behavior, UInt128Parser::default()) +) -> Result<()> { + match out.datatype() { + DataType::Boolean => { + cast_parse_primitive::<_, PhysicalBool>(arr, sel, out, behavior, BoolParser) } - DataType::Float16 => { - cast_parse_primitive(arr, datatype, behavior, Float16Parser::default()) + DataType::Int8 => { + cast_parse_primitive::<_, PhysicalI8>(arr, sel, out, behavior, Int8Parser::default()) } - DataType::Float32 => { - cast_parse_primitive(arr, datatype, behavior, Float32Parser::default()) + DataType::Int16 => { + cast_parse_primitive::<_, PhysicalI16>(arr, sel, out, behavior, Int16Parser::default()) } - DataType::Float64 => { - cast_parse_primitive(arr, datatype, behavior, Float64Parser::default()) + DataType::Int32 => { + cast_parse_primitive::<_, PhysicalI32>(arr, sel, out, behavior, Int32Parser::default()) + } + DataType::Int64 => { + cast_parse_primitive::<_, PhysicalI64>(arr, sel, out, behavior, Int64Parser::default()) + } + DataType::Int128 => cast_parse_primitive::<_, PhysicalI128>( + arr, + sel, + out, + behavior, + Int128Parser::default(), + ), + DataType::UInt8 => { + cast_parse_primitive::<_, PhysicalU8>(arr, sel, out, behavior, UInt8Parser::default()) + } + DataType::UInt16 => { + cast_parse_primitive::<_, PhysicalU16>(arr, sel, out, behavior, UInt16Parser::default()) } - DataType::Decimal64(m) => cast_parse_primitive( + DataType::UInt32 => { + cast_parse_primitive::<_, PhysicalU32>(arr, sel, out, behavior, UInt32Parser::default()) + } + DataType::UInt64 => { + cast_parse_primitive::<_, PhysicalU64>(arr, sel, out, behavior, UInt64Parser::default()) + } + DataType::UInt128 => cast_parse_primitive::<_, PhysicalU128>( + arr, + sel, + out, + behavior, + UInt128Parser::default(), + ), + DataType::Float16 => cast_parse_primitive::<_, PhysicalF16>( arr, - 
datatype, + sel, + out, + behavior, + Float16Parser::default(), + ), + DataType::Float32 => cast_parse_primitive::<_, PhysicalF32>( + arr, + sel, + out, + behavior, + Float32Parser::default(), + ), + DataType::Float64 => cast_parse_primitive::<_, PhysicalF64>( + arr, + sel, + out, + behavior, + Float64Parser::default(), + ), + DataType::Decimal64(m) => cast_parse_primitive::<_, PhysicalI64>( + arr, + sel, + out, behavior, Decimal64Parser::new(m.precision, m.scale), ), - DataType::Decimal128(m) => cast_parse_primitive( + DataType::Decimal128(m) => cast_parse_primitive::<_, PhysicalI128>( arr, - datatype, + sel, + out, behavior, Decimal128Parser::new(m.precision, m.scale), ), - DataType::Date32 => cast_parse_primitive(arr, datatype, behavior, Date32Parser), - DataType::Interval => { - cast_parse_primitive(arr, datatype, behavior, IntervalParser::default()) + DataType::Date32 => { + cast_parse_primitive::<_, PhysicalI32>(arr, sel, out, behavior, Date32Parser) } + DataType::Interval => cast_parse_primitive::<_, PhysicalInterval>( + arr, + sel, + out, + behavior, + IntervalParser::default(), + ), other => Err(RayexecError::new(format!( "Unable to cast utf8 array to {other}" ))), } } -pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result { +pub fn cast_to_utf8( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, + behavior: CastFailBehavior, +) -> Result<()> { match arr.datatype() { DataType::Boolean => { - cast_format::(arr, BoolFormatter::default(), behavior) + cast_format::(arr, sel, out, BoolFormatter::default(), behavior) + } + DataType::Int8 => { + cast_format::(arr, sel, out, Int8Formatter::default(), behavior) } - DataType::Int8 => cast_format::(arr, Int8Formatter::default(), behavior), DataType::Int16 => { - cast_format::(arr, Int16Formatter::default(), behavior) + cast_format::(arr, sel, out, Int16Formatter::default(), behavior) } DataType::Int32 => { - cast_format::(arr, Int32Formatter::default(), behavior) + 
cast_format::(arr, sel, out, Int32Formatter::default(), behavior) } DataType::Int64 => { - cast_format::(arr, Int64Formatter::default(), behavior) + cast_format::(arr, sel, out, Int64Formatter::default(), behavior) } DataType::Int128 => { - cast_format::(arr, Int128Formatter::default(), behavior) + cast_format::(arr, sel, out, Int128Formatter::default(), behavior) + } + DataType::UInt8 => { + cast_format::(arr, sel, out, UInt8Formatter::default(), behavior) } - DataType::UInt8 => cast_format::(arr, UInt8Formatter::default(), behavior), DataType::UInt16 => { - cast_format::(arr, UInt16Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt16Formatter::default(), behavior) } DataType::UInt32 => { - cast_format::(arr, UInt32Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt32Formatter::default(), behavior) } DataType::UInt64 => { - cast_format::(arr, UInt64Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt64Formatter::default(), behavior) } DataType::UInt128 => { - cast_format::(arr, UInt128Formatter::default(), behavior) + cast_format::(arr, sel, out, UInt128Formatter::default(), behavior) } DataType::Float32 => { - cast_format::(arr, Float32Formatter::default(), behavior) + cast_format::(arr, sel, out, Float32Formatter::default(), behavior) } DataType::Float64 => { - cast_format::(arr, Float64Formatter::default(), behavior) + cast_format::(arr, sel, out, Float64Formatter::default(), behavior) } - DataType::Decimal64(m) => cast_format::( + DataType::Decimal64(m) => cast_format::( arr, + sel, + out, Decimal64Formatter::new(m.precision, m.scale), behavior, ), - DataType::Decimal128(m) => cast_format::( + DataType::Decimal128(m) => cast_format::( arr, + sel, + out, Decimal128Formatter::new(m.precision, m.scale), behavior, ), DataType::Timestamp(m) => match m.unit { - TimeUnit::Second => { - cast_format::(arr, TimestampSecondsFormatter::default(), behavior) - } - TimeUnit::Millisecond => cast_format::( + TimeUnit::Second => 
cast_format::( arr, + sel, + out, + TimestampSecondsFormatter::default(), + behavior, + ), + TimeUnit::Millisecond => cast_format::( + arr, + sel, + out, TimestampMillisecondsFormatter::default(), behavior, ), - TimeUnit::Microsecond => cast_format::( + TimeUnit::Microsecond => cast_format::( arr, + sel, + out, TimestampMicrosecondsFormatter::default(), behavior, ), - TimeUnit::Nanosecond => cast_format::( + TimeUnit::Nanosecond => cast_format::( arr, + sel, + out, TimestampNanosecondsFormatter::default(), behavior, ), @@ -657,141 +774,129 @@ pub fn cast_to_utf8(arr: &Array2, behavior: CastFailBehavior) -> Result } } -fn cast_format<'a, S, F>( - arr: &'a Array2, +/// Cast an array to strings by formatting values. +fn cast_format( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, mut formatter: F, behavior: CastFailBehavior, -) -> Result +) -> Result<()> where - S: PhysicalStorage2, - F: Formatter>, + S: PhysicalStorage, + F: Formatter, { - let mut fail_state = behavior.new_state_for_array(arr); + let mut fail_state = behavior.new_state(); let mut string_buf = String::new(); - let output = UnaryExecutor2::execute::( + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::with_len(arr.logical_len()), - }, + sel, + OutBuffer::from_array(out)?, |v, buf| { string_buf.clear(); match formatter.write(&v, &mut string_buf) { Ok(_) => buf.put(string_buf.as_str()), - Err(_) => fail_state.set_did_fail(buf.idx), + Err(_) => { + fail_state.set_error(|| RayexecError::new("Failed to cast to utf8")); + buf.put_null(); + } } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } -fn cast_parse_bool(arr: &Array2, behavior: CastFailBehavior) -> Result { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( - arr, - ArrayBuilder { - datatype: DataType::Boolean, - buffer: BooleanBuffer::with_len(arr.logical_len()), - }, - |v, buf| match 
BoolParser.parse(v) { - Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), - }, - )?; - - fail_state.check_and_apply(arr, output) -} - -fn cast_parse_primitive( - arr: &Array2, - datatype: DataType, +/// Cast a utf8 array to some other primitive type by parsing string values. +fn cast_parse_primitive( + arr: &Array, + sel: impl IntoExactSizeIterator, + out: &mut Array, behavior: CastFailBehavior, mut parser: P, -) -> Result +) -> Result<()> where - T: Default + Copy, - P: Parser, - ArrayData2: From>, + S::StorageType: Sized, + P: Parser, + S: MutablePhysicalStorage, { - let mut fail_state = behavior.new_state_for_array(arr); - let output = UnaryExecutor2::execute::( + let mut fail_state = behavior.new_state(); + UnaryExecutor::execute::( arr, - ArrayBuilder { - datatype: datatype.clone(), - buffer: PrimitiveBuffer::::with_len(arr.logical_len()), - }, + sel, + OutBuffer::from_array(out)?, |v, buf| match parser.parse(v) { Some(v) => buf.put(&v), - None => fail_state.set_did_fail(buf.idx), + None => { + fail_state.set_error(|| RayexecError::new("Failed to parse value from utf8")); + buf.put_null(); + } }, )?; - fail_state.check_and_apply(arr, output) + fail_state.into_result() } #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; + use super::*; use crate::arrays::datatype::DecimalTypeMeta; - use crate::arrays::scalar::ScalarValue; + use crate::arrays::testutil::assert_arrays_eq; #[test] fn array_cast_utf8_to_i32() { - let arr = Array2::from_iter(["13", "18", "123456789"]); + let mut arr = Array::try_from_iter(["13", "18", "123456789"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); - let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); - assert_eq!(ScalarValue::from(13), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(18), got.logical_value(1).unwrap()); - 
assert_eq!(ScalarValue::from(123456789), got.logical_value(2).unwrap()); + let expected = Array::try_from_iter([13, 18, 123456789]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_utf8_to_i32_overflow_error() { - let arr = Array2::from_iter(["13", "18", "123456789000000"]); - cast_array(&arr, DataType::Int32, CastFailBehavior::Error).unwrap_err(); + let mut arr = Array::try_from_iter(["13", "18", "123456789000000"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap_err(); } #[test] fn array_cast_utf8_to_i32_overflow_null() { - let arr = Array2::from_iter(["13", "18", "123456789000000"]); + let mut arr = Array::try_from_iter(["13", "18", "123456789000000"]).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); - let got = cast_array(&arr, DataType::Int32, CastFailBehavior::Null).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Null).unwrap(); - assert_eq!(ScalarValue::from(13), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::from(18), got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Null, got.logical_value(2).unwrap()); + let expected = Array::try_from_iter([Some(13), Some(18), None]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_null_to_f32() { - let arr = Array2::new_untyped_null_array(3); + let mut arr = Array::new(&NopBufferManager, DataType::Null, 3).unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Float32, 3).unwrap(); - let got = cast_array(&arr, DataType::Float32, CastFailBehavior::Error).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); - assert_eq!(&DataType::Float32, got.datatype()); + let expected = Array::try_from_iter([None as Option, None, None]).unwrap(); - assert_eq!(ScalarValue::Null, got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Null, got.logical_value(1).unwrap()); 
- assert_eq!(ScalarValue::Null, got.logical_value(2).unwrap()); + assert_arrays_eq(&expected, &out); } #[test] fn array_cast_decimal64_to_f64() { - let arr = Array2::new_with_array_data( - DataType::Decimal64(DecimalTypeMeta { - precision: 10, - scale: 3, - }), - PrimitiveStorage::from(vec![1500_i64, 2000_i64, 2500_i64]), - ); - - let got = cast_array(&arr, DataType::Float64, CastFailBehavior::Error).unwrap(); - - assert_eq!(ScalarValue::Float64(1.5), got.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Float64(2.0), got.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Float64(2.5), got.logical_value(2).unwrap()); + let mut arr = Array::try_from_iter([1500_i64, 2000, 2500]).unwrap(); + // '[1.500, 2.000, 2.500]' + arr.datatype = DataType::Decimal64(DecimalTypeMeta::new(10, 3)); + + let mut out = Array::new(&NopBufferManager, DataType::Float64, 3).unwrap(); + cast_array(&mut arr, 0..3, &mut out, CastFailBehavior::Error).unwrap(); + + let expected = Array::try_from_iter([1.5_f64, 2.0, 2.5]).unwrap(); + assert_arrays_eq(&expected, &out); } } diff --git a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs index 8ae4ae51e..1de04215d 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/behavior.rs @@ -1,7 +1,5 @@ use rayexec_error::{RayexecError, Result}; -use crate::arrays::array::Array2; - /// Behavior when a cast fail due to under/overflow. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum CastFailBehavior { @@ -12,84 +10,38 @@ pub enum CastFailBehavior { } impl CastFailBehavior { - pub(crate) fn new_state_for_array(&self, _arr: &Array2) -> CastFailState { - match self { - CastFailBehavior::Error => CastFailState::TrackOneAndError(None), - CastFailBehavior::Null => CastFailState::TrackManyAndInvalidate(Vec::new()), + pub(crate) fn new_state(&self) -> CastErrorState { + CastErrorState { + behavior: *self, + error: None, } } } #[derive(Debug)] -pub struct ErrorIndex { - /// Row index that we failed on. - pub idx: usize, - /// Optional error we can use instead of the generic "failed to cast" error. - pub error: Option, -} - -/// State used to track failures casting. -#[derive(Debug)] -pub(crate) enum CastFailState { - /// Keep the row index of the first failure. - TrackOneAndError(Option), - /// Track all failures during casting. - TrackManyAndInvalidate(Vec), +pub struct CastErrorState { + behavior: CastFailBehavior, + error: Option, } -impl CastFailState { - pub(crate) fn set_did_fail(&mut self, idx: usize) { - match self { - Self::TrackOneAndError(maybe_idx) => { - if maybe_idx.is_none() { - *maybe_idx = Some(ErrorIndex { idx, error: None }); - } - } - Self::TrackManyAndInvalidate(indices) => indices.push(idx), - } - } - - pub(crate) fn set_did_fail_with_error(&mut self, idx: usize, error: RayexecError) { - match self { - Self::TrackOneAndError(maybe_idx) => { - if maybe_idx.is_none() { - *maybe_idx = Some(ErrorIndex { - idx, - error: Some(error), - }) - } - } - Self::TrackManyAndInvalidate(indices) => indices.push(idx), // Error ignored, we're replacing with null. +impl CastErrorState { + /// Set the error from a function. + /// + /// If the cast behavior is use NULL on failure, then `error_fn` is not + /// called. 
+ pub fn set_error(&mut self, error_fn: F) + where + F: FnOnce() -> RayexecError, + { + if self.behavior == CastFailBehavior::Error && self.error.is_none() { + self.error = Some(error_fn()) } } - pub(crate) fn check_and_apply(self, original: &Array2, mut output: Array2) -> Result { - match self { - Self::TrackOneAndError(None) => Ok(output), - Self::TrackOneAndError(Some(error_idx)) => { - let scalar = original.logical_value(error_idx.idx)?; - match error_idx.error { - Some(error) => Err(RayexecError::with_source( - format!("Failed to cast '{scalar}' to {}", output.datatype()), - Box::new(error), - )), - None => Err(RayexecError::new(format!( - "Failed to cast '{scalar}' to {}", - output.datatype() - ))), - } - } - Self::TrackManyAndInvalidate(indices) => { - if indices.is_empty() { - Ok(output) - } else { - // Apply the nulls. - for idx in indices { - output.set_physical_validity(idx, false); - } - Ok(output) - } - } + pub fn into_result(self) -> Result<()> { + match self.error { + Some(err) => Err(err), + None => Ok(()), } } } diff --git a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs index 50baf6f24..ab1049476 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/mod.rs @@ -31,9 +31,9 @@ pub struct PutBuffer<'a, M> where M: AddressableMut, { - idx: usize, - buffer: &'a mut M, - validity: &'a mut Validity, + pub(crate) idx: usize, + pub(crate) buffer: &'a mut M, + pub(crate) validity: &'a mut Validity, } impl<'a, M> PutBuffer<'a, M> diff --git a/crates/rayexec_execution/src/arrays/scalar/decimal.rs b/crates/rayexec_execution/src/arrays/scalar/decimal.rs index 73963c4c0..c23abf276 100644 --- a/crates/rayexec_execution/src/arrays/scalar/decimal.rs +++ b/crates/rayexec_execution/src/arrays/scalar/decimal.rs @@ -5,12 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use rayexec_proto::ProtoConv; use serde::{Deserialize, 
Serialize}; -use crate::arrays::buffer::physical_type::{ - MutablePhysicalStorage, - PhysicalI128, - PhysicalI64, - PhysicalStorage, -}; +use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalI128, PhysicalI64}; use crate::arrays::executor::physical_type::{PhysicalI128_2, PhysicalI64_2, PhysicalStorage2}; pub trait DecimalPrimitive: diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 5b3987a2c..5b4d5ca9a 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -22,8 +22,9 @@ pub struct PhysicalCastExpr { impl PhysicalCastExpr { pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { let input = self.expr.eval2(batch)?; - let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; - Ok(Cow::Owned(out)) + unimplemented!() + // let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; + // Ok(Cow::Owned(out)) } } diff --git a/crates/rayexec_parquet/src/reader/primitive.rs b/crates/rayexec_parquet/src/reader/primitive.rs index 41ac0729f..65f26d094 100644 --- a/crates/rayexec_parquet/src/reader/primitive.rs +++ b/crates/rayexec_parquet/src/reader/primitive.rs @@ -93,9 +93,10 @@ where None => Array2::new_with_array_data(build_type, array_data), }; - if needs_cast { - array = cast_array(&array, self.datatype.clone(), CastFailBehavior::Null)?; - } + // TODO + // if needs_cast { + // array = cast_array(&array, self.datatype.clone(), CastFailBehavior::Null)?; + // } Ok(array) } From 14c93fbebfa5fd6a074565b5f97496d46808e1f6 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 2 Jan 2025 14:23:03 -0500 Subject: [PATCH 41/59] more expr stuff --- crates/rayexec_error/src/lib.rs | 10 ++- .../rayexec_execution/src/arrays/batch_exp.rs | 2 +- .../src/expr/physical/cast_expr.rs | 81 ++++++++++++++++--- .../src/expr/physical/column_expr.rs | 17 +--- 
.../src/expr/physical/evaluator.rs | 7 +- .../src/expr/physical/literal_expr.rs | 7 -- .../src/expr/physical/mod.rs | 25 ++++-- .../src/expr/physical/scalar_function_expr.rs | 45 +++++------ 8 files changed, 126 insertions(+), 68 deletions(-) diff --git a/crates/rayexec_error/src/lib.rs b/crates/rayexec_error/src/lib.rs index 3d1e6a084..dacda57d5 100644 --- a/crates/rayexec_error/src/lib.rs +++ b/crates/rayexec_error/src/lib.rs @@ -16,12 +16,10 @@ macro_rules! not_implemented { } // TODO: Implement partial eq on msg -#[derive(Debug)] pub struct RayexecError { inner: Box, } -#[derive(Debug)] struct RayexecErrorInner { /// Message for the error. pub msg: String, @@ -153,6 +151,14 @@ impl fmt::Display for RayexecError { } } +impl fmt::Debug for RayexecError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Just use the Display impl for Debug, significantly easier to read + // especially when the error contains a backtrace. + write!(f, "{self}") + } +} + impl Error for RayexecError { fn source(&self) -> Option<&(dyn Error + 'static)> { self.inner.source.as_ref().map(|e| e.as_ref() as _) diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 23ff71cbf..7405a372e 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -124,7 +124,7 @@ where } /// Returns a selection that selects rows [0, num_rows). 
- pub fn selection(&self) -> Selection { + pub fn selection<'a>(&self) -> Selection<'a> { Selection::Linear { len: self.num_rows } } diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 5b4d5ca9a..dd0e3263f 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -1,12 +1,13 @@ -use std::borrow::Cow; use std::fmt; use rayexec_error::{OptionExt, Result}; use rayexec_proto::ProtoConv; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; +use super::evaluator::ExpressionEvaluator; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; use crate::arrays::compute::cast::array::cast_array; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::DataType; @@ -20,11 +21,35 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - let input = self.expr.eval2(batch)?; - unimplemented!() - // let out = cast_array(input.as_ref(), self.to.clone(), CastFailBehavior::Error)?; - // Ok(Cow::Owned(out)) + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Eval child. + let child_output = &mut state.buffer.arrays_mut()[0]; + ExpressionEvaluator::eval_expression( + &self.expr, + input, + &mut state.inputs[0], + sel, + child_output, + )?; + + // Cast child output. + // + // Note we discard the previous selection since the child would have + // written the rows starting at 0 up to selection len. 
+ cast_array( + child_output, + Selection::linear(sel.len()), + output, + CastFailBehavior::Error, + )?; + + Ok(()) } } @@ -54,3 +79,41 @@ impl DatabaseProtoConv for PhysicalCastExpr { }) } } + +#[cfg(test)] +mod tests { + use iterutil::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::testutil::assert_arrays_eq_sel; + use crate::expr::physical::literal_expr::PhysicalLiteralExpr; + + #[test] + fn cast_expr_literal_string_to_i32() { + let expr = PhysicalCastExpr { + to: DataType::Int32, + expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: "35".into(), + })), + }; + + let mut state = ExpressionState { + buffer: Batch::from_arrays( + [Array::new(&NopBufferManager, DataType::Utf8, 1024).unwrap()], + false, + ) + .unwrap(), + inputs: vec![ExpressionState::empty()], + }; + + let mut out = Array::new(&NopBufferManager, DataType::Int32, 1024).unwrap(); + let mut input = Batch::empty_with_num_rows(3); + let sel = input.selection(); + + expr.eval(&mut input, &mut state, sel, &mut out).unwrap(); + + let expected = Array::try_from_iter([35, 35, 35]).unwrap(); + assert_arrays_eq_sel(&expected, 0..3, &out, 0..3); + } +} diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 2fcf62b8d..c1e57b62f 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -1,13 +1,10 @@ -use std::borrow::Cow; use std::fmt; -use rayexec_error::{RayexecError, Result}; +use rayexec_error::Result; use super::evaluator::ExpressionState; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; -use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::database::DatabaseContext; @@ -19,18 +16,6 
@@ pub struct PhysicalColumnExpr { } impl PhysicalColumnExpr { - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - let col = batch.column(self.idx).ok_or_else(|| { - RayexecError::new(format!( - "Tried to get column at index {} in a batch with {} columns", - self.idx, - batch.columns().len() - )) - })?; - - Ok(Cow::Borrowed(col)) - } - pub(crate) fn eval( &self, input: &mut Batch, diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index c3bfc5a6a..5fdb36d78 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -80,9 +80,10 @@ impl ExpressionEvaluator { output.reset_for_write(&NopBufferManager)?; match expr { - // PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output), - // PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output), - // PhysicalScalarExpression::Literal(expr) => expr.eval(input, state, sel, output), + PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output), + PhysicalScalarExpression::Cast(expr) => expr.eval(input, state, sel, output), + PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output), + PhysicalScalarExpression::Literal(expr) => expr.eval(input, state, sel, output), _ => unimplemented!(), } } diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index 3179cf94a..0a6cc9d2f 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -7,8 +7,6 @@ use rayexec_proto::ProtoConv; use super::evaluator::ExpressionState; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; -use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; use crate::arrays::batch_exp::Batch; use 
crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::scalar::OwnedScalarValue; @@ -21,11 +19,6 @@ pub struct PhysicalLiteralExpr { } impl PhysicalLiteralExpr { - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - let arr = self.literal.as_array(batch.num_rows())?; - Ok(Cow::Owned(arr)) - } - pub(crate) fn eval( &self, _: &mut Batch, diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 428fb4b3e..e579fac06 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -13,6 +13,7 @@ use std::fmt; use case_expr::PhysicalCaseExpr; use cast_expr::PhysicalCastExpr; use column_expr::PhysicalColumnExpr; +use evaluator::ExpressionState; use literal_expr::PhysicalLiteralExpr; use rayexec_error::{not_implemented, OptionExt, Result}; use scalar_function_expr::PhysicalScalarFunctionExpr; @@ -35,14 +36,24 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { + // pub(crate) fn new_state(&self, batch_size: usize) -> Result { + // match self { + // Self::Cast(expr) => expr.new_state(batch_size), + // Self::Column(expr) => expr.new_state(batch_size), + // Self::Literal(expr) => expr.new_state(batch_size), + // _ => unimplemented!(), + // } + // } + pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - match self { - Self::Case(e) => e.eval2(batch), - Self::Cast(e) => e.eval2(batch), - Self::Column(e) => e.eval2(batch), - Self::Literal(e) => e.eval2(batch), - Self::ScalarFunction(e) => e.eval2(batch), - } + unimplemented!() + // match self { + // Self::Case(e) => e.eval2(batch), + // Self::Cast(e) => e.eval2(batch), + // Self::Column(e) => e.eval2(batch), + // Self::Literal(e) => e.eval2(batch), + // Self::ScalarFunction(e) => e.eval2(batch), + // } } /// Produce a selection vector for the batch using this expression. 
diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 642620212..0a641dfdc 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -1,13 +1,14 @@ -use std::borrow::Cow; use std::fmt; use fmtutil::IntoDisplayableSlice; use rayexec_error::Result; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; use crate::database::DatabaseContext; +use crate::expr::physical::evaluator::ExpressionEvaluator; use crate::functions::scalar::PlannedScalarFunction; use crate::proto::DatabaseProtoConv; @@ -18,27 +19,25 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - let inputs = self - .inputs - .iter() - .map(|input| input.eval2(batch)) - .collect::>>()?; - - let refs: Vec<_> = inputs.iter().map(|a| a.as_ref()).collect(); // Can I not? - let mut out = self.function.function_impl.execute2(&refs)?; - - // If function is provided no input, it's expected to return an - // array of length 1. We extend the array here so that it's the - // same size as the rest. - // - // TODO: Could just extend the selection vector too. - if refs.is_empty() { - let scalar = out.logical_value(0)?; - out = scalar.as_array(batch.num_rows())?; + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Eval children. 
+ for (child_idx, array) in state.buffer.arrays_mut().iter_mut().enumerate() { + let expr = &self.inputs[child_idx]; + let child_state = &mut state.inputs[child_idx]; + ExpressionEvaluator::eval_expression(expr, input, child_state, sel, array)?; } - Ok(Cow::Owned(out)) + // Eval function with child outputs. + state.buffer.set_num_rows(sel.len())?; + self.function.function_impl.execute(&state.buffer, output)?; + + Ok(()) } } From bcf868c90031f2ac4e0db990efb1fd37ed4000b6 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Thu, 2 Jan 2025 19:31:36 -0500 Subject: [PATCH 42/59] case --- .../rayexec_execution/src/arrays/array/exp.rs | 30 +- .../src/expr/physical/case_expr.rs | 306 ++++++++++-------- 2 files changed, 202 insertions(+), 134 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 3ff693e8c..a7c6825dd 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -182,6 +182,11 @@ where // And set the new buf, old buf gets dropped. self.data = ArrayData::owned(new_buf); + debug_assert!(matches!( + self.data.get_secondary(), + SecondaryBuffer::Dictionary(_) + )); + return Ok(()); } @@ -213,6 +218,11 @@ where buffer: orig_buffer, })); + debug_assert!(matches!( + self.data.get_secondary(), + SecondaryBuffer::Dictionary(_) + )); + Ok(()) } @@ -259,7 +269,25 @@ where pub fn reset_for_write(&mut self, manager: &B) -> Result<()> { self.validity = Validity::new_all_valid(self.capacity()); - // TODO: We should clear some secondary buffers (mostly string heap) + // Check if dictionary first since we want to try to get the underlying + // buffer from that. We should only have layer of "dictionary", so we + // shouldn't need to recurse. 
+ if self.data.as_ref().physical_type() == PhysicalType::Dictionary { + let secondary = self.data.try_as_mut()?.get_secondary_mut(); + let dict = match std::mem::replace(secondary, SecondaryBuffer::None) { + SecondaryBuffer::Dictionary(dict) => dict, + other => { + return Err(RayexecError::new(format!( + "Expected dictionary secondary buffer, got {other:?}", + ))) + } + }; + + // TODO: Not sure what to do if capacities don't match. Currently + // dictionaries are only created through 'select' and the index + // buffer gets initialized to the length of the selection. + self.data = dict.buffer; + } if let Err(()) = self.data.try_reset_for_write() { // Need to create a new buffer and set that. diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index f7eaca87e..92d2b7841 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -1,15 +1,15 @@ -use std::borrow::Cow; use std::fmt; -use std::sync::Arc; use rayexec_error::Result; -use super::PhysicalScalarExpression; -use crate::arrays::array::Array2; -use crate::arrays::batch::Batch2; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::executor::scalar::{interleave, SelectExecutor}; -use crate::arrays::selection::SelectionVector; +use super::{ExpressionState, PhysicalScalarExpression}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::buffer::physical_type::PhysicalBool; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::expr::physical::evaluator::ExpressionEvaluator; #[derive(Debug, Clone)] pub struct PhysicalWhenThen { @@ -23,6 +23,7 @@ impl fmt::Display for PhysicalWhenThen { } } +/// Physical expression for 'CASE .. THEN .. ELSE ..' expressions. 
#[derive(Debug, Clone)] pub struct PhysicalCaseExpr { pub cases: Vec, @@ -30,74 +31,111 @@ pub struct PhysicalCaseExpr { } impl PhysicalCaseExpr { - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { - let mut arrays = Vec::new(); - let mut indices: Vec<(usize, usize)> = (0..batch.num_rows()).map(|_| (0, 0)).collect(); - - // Track remaining rows we need to evaluate. + pub(crate) fn eval( + &self, + input: &mut Batch, + state: &mut ExpressionState, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + // Indices where 'when' evaluated to true and the 'then' expression + // needs to be evaluated. + let mut then_selection = Vec::with_capacity(sel.len()); + // Indices where 'then' evaluated to false or null. + let mut fallthrough_selection = Vec::with_capacity(sel.len()); + + // Current selection for a single when/then pair. // - // True bits are rows we still need to consider. - let mut remaining = Bitmap::new_with_all_true(batch.num_rows()); + // Initialized to the initial selection passed in. + let mut curr_selection: Vec<_> = sel.iter().collect(); // TODO: Would be cool not needing to allocate here. - let mut trues_sel = SelectionVector::with_capacity(batch.num_rows()); + for (case_idx, case) in self.cases.iter().enumerate() { + fallthrough_selection.clear(); + then_selection.clear(); - for case in &self.cases { - // Generate selection from remaining bitmap. - let selection = Arc::new(SelectionVector::from_iter(remaining.index_iter())); - - // Get batch with only remaining rows that we should consider. - let selected_batch = batch.select(selection.clone()); - - // Execute 'when'. - let selected = case.when.eval2(&selected_batch)?; - - // Determine which rows should be executed for 'then', and which we - // need to fall through on. - SelectExecutor::select(&selected, &mut trues_sel)?; - - // Select rows in batch to execute on based on 'trues'. 
- let execute_batch = selected_batch.select(Arc::new(trues_sel.clone())); - let output = case.then.eval2(&execute_batch)?; - - // Store array for later interleaving. - let array_idx = arrays.len(); - arrays.push(output.into_owned()); - - // Figure out mapping from the 'trues' selection to the original row - // index. - // - // The selection vector locations should index into the full-length - // selection vector to get the original row index. - for (array_row_idx, selected_row_idx) in trues_sel.iter_locations().enumerate() { - // Final output row. - let output_row_idx = selection.get(selected_row_idx); - indices[output_row_idx] = (array_idx, array_row_idx); - - // Update bitmap, this row was handled. - remaining.set_unchecked(output_row_idx, false); + if curr_selection.is_empty() { + // Nothing left to do. + break; } - } - // Do all remaining rows. - if remaining.count_trues() != 0 { - let selection = Arc::new(SelectionVector::from_iter(remaining.index_iter())); - let remaining_batch = batch.select(selection.clone()); - - let output = self.else_expr.eval2(&remaining_batch)?; - let array_idx = arrays.len(); - arrays.push(output.into_owned()); + // Each case has two input states, one for 'when' and one for + // 'then'. + let when_state = &mut state.inputs[case_idx * 2]; + // When array reused for each case. + let when_array = &mut state.buffer.arrays_mut()[0]; + when_array.reset_for_write(&NopBufferManager)?; + + // Eval 'when' + ExpressionEvaluator::eval_expression( + &case.when, + input, + when_state, + Selection::selection(&curr_selection), + when_array, + )?; + + UnaryExecutor::for_each_flat::( + when_array.flat_view()?, + Selection::selection(&curr_selection), + |idx, b| { + if let Some(&true) = b { + // 'When' expression evaluated to true, select it for + // 'then' expression eval. + then_selection.push(idx); + } else { + // Not true, need to fall through. + fallthrough_selection.push(idx); + } + }, + )?; - // Update indices. 
- for (array_row_idx, output_row_idx) in selection.iter_locations().enumerate() { - indices[output_row_idx] = (array_idx, array_row_idx); + if then_selection.is_empty() { + // Everything in this case's 'when' evaluated to false. + continue; } + + let then_state = &mut state.inputs[case_idx * 2 + 1]; + // Reused, assumes all 'then' expressions and the 'else' expression + // are the same type. + let then_array = &mut state.buffer.arrays_mut()[1]; + then_array.reset_for_write(&NopBufferManager)?; + + // Eval 'then' with selection from 'when'. + ExpressionEvaluator::eval_expression( + &case.then, + input, + then_state, + Selection::selection(&then_selection), + then_array, + )?; + + // Fill output array according to indices in 'when' selection. + then_array.copy_rows(then_selection.iter().copied().enumerate(), output)?; + + // Update next iteration to use fallthrough indices. + std::mem::swap(&mut fallthrough_selection, &mut curr_selection); } - // Interleave. - let refs: Vec<_> = arrays.iter().collect(); - let arr = interleave(&refs, &indices)?; + if !curr_selection.is_empty() { + // We have remaining indices that fell through all cases. Eval with + // else expression and add those in. + let else_state = state.inputs.last_mut().unwrap(); // Last state after all when/then states. + let else_array = &mut state.buffer.arrays_mut()[1]; + else_array.reset_for_write(&NopBufferManager)?; + + ExpressionEvaluator::eval_expression( + &self.else_expr, + input, + else_state, + Selection::selection(&curr_selection), + else_array, + )?; + + // And fill remaining. 
+ else_array.copy_rows(curr_selection.iter().copied().enumerate(), output)?; + } - Ok(Cow::Owned(arr)) + Ok(()) } } @@ -115,83 +153,85 @@ impl fmt::Display for PhysicalCaseExpr { #[cfg(test)] mod tests { + use iterutil::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DataType; - use crate::arrays::scalar::ScalarValue; - use crate::expr::case_expr::{CaseExpr, WhenThen}; - use crate::expr::physical::planner::PhysicalExpressionPlanner; - use crate::expr::{self, Expression}; - use crate::functions::scalar::builtin::comparison::Eq; - use crate::functions::scalar::ScalarFunction; - use crate::logical::binder::table_list::TableList; + use crate::arrays::testutil::assert_arrays_eq; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + use crate::expr::physical::literal_expr::PhysicalLiteralExpr; #[test] fn case_simple() { - let batch = Batch2::try_new([ - Array2::from_iter([1, 2, 3, 4]), - Array2::from_iter([12, 13, 14, 15]), - ]) + // CASE a THEN b + // ELSE 48 + let expr = PhysicalCaseExpr { + cases: vec![PhysicalWhenThen { + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 0 }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 1 }), + }], + else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: 48.into(), + })), + }; + + let mut input = Batch::from_arrays( + [ + Array::try_from_iter([true, true, false]).unwrap(), + Array::try_from_iter([1, 2, 3]).unwrap(), + ], + true, + ) .unwrap(); - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) + let mut state = ExpressionState { + buffer: Batch::new(&NopBufferManager, [DataType::Boolean, DataType::Int32], 3).unwrap(), + inputs: vec![ExpressionState::empty(), ExpressionState::empty()], + }; + + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + expr.eval(&mut input, &mut state, 
Selection::linear(3), &mut out) .unwrap(); - // CASE WHEN a = 2 THEN 'first_case' - // WHEN a = 3 THEN 'second_case' - // ELSE 'else' - // END - - let when_expr_0 = Expression::ScalarFunction( - Eq.plan(&table_list, vec![expr::col_ref(table_ref, 0), expr::lit(2)]) - .unwrap() - .into(), - ); - let then_expr_0 = expr::lit("first_case"); - - let when_expr_1 = Expression::ScalarFunction( - Eq.plan(&table_list, vec![expr::col_ref(table_ref, 0), expr::lit(3)]) - .unwrap() - .into(), - ); - let then_expr_1 = expr::lit("second_case"); - - let else_expr = expr::lit("else"); - - let case_expr = Expression::Case(CaseExpr { - cases: vec![ - WhenThen { - when: when_expr_0, - then: then_expr_0, - }, - WhenThen { - when: when_expr_1, - then: then_expr_1, - }, + let expected = Array::try_from_iter([1, 2, 48]).unwrap(); + assert_arrays_eq(&expected, &out); + } + + #[test] + fn case_falsey() { + // Same as above but check that 'when' treats nulls as false. + + // CASE a THEN b + // ELSE 48 + let expr = PhysicalCaseExpr { + cases: vec![PhysicalWhenThen { + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 0 }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 1 }), + }], + else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: 48.into(), + })), + }; + + let mut input = Batch::from_arrays( + [ + Array::try_from_iter([Some(true), None, Some(false)]).unwrap(), + Array::try_from_iter([1, 2, 3]).unwrap(), ], - else_expr: Some(Box::new(else_expr)), - }); - - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical_case = planner.plan_scalar(&[table_ref], &case_expr).unwrap(); - - let got = physical_case.eval2(&batch).unwrap(); - - assert_eq!(ScalarValue::from("else"), got.logical_value(0).unwrap()); - assert_eq!( - ScalarValue::from("first_case"), - got.logical_value(1).unwrap() - ); - assert_eq!( - ScalarValue::from("second_case"), - got.logical_value(2).unwrap() - ); - assert_eq!(ScalarValue::from("else"), 
got.logical_value(3).unwrap()); + true, + ) + .unwrap(); + + let mut state = ExpressionState { + buffer: Batch::new(&NopBufferManager, [DataType::Boolean, DataType::Int32], 3).unwrap(), + inputs: vec![ExpressionState::empty(), ExpressionState::empty()], + }; + + let mut out = Array::new(&NopBufferManager, DataType::Int32, 3).unwrap(); + expr.eval(&mut input, &mut state, Selection::linear(3), &mut out) + .unwrap(); + + let expected = Array::try_from_iter([1, 48, 48]).unwrap(); + assert_arrays_eq(&expected, &out); } } From 8372762c9d688b970cd3b683643a89f37aa58bf6 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 3 Jan 2025 11:17:03 -0500 Subject: [PATCH 43/59] expr create state --- .../intermediate/planner/plan_aggregate.rs | 15 +++-- .../execution/operators/sort/gather_sort.rs | 11 +++- .../execution/operators/sort/scatter_sort.rs | 21 +++++-- .../src/expr/physical/case_expr.rs | 61 ++++++++++++++++--- .../src/expr/physical/cast_expr.rs | 29 ++++++--- .../src/expr/physical/column_expr.rs | 40 ++++++++---- .../src/expr/physical/evaluator.rs | 4 +- .../src/expr/physical/literal_expr.rs | 9 +++ .../src/expr/physical/mod.rs | 21 +++++++ .../src/expr/physical/planner.rs | 8 ++- .../src/expr/physical/scalar_function_expr.rs | 24 ++++++++ 11 files changed, 201 insertions(+), 42 deletions(-) diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs index 4f9e88d37..a2ce125a3 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs @@ -41,7 +41,6 @@ impl IntermediatePipelineBuildState<'_> { } }; - let start_col_index = preproject_exprs.len(); for arg in &agg.agg.inputs { let scalar = self .expr_planner @@ -49,13 +48,19 @@ impl IntermediatePipelineBuildState<'_> { .context("Failed to plan expressions for aggregate pre-projection")?; 
preproject_exprs.push(scalar); } - let end_col_index = preproject_exprs.len(); + + let columns = preproject_exprs + .iter() + .enumerate() + .map(|(idx, expr)| PhysicalColumnExpr { + idx, + datatype: expr.datatype(), + }) + .collect(); let phys_agg = PhysicalAggregateExpression { function: agg.agg, - columns: (start_col_index..end_col_index) - .map(|idx| PhysicalColumnExpr { idx }) - .collect(), + columns, is_distinct: agg.distinct, }; diff --git a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs index 391941599..031571c8d 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs @@ -613,6 +613,7 @@ mod tests { use std::sync::Arc; use super::*; + use crate::arrays::datatype::DataType; use crate::execution::operators::test_util::{ make_i32_batch, unwrap_poll_pull_batch, @@ -629,7 +630,10 @@ mod tests { ]; let operator = Arc::new(PhysicalGatherSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -721,7 +725,10 @@ mod tests { ]; let operator = Arc::new(PhysicalGatherSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); diff --git a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs index 5a90d44f7..b70897911 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs @@ -263,6 +263,7 @@ mod tests { use std::sync::Arc; use super::*; + use crate::arrays::datatype::DataType; use crate::execution::operators::test_util::{ 
make_i32_batch, test_database_context, @@ -290,7 +291,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -332,7 +336,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: false, nulls_first: true, }])); @@ -378,7 +385,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); @@ -443,7 +453,10 @@ mod tests { ]; let operator = Arc::new(PhysicalScatterSort::new(vec![PhysicalSortExpression { - column: PhysicalColumnExpr { idx: 0 }, + column: PhysicalColumnExpr { + idx: 0, + datatype: DataType::Int32, + }, desc: true, nulls_first: true, }])); diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index 92d2b7841..45b74cafe 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -8,6 +8,7 @@ use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::buffer::physical_type::PhysicalBool; +use crate::arrays::datatype::DataType; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::expr::physical::evaluator::ExpressionEvaluator; @@ -28,9 +29,42 @@ impl fmt::Display for PhysicalWhenThen { pub struct PhysicalCaseExpr { pub cases: Vec, pub else_expr: Box, + pub datatype: DataType, } impl PhysicalCaseExpr { + pub(crate) fn create_state(&self, 
batch_size: usize) -> Result { + // 2 states per when/then pair, plus one for the 'else'. + let mut inputs = Vec::with_capacity(self.cases.len() * 2 + 1); + for case in &self.cases { + let when_input = case.when.create_state(batch_size)?; + inputs.push(when_input); + + let then_input = case.then.create_state(batch_size)?; + inputs.push(then_input); + } + + let else_input = self.else_expr.create_state(batch_size)?; + inputs.push(else_input); + + // 2 arrays in the buffer, one 'boolean' for conditional evaluation, one + // for the result if condition is true. 'then' and 'else' expressions + // should evaluate to the same type. + let buffer = Batch::from_arrays( + [ + Array::new(&NopBufferManager, DataType::Boolean, batch_size)?, + Array::new(&NopBufferManager, self.else_expr.datatype(), batch_size)?, + ], + false, + )?; + + Ok(ExpressionState { buffer, inputs }) + } + + pub fn datatype(&self) -> DataType { + self.datatype.clone() + } + pub(crate) fn eval( &self, input: &mut Batch, @@ -167,12 +201,19 @@ mod tests { // ELSE 48 let expr = PhysicalCaseExpr { cases: vec![PhysicalWhenThen { - when: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 0 }), - then: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 1 }), + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 0, + datatype: DataType::Boolean, + }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }), }], else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { literal: 48.into(), })), + datatype: DataType::Int32, }; let mut input = Batch::from_arrays( @@ -184,10 +225,7 @@ mod tests { ) .unwrap(); - let mut state = ExpressionState { - buffer: Batch::new(&NopBufferManager, [DataType::Boolean, DataType::Int32], 3).unwrap(), - inputs: vec![ExpressionState::empty(), ExpressionState::empty()], - }; + let mut state = expr.create_state(3).unwrap(); let mut out = Array::new(&NopBufferManager, DataType::Int32, 
3).unwrap(); expr.eval(&mut input, &mut state, Selection::linear(3), &mut out) @@ -205,12 +243,19 @@ mod tests { // ELSE 48 let expr = PhysicalCaseExpr { cases: vec![PhysicalWhenThen { - when: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 0 }), - then: PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: 1 }), + when: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 0, + datatype: DataType::Boolean, + }), + then: PhysicalScalarExpression::Column(PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }), }], else_expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { literal: 48.into(), })), + datatype: DataType::Int32, }; let mut input = Batch::from_arrays( diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index dd0e3263f..2c4a56c41 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -8,6 +8,7 @@ use super::{ExpressionState, PhysicalScalarExpression}; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::compute::cast::array::cast_array; use crate::arrays::compute::cast::behavior::CastFailBehavior; use crate::arrays::datatype::DataType; @@ -21,6 +22,24 @@ pub struct PhysicalCastExpr { } impl PhysicalCastExpr { + pub(crate) fn create_state(&self, batch_size: usize) -> Result { + let inputs = vec![self.expr.create_state(batch_size)?]; + let buffer = Batch::from_arrays( + [Array::new( + &NopBufferManager, + self.expr.datatype(), + batch_size, + )?], + false, + )?; + + Ok(ExpressionState { buffer, inputs }) + } + + pub fn datatype(&self) -> DataType { + self.to.clone() + } + pub(crate) fn eval( &self, input: &mut Batch, @@ -98,15 +117,7 @@ mod tests { })), }; - let mut state = ExpressionState { - buffer: 
Batch::from_arrays( - [Array::new(&NopBufferManager, DataType::Utf8, 1024).unwrap()], - false, - ) - .unwrap(), - inputs: vec![ExpressionState::empty()], - }; - + let mut state = expr.create_state(1024).unwrap(); let mut out = Array::new(&NopBufferManager, DataType::Int32, 1024).unwrap(); let mut input = Batch::empty_with_num_rows(3); let sel = input.selection(); diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index c1e57b62f..6280eabb0 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -7,15 +7,25 @@ use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; #[derive(Debug, Clone)] pub struct PhysicalColumnExpr { + pub datatype: DataType, pub idx: usize, } impl PhysicalColumnExpr { + pub(crate) fn create_state(&self, _batch_size: usize) -> Result { + Ok(ExpressionState::empty()) + } + + pub fn datatype(&self) -> DataType { + self.datatype.clone() + } + pub(crate) fn eval( &self, input: &mut Batch, @@ -44,15 +54,17 @@ impl DatabaseProtoConv for PhysicalColumnExpr { type ProtoType = rayexec_proto::generated::physical_expr::PhysicalColumnExpr; fn to_proto_ctx(&self, _context: &DatabaseContext) -> Result { - Ok(Self::ProtoType { - idx: self.idx as u32, - }) + unimplemented!() + // Ok(Self::ProtoType { + // idx: self.idx as u32, + // }) } - fn from_proto_ctx(proto: Self::ProtoType, _context: &DatabaseContext) -> Result { - Ok(Self { - idx: proto.idx as usize, - }) + fn from_proto_ctx(_proto: Self::ProtoType, _context: &DatabaseContext) -> Result { + unimplemented!() + // Ok(Self { + // idx: proto.idx as usize, + // }) } } @@ -75,7 +87,10 @@ mod tests { ) .unwrap(); - 
let expr = PhysicalColumnExpr { idx: 1 }; + let expr = PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }; let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); let sel = Selection::linear(4); @@ -97,12 +112,15 @@ mod tests { ) .unwrap(); - let expr = PhysicalColumnExpr { idx: 1 }; + let expr = PhysicalColumnExpr { + idx: 1, + datatype: DataType::Int32, + }; + let mut state = expr.create_state(4).unwrap(); let mut out = Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(); let sel = Selection::selection(&[1, 3]); - expr.eval(&mut input, &mut ExpressionState::empty(), sel, &mut out) - .unwrap(); + expr.eval(&mut input, &mut state, sel, &mut out).unwrap(); let expected = Array::try_from_iter([2, 4]).unwrap(); assert_arrays_eq(&expected, &out); diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index 5fdb36d78..1314fe224 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -81,10 +81,10 @@ impl ExpressionEvaluator { match expr { PhysicalScalarExpression::Column(expr) => expr.eval(input, state, sel, output), + PhysicalScalarExpression::Case(expr) => expr.eval(input, state, sel, output), PhysicalScalarExpression::Cast(expr) => expr.eval(input, state, sel, output), - PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output), PhysicalScalarExpression::Literal(expr) => expr.eval(input, state, sel, output), - _ => unimplemented!(), + PhysicalScalarExpression::ScalarFunction(expr) => expr.eval(input, state, sel, output), } } } diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index 0a6cc9d2f..f220d8fbe 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -9,6 +9,7 @@ use 
crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; use crate::arrays::scalar::OwnedScalarValue; use crate::database::DatabaseContext; use crate::proto::DatabaseProtoConv; @@ -19,6 +20,14 @@ pub struct PhysicalLiteralExpr { } impl PhysicalLiteralExpr { + pub(crate) fn create_state(&self, _batch_size: usize) -> Result { + Ok(ExpressionState::empty()) + } + + pub fn datatype(&self) -> DataType { + self.literal.datatype() + } + pub(crate) fn eval( &self, _: &mut Batch, diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index e579fac06..a4685e687 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -20,6 +20,7 @@ use scalar_function_expr::PhysicalScalarFunctionExpr; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; +use crate::arrays::datatype::DataType; use crate::arrays::executor::scalar::SelectExecutor; use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; @@ -36,6 +37,26 @@ pub enum PhysicalScalarExpression { } impl PhysicalScalarExpression { + pub(crate) fn create_state(&self, batch_size: usize) -> Result { + match self { + Self::Case(expr) => expr.create_state(batch_size), + Self::Cast(expr) => expr.create_state(batch_size), + Self::Column(expr) => expr.create_state(batch_size), + Self::Literal(expr) => expr.create_state(batch_size), + Self::ScalarFunction(expr) => expr.create_state(batch_size), + } + } + + pub fn datatype(&self) -> DataType { + match self { + Self::Case(expr) => expr.datatype(), + Self::Cast(expr) => expr.datatype(), + Self::Column(expr) => expr.datatype(), + Self::Literal(expr) => expr.datatype(), + Self::ScalarFunction(expr) => expr.datatype(), + } + } + // pub(crate) fn new_state(&self, batch_size: 
usize) -> Result { // match self { // Self::Cast(expr) => expr.new_state(batch_size), diff --git a/crates/rayexec_execution/src/expr/physical/planner.rs b/crates/rayexec_execution/src/expr/physical/planner.rs index cf40aed00..e98f77324 100644 --- a/crates/rayexec_execution/src/expr/physical/planner.rs +++ b/crates/rayexec_execution/src/expr/physical/planner.rs @@ -64,10 +64,15 @@ impl<'a> PhysicalExpressionPlanner<'a> { let mut offset = 0; for &table_ref in table_refs { let table = self.table_list.get(table_ref)?; + let datatype = + table.column_types.get(col.column).cloned().ok_or_else(|| { + RayexecError::new(format!("Missing column: {}", col.column)) + })?; if col.table_scope == table_ref { return Ok(PhysicalScalarExpression::Column(PhysicalColumnExpr { idx: offset + col.column, + datatype, })); } @@ -176,7 +181,7 @@ impl<'a> PhysicalExpressionPlanner<'a> { let else_expr = match &expr.else_expr { Some(else_expr) => self.plan_scalar(table_refs, else_expr)?, None => PhysicalScalarExpression::Cast(PhysicalCastExpr { - to: datatype, + to: datatype.clone(), expr: Box::new(PhysicalScalarExpression::Literal(PhysicalLiteralExpr { literal: ScalarValue::Null, })), @@ -186,6 +191,7 @@ impl<'a> PhysicalExpressionPlanner<'a> { Ok(PhysicalScalarExpression::Case(PhysicalCaseExpr { cases, else_expr: Box::new(else_expr), + datatype, })) } other => Err(RayexecError::new(format!( diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index 0a641dfdc..b6011fb91 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -7,6 +7,8 @@ use super::{ExpressionState, PhysicalScalarExpression}; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use 
crate::arrays::datatype::DataType; use crate::database::DatabaseContext; use crate::expr::physical::evaluator::ExpressionEvaluator; use crate::functions::scalar::PlannedScalarFunction; @@ -19,6 +21,28 @@ pub struct PhysicalScalarFunctionExpr { } impl PhysicalScalarFunctionExpr { + pub(crate) fn create_state(&self, batch_size: usize) -> Result { + let inputs = self + .inputs + .iter() + .map(|input| input.create_state(batch_size)) + .collect::>>()?; + + let arrays = self + .inputs + .iter() + .map(|input| Array::new(&NopBufferManager, input.datatype(), batch_size)) + .collect::>>()?; + + let buffer = Batch::from_arrays(arrays, false)?; + + Ok(ExpressionState { buffer, inputs }) + } + + pub fn datatype(&self) -> DataType { + self.function.return_type.clone() + } + pub(crate) fn eval( &self, input: &mut Batch, From 0c5df1017d7ea67a26229257df003d7a76ed4bc5 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 3 Jan 2025 11:55:37 -0500 Subject: [PATCH 44/59] eval constant --- .../rayexec_execution/src/arrays/array/exp.rs | 153 ++++++++++++++++++ .../src/expr/physical/evaluator.rs | 33 +++- .../src/expr/physical/mod.rs | 9 -- .../src/optimizer/expr_rewrite/const_fold.rs | 27 +--- 4 files changed, 189 insertions(+), 33 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index a7c6825dd..3e106d0ba 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -23,6 +23,7 @@ use crate::arrays::buffer::physical_type::{ PhysicalI8, PhysicalInterval, PhysicalList, + PhysicalStorage, PhysicalType, PhysicalU128, PhysicalU16, @@ -41,7 +42,9 @@ use crate::arrays::buffer::{ SecondaryBuffer, }; use crate::arrays::datatype::DataType; +use crate::arrays::scalar::decimal::{Decimal128Scalar, Decimal64Scalar}; use crate::arrays::scalar::interval::Interval; +use crate::arrays::scalar::timestamp::TimestampScalar; use crate::arrays::scalar::ScalarValue; 
#[derive(Debug)] @@ -314,6 +317,156 @@ where Ok(()) } + pub fn get_value(&self, idx: usize) -> Result { + if idx >= self.capacity() { + return Err(RayexecError::new("Index out of bounds") + .with_field("idx", idx) + .with_field("capacity", self.capacity())); + } + + let flat = self.flat_view()?; + + if !flat.validity.is_valid(idx) { + return Ok(ScalarValue::Null); + } + + match &self.datatype { + DataType::Boolean => { + let v = PhysicalBool::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Boolean(*v)) + } + DataType::Int8 => { + let v = PhysicalI8::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int8(*v)) + } + DataType::Int16 => { + let v = PhysicalI16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int16(*v)) + } + DataType::Int32 => { + let v = PhysicalI32::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int32(*v)) + } + DataType::Int64 => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int64(*v)) + } + DataType::Int128 => { + let v = PhysicalI128::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Int128(*v)) + } + DataType::UInt8 => { + let v = PhysicalU8::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt8(*v)) + } + DataType::UInt16 => { + let v = PhysicalU16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt16(*v)) + } + DataType::UInt32 => { + let v = PhysicalU32::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt32(*v)) + } + DataType::UInt64 => { + let v = PhysicalU64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::UInt64(*v)) + } + DataType::UInt128 => { + let v = PhysicalU128::get_addressable(flat.array_buffer)? 
+ .get(idx) + .unwrap(); + Ok(ScalarValue::UInt128(*v)) + } + DataType::Float16 => { + let v = PhysicalF16::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Float16(*v)) + } + DataType::Float32 => { + let v = PhysicalF32::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Float32(*v)) + } + DataType::Float64 => { + let v = PhysicalF64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Float64(*v)) + } + DataType::Decimal64(m) => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Decimal64(Decimal64Scalar { + precision: m.precision, + scale: m.scale, + value: *v, + })) + } + DataType::Decimal128(m) => { + let v = PhysicalI128::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Decimal128(Decimal128Scalar { + precision: m.precision, + scale: m.scale, + value: *v, + })) + } + DataType::Interval => { + let v = PhysicalInterval::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Interval(*v)) + } + DataType::Timestamp(m) => { + let v = PhysicalI64::get_addressable(flat.array_buffer)? + .get(idx) + .unwrap(); + Ok(ScalarValue::Timestamp(TimestampScalar { + unit: m.unit, + value: *v, + })) + } + DataType::Utf8 => { + let addressable = PhysicalUtf8::get_addressable(flat.array_buffer)?; + // TODO: Don't allocate. Doesn't matter too much since this is + // just for constant eval right now. + let v = addressable.get(idx).unwrap().to_string(); + Ok(ScalarValue::Utf8(v.into())) + } + DataType::Binary => { + let addressable = PhysicalBinary::get_addressable(flat.array_buffer)?; + let v = addressable.get(idx).unwrap().to_vec(); + Ok(ScalarValue::Binary(v.into())) + } + + _ => not_implemented!("get value for scalar type"), + } + } + /// Set a scalar value at a given index. 
pub fn set_value(&mut self, idx: usize, val: &ScalarValue) -> Result<()> { if idx >= self.capacity() { diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index 1314fe224..1cf350a07 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -5,6 +5,7 @@ use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::scalar::{OwnedScalarValue, ScalarValue}; /// Evaluate expressions on batch inputs. #[derive(Debug)] @@ -31,14 +32,42 @@ impl ExpressionState { } impl ExpressionEvaluator { - pub fn new(expressions: Vec, batch_size: usize) -> Self { - unimplemented!() + pub fn try_new(expressions: Vec, batch_size: usize) -> Result { + let states = expressions + .iter() + .map(|expr| expr.create_state(batch_size)) + .collect::>>()?; + + Ok(ExpressionEvaluator { + expressions, + states, + }) } pub fn num_expressions(&self) -> usize { self.expressions.len() } + pub fn try_eval_constant(&mut self) -> Result { + if self.expressions.len() != 1 { + return Err(RayexecError::new( + "Single expression for constant eval required", + )); + } + + let expr = &self.expressions[0]; + let state = &mut self.states[0]; + + let mut input = Batch::empty_with_num_rows(1); + let mut out = Array::new(&NopBufferManager, expr.datatype(), 1)?; + + Self::eval_expression(expr, &mut input, state, Selection::linear(1), &mut out)?; + + let v = out.get_value(0)?; + + Ok(v.into_owned()) + } + /// Evaluate the expression on an input batch, writing the results to the /// output batch. 
/// diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index a4685e687..0f1cbdc6b 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -57,15 +57,6 @@ impl PhysicalScalarExpression { } } - // pub(crate) fn new_state(&self, batch_size: usize) -> Result { - // match self { - // Self::Cast(expr) => expr.new_state(batch_size), - // Self::Column(expr) => expr.new_state(batch_size), - // Self::Literal(expr) => expr.new_state(batch_size), - // _ => unimplemented!(), - // } - // } - pub fn eval2<'a>(&self, batch: &'a Batch2) -> Result> { unimplemented!() // match self { diff --git a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs index e213f074b..29de5a46e 100644 --- a/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs +++ b/crates/rayexec_execution/src/optimizer/expr_rewrite/const_fold.rs @@ -1,8 +1,8 @@ -use rayexec_error::{RayexecError, Result}; +use rayexec_error::Result; use super::ExpressionRewriteRule; -use crate::arrays::batch::Batch2; use crate::expr::literal_expr::LiteralExpr; +use crate::expr::physical::evaluator::ExpressionEvaluator; use crate::expr::physical::planner::PhysicalExpressionPlanner; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; @@ -26,28 +26,11 @@ fn maybe_fold(table_list: &TableList, expr: &mut Expression) -> Result<()> { if expr.is_const_foldable() { let planner = PhysicalExpressionPlanner::new(table_list); let phys_expr = planner.plan_scalar(&[], expr)?; - let dummy = Batch2::empty_with_num_rows(1); - let val = phys_expr.eval2(&dummy)?; - - if val.logical_len() != 1 { - return Err(RayexecError::new(format!( - "Expected 1 value from const eval, got {}", - val.logical_len() - ))); - } - - let val = val - .logical_value(0) // Len checked above. 
- .map_err(|_| { - RayexecError::new(format!( - "Failed to get folded scalar value from expression: {expr}" - )) - })?; + let mut evaluator = ExpressionEvaluator::try_new(vec![phys_expr], 1)?; + let val = evaluator.try_eval_constant()?; // Our brand new expression. - *expr = Expression::Literal(LiteralExpr { - literal: val.into_owned(), - }); + *expr = Expression::Literal(LiteralExpr { literal: val }); return Ok(()); } From 5390ed09c5f7890e026ef06f49b7afe2e1b812bb Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 4 Jan 2025 12:03:46 -0600 Subject: [PATCH 45/59] fixup! Merge remote-tracking branch 'origin/main' into sean/buf2 --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 16ebc6ba3..024eb4021 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1392,7 +1392,7 @@ dependencies = [ [[package]] name = "iterutil" -version = "0.0.93" +version = "0.0.94" [[package]] name = "itoa" From cd088f9432f6f7846ed0ca9886f8e982d4b3ddb8 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 4 Jan 2025 15:15:27 -0600 Subject: [PATCH 46/59] some aggregate rework --- .../src/arrays/executor/aggregate/binary.rs | 6 +- .../src/arrays/executor/aggregate/mod.rs | 4 +- .../src/arrays/executor/aggregate/unary.rs | 12 +- .../src/arrays/executor_exp/aggregate/mod.rs | 25 ++ .../operators/hash_aggregate/distinct.rs | 12 +- .../src/functions/aggregate/builtin/avg.rs | 125 +++++----- .../src/functions/aggregate/builtin/corr.rs | 4 +- .../src/functions/aggregate/builtin/count.rs | 4 +- .../src/functions/aggregate/builtin/covar.rs | 6 +- .../src/functions/aggregate/builtin/first.rs | 10 +- .../src/functions/aggregate/builtin/minmax.rs | 10 +- .../functions/aggregate/builtin/regr_avg.rs | 6 +- .../functions/aggregate/builtin/regr_count.rs | 4 +- .../functions/aggregate/builtin/regr_r2.rs | 4 +- .../functions/aggregate/builtin/regr_slope.rs | 4 +- .../src/functions/aggregate/builtin/stddev.rs | 10 +- .../functions/aggregate/builtin/string_agg.rs 
| 4 +- .../src/functions/aggregate/builtin/sum.rs | 8 +- .../src/functions/aggregate/mod.rs | 7 +- .../src/functions/aggregate/states.rs | 219 ++++++++++++++++-- 20 files changed, 347 insertions(+), 137 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs index 9138c486e..5c1b20fea 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/binary.rs @@ -9,9 +9,9 @@ use crate::arrays::storage::AddressableStorage; /// Updates aggregate states for an aggregate that accepts two inputs. #[derive(Debug, Clone, Copy)] -pub struct BinaryNonNullUpdater; +pub struct BinaryNonNullUpdater2; -impl BinaryNonNullUpdater { +impl BinaryNonNullUpdater2 { pub fn update<'a, S1, S2, I, State, Output>( array1: &'a Array2, array2: &'a Array2, @@ -131,7 +131,7 @@ mod tests { }, ]; - BinaryNonNullUpdater::update::( + BinaryNonNullUpdater2::update::( &array1, &array2, mapping, diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs index 4dbead80e..f8e399a04 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/mod.rs @@ -39,9 +39,9 @@ pub struct RowToStateMapping { } #[derive(Debug, Clone, Copy)] -pub struct StateCombiner; +pub struct StateCombiner2; -impl StateCombiner { +impl StateCombiner2 { /// Combine states, merging states from `consume` into `targets`. /// /// `mapping` provides a mapping of consume states to the target index. 
The diff --git a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs index a69df72ac..d94e56fec 100644 --- a/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor/aggregate/unary.rs @@ -8,9 +8,9 @@ use crate::arrays::storage::AddressableStorage; /// Updates aggregate states for an aggregate that accepts one input. #[derive(Debug, Clone, Copy)] -pub struct UnaryNonNullUpdater; +pub struct UnaryNonNullUpdater2; -impl UnaryNonNullUpdater { +impl UnaryNonNullUpdater2 { pub fn update<'a, S, I, State, Output>( array: &'a Array2, mapping: I, @@ -102,7 +102,7 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states) + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) .unwrap(); assert_eq!(11, states[0].val); @@ -127,7 +127,7 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states) + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) .unwrap(); assert_eq!(7, states[0].val); @@ -156,7 +156,7 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states) + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) .unwrap(); assert_eq!(5, states[0].val); @@ -204,7 +204,7 @@ mod tests { }, ]; - UnaryNonNullUpdater::update::(&array, mapping, &mut states) + UnaryNonNullUpdater2::update::(&array, mapping, &mut states) .unwrap(); assert_eq!("aabbbcccc", &states[0].buf); diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs index 76356e6e3..f836c670d 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs @@ -25,3 +25,28 @@ pub trait AggregateState: Debug { where M: AddressableMut; } + +#[derive(Debug, Clone, Copy)] +pub struct StateCombiner; + +impl StateCombiner { + 
/// Combine states, merging states from `consume` into `targets`. + /// + /// `mapping` provides (from, to) mappings between `consume` and `targets`. + pub fn combine( + consume: &mut [State], + mapping: impl IntoIterator, + targets: &mut [State], + ) -> Result<()> + where + State: AggregateState, + { + for (from, to) in mapping { + let consume = &mut consume[from]; + let target = &mut targets[to]; + target.merge(consume)?; + } + + Ok(()) + } +} diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 6dfd36bac..7b64bd74c 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -58,9 +58,9 @@ impl AggregateGroupStates for DistinctGroupedStates { // insert into the group specific hash table. for state_idx in 0..self.distinct_inputs.len() { let row_sel = Arc::new(SelectionVector::from_iter(mappings.iter().filter_map( - |row_mapping| { - if row_mapping.to_state == state_idx { - Some(row_mapping.from_row) + |&(from, to)| { + if to == state_idx { + Some(from) } else { None } @@ -105,9 +105,9 @@ impl AggregateGroupStates for DistinctGroupedStates { .opaque_states_mut() .downcast::>>()?; - for mapping in mapping { - let target = self.distinct_inputs[mapping.to_state].as_mut().unwrap(); - let consume = other_distinct_inputs[mapping.from_row].as_mut().unwrap(); + for (from, to) in mapping { + let consume = other_distinct_inputs[from].as_mut().unwrap(); + let target = self.distinct_inputs[to].as_mut().unwrap(); target.merge(consume)?; } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index 061ebd7d8..7f551d3bd 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ 
b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -8,16 +8,27 @@ use serde::{Deserialize, Serialize}; use crate::arrays::array::Array2; use crate::arrays::bitmap::Bitmap; -use crate::arrays::datatype::{DataType, DataTypeId}; +use crate::arrays::buffer::physical_type::{ + AddressableMut, + MutablePhysicalStorage, + PhysicalF64, + PhysicalI64, +}; +use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64_2}; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, + drain, + new_unary_aggregate_states2, primitive_finalize, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -134,42 +145,23 @@ where D::Primitive: Into, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - let state_finalize = move |states: &mut [AvgStateDecimal]| { - let mut builder = ArrayBuilder { - datatype: DataType::Float64, - buffer: PrimitiveBuffer::with_len(states.len()), - }; - - let mut validities = Bitmap::new_with_all_true(states.len()); - - let m = datatype.clone().try_get_decimal_type_meta()?; - let scale = f64::powi(10.0, m.scale.abs() as i32); - - for (idx, state) in states.iter_mut().enumerate() { - let ((sum, count), valid) = state.finalize()?; - - if !valid { - validities.set_unchecked(idx, false); - continue; - } - - let val = (sum as f64) / (count as f64 * scale); - builder.buffer.put(idx, &val); - } - - Ok(Array2::new_with_validity_and_array_data( - builder.datatype, - validities, - builder.buffer.into_data(), - 
)) - }; - - new_unary_aggregate_states::( - AvgStateDecimal::::default, - state_finalize, - ) + let m = self + .datatype + .try_get_decimal_type_meta() + .unwrap_or(DecimalTypeMeta::new(D::MAX_PRECISION, D::DEFAULT_SCALE)); // TODO: Should rework to return the error instead. + + let scale = f64::powi(10.0, m.scale.abs() as i32); + + Box::new(TypedAggregateGroupStates::new( + move || AvgStateDecimal:: { + scale, + sum: 0, + count: 0, + _input: PhantomData, + }, + unary_update::, + drain::, + )) } } @@ -178,10 +170,11 @@ pub struct AvgFloat64Impl; impl AggregateFunctionImpl for AvgFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( AvgStateF64::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -190,38 +183,52 @@ pub struct AvgInt64Impl; impl AggregateFunctionImpl for AvgInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + Box::new(TypedAggregateGroupStates::new( AvgStateF64::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } -#[derive(Debug, Default)] +#[derive(Debug)] struct AvgStateDecimal { + /// Scale to use when finalizing the physical decimal value. 
+ scale: f64, sum: i128, count: i64, _input: PhantomData, } -impl + Default + Debug> AggregateState2 for AvgStateDecimal { +impl AggregateState<&I, f64> for AvgStateDecimal +where + I: Into + Copy + Debug, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.count += other.count; Ok(()) } - fn update(&mut self, input: I) -> Result<()> { + fn update(&mut self, &input: &I) -> Result<()> { self.sum += input.into(); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<((i128, i64), bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - return Ok(((0, 0), false)); + output.put_null(); + return Ok(()); } - Ok(((self.sum, self.count), true)) + + let val = (self.sum as f64) / (self.count as f64 * self.scale); + output.put(&val); + + Ok(()) } } @@ -232,9 +239,9 @@ struct AvgStateF64 { _input: PhantomData, } -impl AggregateState2 for AvgStateF64 +impl AggregateState<&I, f64> for AvgStateF64 where - I: Into + Default + Debug, + I: Into + Copy + Default + Debug, T: AsPrimitive + AddAssign + Debug + Default, { fn merge(&mut self, other: &mut Self) -> Result<()> { @@ -243,17 +250,23 @@ where Ok(()) } - fn update(&mut self, input: I) -> Result<()> { + fn update(&mut self, &input: &I) -> Result<()> { self.sum += input.into(); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - return Ok((0.0, false)); + output.put_null(); + return Ok(()); } let sum: f64 = self.sum.as_(); - Ok((sum / self.count as f64, true)) + output.put(&sum); + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs index bd612b42f..9407125fb 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs +++ 
b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs @@ -9,7 +9,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -74,7 +74,7 @@ pub struct CorrImpl; impl AggregateFunctionImpl for CorrImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( CorrelationState::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs index 354d90240..27ee36b1c 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs @@ -5,7 +5,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalAny; use crate::expr::{self, Expression}; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, + new_unary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -75,7 +75,7 @@ pub struct CountNonNullImpl; impl AggregateFunctionImpl for CountNonNullImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( CountNonNullState::default, move |states| primitive_finalize(DataType::Int64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs index 653123530..ed34f9e20 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs @@ -8,7 +8,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use 
crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -72,7 +72,7 @@ pub struct CovarPopImpl; impl AggregateFunctionImpl for CovarPopImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( CovarState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -130,7 +130,7 @@ pub struct CovarSampImpl; impl AggregateFunctionImpl for CovarSampImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( CovarState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index 3dbdda2d9..59b646887 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -34,7 +34,7 @@ use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ boolean_finalize, - new_unary_aggregate_states, + new_unary_aggregate_states2, primitive_finalize, untyped_null_finalize, AggregateGroupStates, @@ -157,7 +157,7 @@ impl AggregateFunctionImpl for FirstBinaryImpl { fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::( + new_unary_aggregate_states2::( FirstStateBinary::default, move |states| { let builder = ArrayBuilder { @@ -175,7 +175,7 @@ pub struct FirstUntypedNullImpl; impl AggregateFunctionImpl for FirstUntypedNullImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( FirstState::::default, untyped_null_finalize, ) @@ -187,7 +187,7 @@ pub struct FirstBoolImpl; impl AggregateFunctionImpl 
for FirstBoolImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( FirstState::::default, move |states| boolean_finalize(DataType::Boolean, states), ) @@ -221,7 +221,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::(FirstState::::default, move |states| { + new_unary_aggregate_states2::(FirstState::::default, move |states| { primitive_finalize(datatype.clone(), states) }) } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index 329a9d75d..a2e5c4e30 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -34,7 +34,7 @@ use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ boolean_finalize, - new_unary_aggregate_states, + new_unary_aggregate_states2, primitive_finalize, untyped_null_finalize, AggregateGroupStates, @@ -242,7 +242,7 @@ pub struct MinMaxUntypedNull; impl AggregateFunctionImpl for MinMaxUntypedNull { fn new_states(&self) -> Box { // Note min vs max doesn't matter. Everything is null. 
- new_unary_aggregate_states::( + new_unary_aggregate_states2::( MinState::::default, untyped_null_finalize, ) @@ -274,7 +274,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::(M::default, move |states| { + new_unary_aggregate_states2::(M::default, move |states| { let builder = ArrayBuilder { datatype: datatype.clone(), buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), @@ -309,7 +309,7 @@ where M: AggregateState2 + Default + Sync + Send + 'static, { fn new_states(&self) -> Box { - new_unary_aggregate_states::(M::default, move |states| { + new_unary_aggregate_states2::(M::default, move |states| { boolean_finalize(DataType::Boolean, states) }) } @@ -354,7 +354,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::(M::default, move |states| { + new_unary_aggregate_states2::(M::default, move |states| { primitive_finalize(datatype.clone(), states) }) } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs index b86a9668b..4efe5fa3e 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs @@ -8,7 +8,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -72,7 +72,7 @@ pub struct RegrAvgYImpl; impl AggregateFunctionImpl for RegrAvgYImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( RegrAvgState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -137,7 +137,7 @@ pub struct RegrAvgXImpl; impl AggregateFunctionImpl for RegrAvgXImpl { 
fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( RegrAvgState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs index 74b16b960..29914d218 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs @@ -7,7 +7,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalAny; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -71,7 +71,7 @@ pub struct RegrCountImpl; impl AggregateFunctionImpl for RegrCountImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( RegrCountState::default, move |states| primitive_finalize(DataType::Int64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs index e42a68442..f4df4795e 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs @@ -8,7 +8,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -72,7 +72,7 @@ pub struct RegrR2Impl; impl AggregateFunctionImpl for RegrR2Impl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( RegrR2State::default, move |states| 
primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs index 5fa3e2da3..7b61ba9d4 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs @@ -9,7 +9,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states, + new_binary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -73,7 +73,7 @@ pub struct RegrSlopeImpl; impl AggregateFunctionImpl for RegrSlopeImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states::( + new_binary_aggregate_states2::( RegrSlopeState::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs index 03a28a764..7300aebb9 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs @@ -8,7 +8,7 @@ use crate::arrays::executor::aggregate::AggregateState2; use crate::arrays::executor::physical_type::PhysicalF64_2; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, + new_unary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -69,7 +69,7 @@ pub struct StddevPopImpl; impl AggregateFunctionImpl for StddevPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -128,7 +128,7 @@ pub struct StddevSampImpl; impl AggregateFunctionImpl for StddevSampImpl { fn 
new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -183,7 +183,7 @@ pub struct VarPopImpl; impl AggregateFunctionImpl for VarPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -238,7 +238,7 @@ pub struct VarSampImpl; impl AggregateFunctionImpl for VarSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( VarianceState::::default, move |states| primitive_finalize(DataType::Float64, states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs index bf1d7dab2..cd1ec0d65 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs @@ -8,7 +8,7 @@ use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; use crate::arrays::executor::physical_type::PhysicalUtf8_2; use crate::arrays::scalar::ScalarValue; use crate::expr::Expression; -use crate::functions::aggregate::states::{new_unary_aggregate_states, AggregateGroupStates}; +use crate::functions::aggregate::states::{new_unary_aggregate_states2, AggregateGroupStates}; use crate::functions::aggregate::{ AggregateFunction, AggregateFunctionImpl, @@ -99,7 +99,7 @@ impl AggregateFunctionImpl for StringAggImpl { string: None, }; - new_unary_aggregate_states::(state_init, move |states| { + new_unary_aggregate_states2::(state_init, move |states| { let builder = ArrayBuilder { datatype: DataType::Utf8, buffer: GermanVarlenBuffer::::with_len(states.len()), diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs 
b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 7991c8f37..26d00f5fa 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -16,7 +16,7 @@ use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType} use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states, + new_unary_aggregate_states2, primitive_finalize, AggregateGroupStates, }; @@ -117,7 +117,7 @@ pub struct SumInt64Impl; impl AggregateFunctionImpl for SumInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( SumStateCheckedAdd::::default, move |states| primitive_finalize(DataType::Int64, states), ) @@ -129,7 +129,7 @@ pub struct SumFloat64Impl; impl AggregateFunctionImpl for SumFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states::( + new_unary_aggregate_states2::( SumStateAdd::::default, move |states| primitive_finalize(DataType::Float64, states), ) @@ -159,7 +159,7 @@ where fn new_states(&self) -> Box { let datatype = self.datatype.clone(); - new_unary_aggregate_states::( + new_unary_aggregate_states2::( SumStateCheckedAdd::::default, move |states| primitive_finalize(datatype.clone(), states), ) diff --git a/crates/rayexec_execution/src/functions/aggregate/mod.rs b/crates/rayexec_execution/src/functions/aggregate/mod.rs index 0bfa1baba..7ccc951ad 100644 --- a/crates/rayexec_execution/src/functions/aggregate/mod.rs +++ b/crates/rayexec_execution/src/functions/aggregate/mod.rs @@ -103,7 +103,7 @@ impl<'a> ChunkGroupAddressIter<'a> { } impl Iterator for ChunkGroupAddressIter<'_> { - type Item = RowToStateMapping; + type Item = (usize, usize); #[inline] fn next(&mut self) -> Option { @@ -111,10 +111,7 @@ impl Iterator for ChunkGroupAddressIter<'_> { if addr.chunk_idx == self.chunk_idx { let row = self.row_idx; 
self.row_idx += 1; - return Some(RowToStateMapping { - from_row: row, - to_state: addr.row_idx as usize, - }); + return Some((row, addr.row_idx as usize)); } self.row_idx += 1; } diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index d4af24b9b..a97b401ee 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -3,28 +3,47 @@ use std::any::Any; use std::fmt::Debug; use std::marker::PhantomData; +use iterutil::IntoExactSizeIterator; use rayexec_error::{RayexecError, Result}; use super::ChunkGroupAddressIter; use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; use crate::arrays::array::{Array2, ArrayData2}; +use crate::arrays::buffer::physical_type::{ + MutablePhysicalStorage, + PhysicalBool, + PhysicalStorage, + PhysicalType, +}; use crate::arrays::datatype::DataType; use crate::arrays::executor::aggregate::{ AggregateState2, - BinaryNonNullUpdater, - StateCombiner, + BinaryNonNullUpdater2, + StateCombiner2, StateFinalizer, - UnaryNonNullUpdater, + UnaryNonNullUpdater2, }; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor_exp::aggregate::unary::UnaryNonNullUpdater; +use crate::arrays::executor_exp::aggregate::{AggregateState, StateCombiner}; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; pub struct TypedAggregateGroupStates { + /// States being tracked. states: Vec, + /// Index we should start draining from. Updated after every call to + /// `drain`. + drain_idx: usize, + + /// How new states are initialized. state_init: StateInit, + /// How states get updated. state_update: StateUpdate, + /// How to finalize states. 
state_finalize: StateFinalize, _input: PhantomData, @@ -41,6 +60,131 @@ impl ) -> Self { TypedAggregateGroupStates { states: Vec::new(), + drain_idx: 0, + state_init, + state_update, + state_finalize, + _input: PhantomData, + _output: PhantomData, + } + } +} + +impl AggregateGroupStates + for TypedAggregateGroupStates +where + State: AggregateState + Sync + Send + 'static, + Input: Sync + Send, + Output: Sync + Send, + StateInit: Fn() -> State + Sync + Send, + StateUpdate: Fn(&[Array], Selection, &[usize], &mut [State]) -> Result<()> + Sync + Send, + StateFinalize: Fn(&mut [State], &mut Array) -> Result<()> + Sync + Send, +{ + fn opaque_states_mut(&mut self) -> OpaqueStatesMut<'_> { + debug_assert_eq!(0, self.drain_idx); + OpaqueStatesMut(&mut self.states) + } + + fn new_states(&mut self, count: usize) { + debug_assert_eq!(0, self.drain_idx); + self.states.extend((0..count).map(|_| (self.state_init)())) + } + + fn num_states(&self) -> usize { + self.states.len() + } + + fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { + unimplemented!() + } + + fn update_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()> { + debug_assert_eq!(0, self.drain_idx); + debug_assert_eq!(selection.len(), mapping.len()); + + (self.state_update)(inputs, selection, mapping, &mut self.states) + } + + fn combine( + &mut self, + consume: &mut Box, + mapping: ChunkGroupAddressIter, + ) -> Result<()> { + debug_assert_eq!(0, self.drain_idx); + let consume_states = consume.opaque_states_mut().downcast::>()?; + StateCombiner::combine(consume_states, mapping, &mut self.states) + } + + fn finalize2(&mut self) -> Result { + unimplemented!() + } + + fn drain(&mut self, output: &mut Array) -> Result { + let num_drain = usize::min(self.states.len() - self.drain_idx, output.capacity()); + let drain_states = &mut self.states[self.drain_idx..self.drain_idx + num_drain]; + + (self.state_finalize)(drain_states, 
output)?; + self.drain_idx += num_drain; + + Ok(num_drain) + } +} + +pub fn drain(states: &mut [State], output: &mut Array) -> Result<()> +where + S: MutablePhysicalStorage, + State: AggregateState, +{ + let buffer = &mut S::get_addressable_mut(output.data.try_as_mut()?)?; + let validity = &mut output.validity; + + for (idx, state) in states.iter_mut().enumerate() { + state.finalize(PutBuffer { + idx, + buffer, + validity, + })?; + } + + Ok(()) +} + +impl fmt::Debug + for TypedAggregateGroupStates +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("TypedAggregateGroupedStates") + .field("num_states", &self.states.len()) + .finish_non_exhaustive() + } +} + +pub struct TypedAggregateGroupStates2 { + states: Vec, + + state_init: StateInit, + state_update: StateUpdate, + state_finalize: StateFinalize, + + _input: PhantomData, + _output: PhantomData, +} + +impl + TypedAggregateGroupStates2 +{ + pub fn new( + state_init: StateInit, + state_update: StateUpdate, + state_finalize: StateFinalize, + ) -> Self { + TypedAggregateGroupStates2 { + states: Vec::::new(), state_init, state_update, state_finalize, @@ -51,7 +195,7 @@ impl } /// Helper for create an `AggregateGroupStates` that accepts one input. -pub fn new_unary_aggregate_states( +pub fn new_unary_aggregate_states2( state_init: StateInit, state_finalize: StateFinalize, ) -> Box @@ -67,10 +211,10 @@ where StateInit: Fn() -> State + Sync + Send + 'static, StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, { - Box::new(TypedAggregateGroupStates { - states: Vec::new(), + Box::new(TypedAggregateGroupStates2 { + states: Vec::::new(), state_init, - state_update: unary_update::, + state_update: unary_update2::, state_finalize, _input: PhantomData, _output: PhantomData, @@ -78,7 +222,7 @@ where } /// Helper for create an `AggregateGroupStates` that accepts two inputs. 
-pub fn new_binary_aggregate_states( +pub fn new_binary_aggregate_states2( state_init: StateInit, state_finalize: StateFinalize, ) -> Box @@ -93,10 +237,10 @@ where StateInit: Fn() -> State + Sync + Send + 'static, StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, { - Box::new(TypedAggregateGroupStates { - states: Vec::new(), + Box::new(TypedAggregateGroupStates2 { + states: Vec::::new(), state_init, - state_update: binary_update::, + state_update: binary_update2::, state_finalize, _input: PhantomData, _output: PhantomData, @@ -104,7 +248,7 @@ where } impl AggregateGroupStates - for TypedAggregateGroupStates + for TypedAggregateGroupStates2 where State: AggregateState2 + Sync + Send + 'static, Input: Sync + Send, @@ -135,7 +279,8 @@ where mapping: ChunkGroupAddressIter, ) -> Result<()> { let consume_states = consume.opaque_states_mut().downcast::>()?; - StateCombiner::combine(consume_states, mapping, &mut self.states) + // StateCombiner2::combine(consume_states, mapping, &mut self.states) + unimplemented!() } fn finalize2(&mut self) -> Result { @@ -144,7 +289,7 @@ where } impl fmt::Debug - for TypedAggregateGroupStates + for TypedAggregateGroupStates2 { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("TypedAggregateGroupedStates") @@ -169,7 +314,12 @@ pub trait AggregateGroupStates: Debug + Sync + Send { /// Update states from inputs using some mapping. fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; - fn update_states(&mut self, inputs: &[Array], mapping: ChunkGroupAddressIter) -> Result<()> { + fn update_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()> { unimplemented!() } @@ -183,7 +333,11 @@ pub trait AggregateGroupStates: Debug + Sync + Send { /// Finalize the states and return an array. 
fn finalize2(&mut self) -> Result; - fn drain(&mut self, output: &mut Array) -> Result<()> { + /// Finalize and drain state into `output`. + /// + /// Returns the number of states drained. If the number of states drained is + /// less than the capacity of the output arrays, then draining is finished. + fn drain(&mut self, output: &mut Array) -> Result { unimplemented!() } } @@ -201,8 +355,27 @@ impl<'a> OpaqueStatesMut<'a> { } } +pub fn unary_update( + arrays: &[Array], + selection: Selection, + mapping: &[usize], + states: &mut [State], +) -> Result<()> +where + Storage: PhysicalStorage, + Output: MutablePhysicalStorage, + State: for<'a> AggregateState<&'a Storage::StorageType, Output::StorageType>, +{ + UnaryNonNullUpdater::update::( + &arrays[0], + selection, + mapping.iter().copied(), + states, + ) +} + /// Update function for a unary aggregate. -pub fn unary_update( +pub fn unary_update2( arrays: &[&Array2], mapping: ChunkGroupAddressIter, states: &mut [State], @@ -211,10 +384,11 @@ where Storage: PhysicalStorage2, State: for<'a> AggregateState2, Output>, { - UnaryNonNullUpdater::update::(arrays[0], mapping, states) + unimplemented!() + // UnaryNonNullUpdater::update::(arrays[0], mapping, states) } -pub fn binary_update( +pub fn binary_update2( arrays: &[&Array2], mapping: ChunkGroupAddressIter, states: &mut [State], @@ -224,9 +398,10 @@ where Storage2: PhysicalStorage2, State: for<'a> AggregateState2<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, { - BinaryNonNullUpdater::update::( - arrays[0], arrays[1], mapping, states, - ) + unimplemented!() + // BinaryNonNullUpdater::update::( + // arrays[0], arrays[1], mapping, states, + // ) } pub fn untyped_null_finalize(states: &mut [State]) -> Result { From 554dc0b7a7c45f5ade8512260c605743c63d57ac Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sat, 4 Jan 2025 16:15:58 -0600 Subject: [PATCH 47/59] more agg --- .../src/arrays/buffer/physical_type.rs | 16 +-- .../src/functions/aggregate/builtin/avg.rs | 14 +-- 
.../src/functions/aggregate/builtin/corr.rs | 69 ++++++----- .../src/functions/aggregate/builtin/count.rs | 114 +++++++++++++++--- .../src/functions/aggregate/builtin/covar.rs | 67 ++++++---- .../functions/aggregate/builtin/regr_r2.rs | 38 +++--- .../functions/aggregate/builtin/regr_slope.rs | 46 ++++--- .../src/functions/aggregate/builtin/stddev.rs | 92 ++++++++------ .../src/functions/aggregate/states.rs | 25 ++++ 9 files changed, 317 insertions(+), 164 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index f03063aff..be77a43ec 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -168,7 +168,7 @@ where } /// Trait for determining how we access the underlying storage for arrays. -pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { +pub trait PhysicalStorage: Debug + Default + Sync + Send + Clone + Copy + 'static { const PHYSICAL_TYPE: PhysicalType; /// Size in bytes of the type being stored in the primary buffer. @@ -182,7 +182,7 @@ pub trait PhysicalStorage: Debug + Sync + Send + Clone + Copy + 'static { /// The logical type being stored that can be accessed. /// /// For primitive buffers, this will be the same as the primary buffer type. - type StorageType: ?Sized; + type StorageType: Sync + Send + ?Sized; /// The type of the addressable storage. type Addressable<'a>: Addressable; @@ -202,7 +202,7 @@ pub trait MutablePhysicalStorage: PhysicalStorage { macro_rules! 
generate_primitive { ($prim:ty, $name:ident, $phys_typ:ident) => { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] + #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct $name; impl PhysicalStorage for $name { @@ -258,7 +258,7 @@ generate_primitive!(Interval, PhysicalInterval, Interval); #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct UntypedNull; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct PhysicalUntypedNull; impl PhysicalStorage for PhysicalUntypedNull { @@ -284,7 +284,7 @@ impl MutablePhysicalStorage for PhysicalUntypedNull { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct PhysicalUtf8; impl PhysicalStorage for PhysicalUtf8 { @@ -310,7 +310,7 @@ impl MutablePhysicalStorage for PhysicalUtf8 { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct PhysicalBinary; impl PhysicalStorage for PhysicalBinary { @@ -337,7 +337,7 @@ impl MutablePhysicalStorage for PhysicalBinary { } /// Dictionary arrays have the selection vector as the primary data buffer. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct PhysicalDictionary; impl PhysicalStorage for PhysicalDictionary { @@ -353,7 +353,7 @@ impl PhysicalStorage for PhysicalDictionary { } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] pub struct PhysicalList; impl PhysicalStorage for PhysicalList { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs index 7f551d3bd..28eaaca04 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/avg.rs @@ -6,26 +6,14 @@ use num_traits::AsPrimitive; use rayexec_error::Result; use serde::{Deserialize, Serialize}; -use crate::arrays::array::Array2; -use crate::arrays::bitmap::Bitmap; -use crate::arrays::buffer::physical_type::{ - AddressableMut, - MutablePhysicalStorage, - PhysicalF64, - PhysicalI64, -}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalI64}; use crate::arrays::datatype::{DataType, DataTypeId, DecimalTypeMeta}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::builder::{ArrayBuilder, ArrayDataBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64_2}; use crate::arrays::executor_exp::aggregate::AggregateState; use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; use crate::expr::Expression; use crate::functions::aggregate::states::{ drain, - new_unary_aggregate_states2, - primitive_finalize, unary_update, AggregateGroupStates, TypedAggregateGroupStates, diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs index 9407125fb..34cb40c62 100644 --- 
a/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/corr.rs @@ -4,14 +4,16 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{StddevPopFinalize, VarianceState}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -74,10 +76,11 @@ pub struct CorrImpl; impl AggregateFunctionImpl for CorrImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( CorrelationState::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -88,7 +91,25 @@ pub struct CorrelationState { stddev_y: VarianceState, } -impl AggregateState2<(f64, f64), f64> for CorrelationState { +impl CorrelationState { + pub fn finalize_value(&self) -> Option { + let cov = self.covar.finalize_value()?; + let stddev_x = self.stddev_x.finalize_value()?; + let stddev_y = self.stddev_y.finalize_value()?; + + let div = stddev_x * stddev_y; + if div == 0.0 { + // Return null, matches Postgres. + // + // Note duckdb returns NaN here. 
+ return None; + } + + Some(cov / div) + } +} + +impl AggregateState<(&f64, &f64), f64> for CorrelationState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.covar.merge(&mut other.covar)?; self.stddev_x.merge(&mut other.stddev_x)?; @@ -96,7 +117,7 @@ impl AggregateState2<(f64, f64), f64> for CorrelationState { Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.covar.update(input)?; // Note input is passed in as (y, x) @@ -106,23 +127,15 @@ impl AggregateState2<(f64, f64), f64> for CorrelationState { Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (cov, cov_valid) = self.covar.finalize()?; - let (stddev_x, stddev_x_valid) = self.stddev_x.finalize()?; - let (stddev_y, stddev_y_valid) = self.stddev_y.finalize()?; - - if cov_valid && stddev_x_valid && stddev_y_valid { - let div = stddev_x * stddev_y; - if div == 0.0 { - // Matches Postgres. - // - // Note duckdb returns NaN here. - return Ok((0.0, false)); - } - Ok((cov / div, true)) - } else { - Ok((0.0, false)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match self.finalize_value() { + Some(val) => output.put(&val), + None => output.put_null(), } + Ok(()) } } @@ -133,9 +146,9 @@ mod tests { #[test] fn correlation_state_single_input() { let mut state = CorrelationState::default(); - state.update((1.0, 1.0)).unwrap(); + state.update((&1.0, &1.0)).unwrap(); - let (_v, valid) = state.finalize().unwrap(); - assert!(!valid); + let v = state.finalize_value(); + assert_eq!(None, v); } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs index 27ee36b1c..25ff22b55 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/count.rs @@ -1,13 +1,41 @@ -use rayexec_error::Result; +use 
std::marker::PhantomData; +use rayexec_error::{not_implemented, Result}; + +use crate::arrays::buffer::physical_type::{ + AddressableMut, + PhysicalBinary, + PhysicalBool, + PhysicalDictionary, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalAny; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::{self, Expression}; use crate::functions::aggregate::states::{ - new_unary_aggregate_states2, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -28,7 +56,7 @@ impl Count { function: Box::new(*self), return_type: DataType::Int64, inputs: vec![expr::lit(true)], - function_impl: Box::new(CountNonNullImpl), + function_impl: Box::new(CountNonNullImpl::::new()), } } } @@ -56,50 +84,96 @@ impl FunctionInfo for Count { impl AggregateFunction for Count { fn plan( &self, - _table_list: &TableList, + table_list: &TableList, inputs: Vec, ) -> Result { plan_check_num_args(self, &inputs, 1)?; + let function_impl: Box = match inputs[0] + .datatype(table_list)? 
+ .physical_type() + { + PhysicalType::UntypedNull => Box::new(CountNonNullImpl::::new()), + PhysicalType::Boolean => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Int128 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::UInt128 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float16 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float32 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Float64 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Interval => Box::new(CountNonNullImpl::::new()), + PhysicalType::Utf8 => Box::new(CountNonNullImpl::::new()), + PhysicalType::Binary => Box::new(CountNonNullImpl::::new()), + PhysicalType::Dictionary => Box::new(CountNonNullImpl::::new()), + PhysicalType::List => Box::new(CountNonNullImpl::::new()), + PhysicalType::Struct => not_implemented!("count struct"), + }; + Ok(PlannedAggregateFunction { function: Box::new(*self), return_type: DataType::Int64, inputs, - function_impl: Box::new(CountNonNullImpl), + function_impl, }) } } #[derive(Debug, Clone)] -pub struct CountNonNullImpl; +pub struct CountNonNullImpl { + _s: PhantomData, +} -impl AggregateFunctionImpl for CountNonNullImpl { +impl CountNonNullImpl { + const fn new() -> Self { + CountNonNullImpl { _s: PhantomData } + } +} + +impl AggregateFunctionImpl for CountNonNullImpl +where + S: PhysicalStorage, +{ fn new_states(&self) -> Box { - new_unary_aggregate_states2::( - CountNonNullState::default, - move |states| 
primitive_finalize(DataType::Int64, states), - ) + Box::new(TypedAggregateGroupStates::new( + CountNonNullState::::default, + unary_update::, + drain::, + )) } } #[derive(Debug, Default)] -pub struct CountNonNullState { +pub struct CountNonNullState { count: i64, + _s: PhantomData, } -impl AggregateState2<(), i64> for CountNonNullState { +impl AggregateState<&S::StorageType, i64> for CountNonNullState +where + S: PhysicalStorage, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) } - fn update(&mut self, _input: ()) -> Result<()> { + fn update(&mut self, _input: &S::StorageType) -> Result<()> { self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(i64, bool)> { - // Always valid, even when count is 0 - Ok((self.count, true)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.count); + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs index ed34f9e20..c03c14162 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/covar.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -72,10 +74,11 @@ pub struct CovarPopImpl; impl AggregateFunctionImpl 
for CovarPopImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( CovarState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -130,25 +133,26 @@ pub struct CovarSampImpl; impl AggregateFunctionImpl for CovarSampImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( CovarState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } pub trait CovarFinalize: Sync + Send + Debug + Default + 'static { - fn finalize(co_moment: f64, count: i64) -> (f64, bool); + fn finalize(co_moment: f64, count: i64) -> Option; } #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct CovarSampFinalize; impl CovarFinalize for CovarSampFinalize { - fn finalize(co_moment: f64, count: i64) -> (f64, bool) { + fn finalize(co_moment: f64, count: i64) -> Option { match count { - 0 | 1 => (0.0, false), - _ => (co_moment / (count - 1) as f64, true), + 0 | 1 => None, + _ => Some(co_moment / (count - 1) as f64), } } } @@ -157,10 +161,10 @@ impl CovarFinalize for CovarSampFinalize { pub struct CovarPopFinalize; impl CovarFinalize for CovarPopFinalize { - fn finalize(co_moment: f64, count: i64) -> (f64, bool) { + fn finalize(co_moment: f64, count: i64) -> Option { match count { - 0 => (0.0, false), - _ => (co_moment / count as f64, true), + 0 => None, + _ => Some(co_moment / count as f64), } } } @@ -174,7 +178,16 @@ pub struct CovarState { _finalize: PhantomData, } -impl AggregateState2<(f64, f64), f64> for CovarState +impl CovarState +where + F: CovarFinalize, +{ + pub fn finalize_value(&self) -> Option { + F::finalize(self.co_moment, self.count) + } +} + +impl AggregateState<(&f64, &f64), f64> for CovarState where F: CovarFinalize, { @@ -206,11 +219,8 @@ where Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { - // Note that 
'y' comes first, covariance functions are call like `COVAR_SAMP(y, x)`. - let x = input.1; - let y = input.0; - + // Note that 'y' comes first, covariance functions are call like `COVAR_SAMP(y, x)`. + fn update(&mut self, (&y, &x): (&f64, &f64)) -> Result<()> { self.count += 1; let n = self.count as f64; @@ -229,7 +239,14 @@ where Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - Ok(F::finalize(self.co_moment, self.count)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match F::finalize(self.co_moment, self.count) { + Some(val) => output.put(&val), + None => output.put_null(), + } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs index f4df4795e..7ed1379d8 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_r2.rs @@ -3,14 +3,16 @@ use std::fmt::Debug; use rayexec_error::Result; use super::corr::CorrelationState; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -72,10 +74,11 @@ pub struct RegrR2Impl; impl AggregateFunctionImpl for RegrR2Impl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( RegrR2State::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + 
binary_update::, + drain::, + )) } } @@ -84,23 +87,28 @@ pub struct RegrR2State { corr: CorrelationState, } -impl AggregateState2<(f64, f64), f64> for RegrR2State { +impl AggregateState<(&f64, &f64), f64> for RegrR2State { fn merge(&mut self, other: &mut Self) -> Result<()> { self.corr.merge(&mut other.corr)?; Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.corr.update(input)?; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (v, valid) = self.corr.finalize()?; - if valid { - Ok((v.powi(2), true)) - } else { - Ok((0.0, false)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match self.corr.finalize_value() { + Some(val) => { + let val = val.powi(2); + output.put(&val); + } + None => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs index 7b61ba9d4..b8b60f6f3 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_slope.rs @@ -4,14 +4,16 @@ use rayexec_error::Result; use super::covar::{CovarPopFinalize, CovarState}; use super::stddev::{VariancePopFinalize, VarianceState}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -73,10 +75,11 @@ 
pub struct RegrSlopeImpl; impl AggregateFunctionImpl for RegrSlopeImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( RegrSlopeState::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -86,31 +89,34 @@ pub struct RegrSlopeState { var: VarianceState, } -impl AggregateState2<(f64, f64), f64> for RegrSlopeState { +impl AggregateState<(&f64, &f64), f64> for RegrSlopeState { fn merge(&mut self, other: &mut Self) -> Result<()> { self.cov.merge(&mut other.cov)?; self.var.merge(&mut other.var)?; Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { + fn update(&mut self, input: (&f64, &f64)) -> Result<()> { self.cov.update(input)?; self.var.update(input.1)?; // Update with 'x' Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - let (cov, cov_valid) = self.cov.finalize()?; - let (var, var_valid) = self.var.finalize()?; - - if cov_valid && var_valid { - if var == 0.0 { - return Ok((0.0, false)); + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match (self.cov.finalize_value(), self.var.finalize_value()) { + (Some(cov), Some(var)) => { + if var == 0.0 { + output.put_null(); + return Ok(()); + } + let v = cov / var; + output.put(&v); } - let v = cov / var; - Ok((v, true)) - } else { - Ok((0.0, false)) + _ => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs index 7300aebb9..63a04b510 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/stddev.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use 
crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states2, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -69,10 +71,11 @@ pub struct StddevPopImpl; impl AggregateFunctionImpl for StddevPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -128,10 +131,11 @@ pub struct StddevSampImpl; impl AggregateFunctionImpl for StddevSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -183,10 +187,11 @@ pub struct VarPopImpl; impl AggregateFunctionImpl for VarPopImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } @@ -238,28 +243,29 @@ pub struct VarSampImpl; impl AggregateFunctionImpl for VarSampImpl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( VarianceState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } pub trait VarianceFinalize: Sync + Send + Debug + Default + 'static { - fn finalize(count: i64, mean: f64, m2: f64) -> (f64, bool); + fn finalize(count: i64, mean: f64, m2: f64) -> Option; } 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub struct StddevPopFinalize; impl VarianceFinalize for StddevPopFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 => (0.0, false), - 1 => (0.0, true), + 0 => None, + 1 => Some(0.0), _ => { let v = f64::sqrt(m2 / count as f64); - (v, true) + Some(v) } } } @@ -269,12 +275,12 @@ impl VarianceFinalize for StddevPopFinalize { pub struct StddevSampFinalize; impl VarianceFinalize for StddevSampFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 | 1 => (0.0, false), + 0 | 1 => None, _ => { let v = f64::sqrt(m2 / (count - 1) as f64); - (v, true) + Some(v) } } } @@ -284,12 +290,12 @@ impl VarianceFinalize for StddevSampFinalize { pub struct VarianceSampFinalize; impl VarianceFinalize for VarianceSampFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 | 1 => (0.0, false), + 0 | 1 => None, _ => { let v = m2 / (count - 1) as f64; - (v, true) + Some(v) } } } @@ -299,13 +305,13 @@ impl VarianceFinalize for VarianceSampFinalize { pub struct VariancePopFinalize; impl VarianceFinalize for VariancePopFinalize { - fn finalize(count: i64, _mean: f64, m2: f64) -> (f64, bool) { + fn finalize(count: i64, _mean: f64, m2: f64) -> Option { match count { - 0 => (0.0, false), - 1 => (0.0, true), + 0 => None, + 1 => Some(0.0), _ => { let v = m2 / count as f64; - (v, true) + Some(v) } } } @@ -319,7 +325,16 @@ pub struct VarianceState { _finalize: PhantomData, } -impl AggregateState2 for VarianceState +impl VarianceState +where + F: VarianceFinalize, +{ + pub fn finalize_value(&self) -> Option { + F::finalize(self.count, self.mean, self.m2) + } +} + +impl AggregateState<&f64, f64> for VarianceState where F: VarianceFinalize, { @@ -343,7 +358,7 @@ 
where Ok(()) } - fn update(&mut self, input: f64) -> Result<()> { + fn update(&mut self, &input: &f64) -> Result<()> { self.count += 1; let delta = input - self.mean; self.mean += delta / self.count as f64; @@ -353,7 +368,14 @@ where Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { - Ok(F::finalize(self.count, self.mean, self.m2)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match F::finalize(self.count, self.mean, self.m2) { + Some(val) => output.put(&val), + None => output.put_null(), + } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index a97b401ee..f1416fc07 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -26,6 +26,7 @@ use crate::arrays::executor::aggregate::{ }; use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; use crate::arrays::executor::physical_type::PhysicalStorage2; +use crate::arrays::executor_exp::aggregate::binary::BinaryNonNullUpdater; use crate::arrays::executor_exp::aggregate::unary::UnaryNonNullUpdater; use crate::arrays::executor_exp::aggregate::{AggregateState, StateCombiner}; use crate::arrays::executor_exp::PutBuffer; @@ -374,6 +375,30 @@ where ) } +pub fn binary_update( + arrays: &[Array], + selection: Selection, + mapping: &[usize], + states: &mut [State], +) -> Result<()> +where + Storage1: PhysicalStorage, + Storage2: PhysicalStorage, + Output: MutablePhysicalStorage, + State: for<'a> AggregateState< + (&'a Storage1::StorageType, &'a Storage2::StorageType), + Output::StorageType, + >, +{ + BinaryNonNullUpdater::update::( + &arrays[0], + &arrays[1], + selection, + mapping.iter().copied(), + states, + ) +} + /// Update function for a unary aggregate. 
pub fn unary_update2( arrays: &[&Array2], From f5d49200b00940f80d2986caedeec6497abc7bfb Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 11:49:01 -0600 Subject: [PATCH 48/59] wip --- .../src/arrays/buffer/physical_type.rs | 2 +- .../src/functions/aggregate/builtin/first.rs | 290 +++++++------- .../src/functions/aggregate/builtin/minmax.rs | 374 ++++++++++++------ 3 files changed, 404 insertions(+), 262 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs index be77a43ec..8b97c9758 100644 --- a/crates/rayexec_execution/src/arrays/buffer/physical_type.rs +++ b/crates/rayexec_execution/src/arrays/buffer/physical_type.rs @@ -255,7 +255,7 @@ generate_primitive!(Interval, PhysicalInterval, Interval); /// /// This will be the type we use for queries like `SELECT NULL` where there's no /// additional type information in the query. -#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct UntypedNull; #[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index 59b646887..435b4d848 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -1,10 +1,17 @@ -use std::fmt::Debug; +use std::borrow::Borrow; +use std::fmt::{self, Debug}; use std::marker::PhantomData; use half::f16; use rayexec_error::{not_implemented, Result}; use crate::arrays::array::ArrayData2; +use crate::arrays::buffer::physical_type::{ + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalType, +}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use 
crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; @@ -29,15 +36,20 @@ use crate::arrays::executor::physical_type::{ PhysicalU8_2, PhysicalUntypedNull_2, }; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ boolean_finalize, + drain, new_unary_aggregate_states2, primitive_finalize, + unary_update, untyped_null_finalize, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -81,63 +93,68 @@ impl AggregateFunction for First { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type2()? { - PhysicalType2::UntypedNull => Box::new(FirstUntypedNullImpl), - PhysicalType2::Boolean => Box::new(FirstBoolImpl), - PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int8 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt8 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt64 => 
Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Interval => Box::new( - FirstPrimitiveImpl::::new(datatype.clone()), - ), - PhysicalType2::Binary => Box::new(FirstBinaryImpl { - datatype: datatype.clone(), - }), - PhysicalType2::Utf8 => Box::new(FirstBinaryImpl { - datatype: datatype.clone(), - }), - PhysicalType2::List => { - // TODO: Easy, clone underlying array and select. - not_implemented!("FIRST for list arrays") - } + let function_impl: Box = match datatype.physical_type() { + // PhysicalType::Boolean + other => not_implemented!("FIRST for physical type: {other}"), }; + // let function_impl: Box = match datatype.physical_type2()? { + // PhysicalType2::UntypedNull => Box::new(FirstUntypedNullImpl), + // PhysicalType2::Boolean => Box::new(FirstBoolImpl), + // PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int8 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt8 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt64 => 
Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Interval => Box::new( + // FirstPrimitiveImpl::::new(datatype.clone()), + // ), + // PhysicalType2::Binary => Box::new(FirstBinaryImpl { + // datatype: datatype.clone(), + // }), + // PhysicalType2::Utf8 => Box::new(FirstBinaryImpl { + // datatype: datatype.clone(), + // }), + // PhysicalType2::List => { + // // TODO: Easy, clone underlying array and select. + // not_implemented!("FIRST for list arrays") + // } + // }; + Ok(PlannedAggregateFunction { function: Box::new(*self), return_type: datatype, @@ -147,140 +164,133 @@ impl AggregateFunction for First { } } -/// FIRST aggregate impl for utf8 and binary. -#[derive(Debug, Clone)] -pub struct FirstBinaryImpl { - datatype: DataType, -} - -impl AggregateFunctionImpl for FirstBinaryImpl { - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - new_unary_aggregate_states2::( - FirstStateBinary::default, - move |states| { - let builder = ArrayBuilder { - datatype: datatype.clone(), - buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }, - ) - } +#[derive(Debug, Clone, Copy)] +pub struct FirstPrimitiveImpl { + _s: PhantomData, } -#[derive(Debug, Clone)] -pub struct FirstUntypedNullImpl; - -impl AggregateFunctionImpl for FirstUntypedNullImpl { +impl AggregateFunctionImpl for FirstPrimitiveImpl +where + S: MutablePhysicalStorage, + S::StorageType: Debug + Default + Copy, +{ fn new_states(&self) -> Box { - new_unary_aggregate_states2::( - FirstState::::default, - untyped_null_finalize, - ) + Box::new(TypedAggregateGroupStates::new( + FirstPrimitiveState::::default, + unary_update::, + drain::, + )) } } -#[derive(Debug, Clone)] -pub struct FirstBoolImpl; +// #[derive(Debug, Clone, Copy)] +// pub struct FirstBinaryImpl; -impl AggregateFunctionImpl for 
FirstBoolImpl { - fn new_states(&self) -> Box { - new_unary_aggregate_states2::( - FirstState::::default, - move |states| boolean_finalize(DataType::Boolean, states), - ) - } -} +// impl AggregateFunctionImpl for FirstBinaryImpl { +// fn new_states(&self) -> Box { +// Box::new(TypedAggregateGroupStates::new( +// FirstBinaryState::default, +// unary_update::, +// drain::, +// )) +// } +// } -// TODO: Remove T -#[derive(Debug, Clone)] -pub struct FirstPrimitiveImpl { - datatype: DataType, - _s: PhantomData, - _t: PhantomData, +#[derive(Debug, Default)] +pub struct FirstPrimitiveState { + value: Option, } -impl FirstPrimitiveImpl { - fn new(datatype: DataType) -> Self { - FirstPrimitiveImpl { - datatype, - _s: PhantomData, - _t: PhantomData, +impl AggregateState<&T, T> for FirstPrimitiveState +where + T: Debug + Default + Copy, +{ + fn merge(&mut self, other: &mut Self) -> Result<()> { + if self.value.is_none() { + std::mem::swap(&mut self.value, &mut other.value); } + Ok(()) } -} -impl AggregateFunctionImpl for FirstPrimitiveImpl -where - for<'a> S: PhysicalStorage2 = T>, - T: Copy + Debug + Default + Sync + Send + 'static, - ArrayData2: From>, -{ - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); + fn update(&mut self, &input: &T) -> Result<()> { + if self.value.is_none() { + self.value = Some(input); + } + Ok(()) + } - new_unary_aggregate_states2::(FirstState::::default, move |states| { - primitive_finalize(datatype.clone(), states) - }) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), + } + Ok(()) } } #[derive(Debug, Default)] -pub struct FirstState { - value: Option, +pub struct FirstBinaryState { + value: Option>, } -impl AggregateState2 for FirstState { +impl AggregateState<&[u8], [u8]> for FirstBinaryState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { - self.value = other.value; - 
return Ok(()); + std::mem::swap(&mut self.value, &mut other.value); } Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, input: &[u8]) -> Result<()> { if self.value.is_none() { - self.value = Some(input); + self.value = Some(input.to_vec()); } Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { - match self.value { - Some(v) => Ok((v, true)), - None => Ok((T::default(), false)), + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), } + Ok(()) } } #[derive(Debug, Default)] -pub struct FirstStateBinary { - value: Option>, +pub struct FirstStringState { + value: Option, } -impl AggregateState2<&[u8], Vec> for FirstStateBinary { +impl AggregateState<&str, str> for FirstStringState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.value.is_none() { std::mem::swap(&mut self.value, &mut other.value); - return Ok(()); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if self.value.is_none() { - self.value = Some(input.to_owned()); + self.value = Some(input.to_string()); } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { - match self.value.as_mut() { - Some(v) => Ok((std::mem::take(v), true)), - None => Ok((Vec::new(), false)), + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.value { + Some(val) => output.put(val), + None => output.put_null(), } + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index a2e5c4e30..448e18fc1 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -1,3 +1,4 @@ +use std::borrow::Borrow; use std::fmt::Debug; use std::marker::PhantomData; @@ 
-5,6 +6,32 @@ use half::f16; use rayexec_error::{not_implemented, Result}; use crate::arrays::array::ArrayData2; +use crate::arrays::buffer::physical_type::{ + AddressableMut, + MutablePhysicalStorage, + PhysicalBinary, + PhysicalBool, + PhysicalDictionary, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, + PhysicalList, + PhysicalStorage, + PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, +}; use crate::arrays::datatype::{DataType, DataTypeId}; use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; @@ -29,15 +56,20 @@ use crate::arrays::executor::physical_type::{ PhysicalU8_2, PhysicalUntypedNull_2, }; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::interval::Interval; use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ boolean_finalize, + drain, new_unary_aggregate_states2, primitive_finalize, + unary_update, untyped_null_finalize, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -81,64 +113,65 @@ impl AggregateFunction for Min { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match datatype.physical_type2()? 
{ - PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), - PhysicalType2::Boolean => Box::new(MinBoolImpl::new()), - PhysicalType2::Float16 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float32 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float64 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int8 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int16 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int32 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int64 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt8 => { - Box::new(MinPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt16 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt32 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt64 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Interval => Box::new( - MinPrimitiveImpl::::new(datatype.clone()), - ), - PhysicalType2::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), - PhysicalType2::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())), - PhysicalType2::List => { - not_implemented!("MIN for list arrays") - } - }; - - Ok(PlannedAggregateFunction { - function: Box::new(*self), - return_type: datatype, - inputs, - function_impl, - }) + unimplemented!() + // let function_impl: Box = match datatype.physical_type2()? 
{ + // PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), + // PhysicalType2::Boolean => Box::new(MinBoolImpl::new()), + // PhysicalType2::Float16 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float32 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float64 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int8 => { + // Box::new(MinPrimitiveImpl::::new(datatype.clone())) + // } + // PhysicalType2::Int16 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int32 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int64 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt8 => { + // Box::new(MinPrimitiveImpl::::new(datatype.clone())) + // } + // PhysicalType2::UInt16 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt32 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt64 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Interval => Box::new( + // MinPrimitiveImpl::::new(datatype.clone()), + // ), + // PhysicalType2::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), + // PhysicalType2::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())), + // PhysicalType2::List => { + // not_implemented!("MIN for list arrays") + // } + // }; + + // Ok(PlannedAggregateFunction { + // function: Box::new(*self), + // return_type: datatype, + // inputs, + // function_impl, + // }) } } @@ -175,64 +208,87 @@ impl AggregateFunction for Max { let datatype = inputs[0].datatype(table_list)?; - let function_impl: Box = match 
datatype.physical_type2()? { - PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), - PhysicalType2::Boolean => Box::new(MaxBoolImpl::new()), - PhysicalType2::Float16 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float32 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Float64 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::Int16 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int32 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int64 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt8 => { - Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - } - PhysicalType2::UInt16 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt32 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt64 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( - datatype.clone(), - )), - PhysicalType2::Interval => Box::new( - MaxPrimitiveImpl::::new(datatype.clone()), - ), - PhysicalType2::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType2::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), - PhysicalType2::List => { - not_implemented!("MAX for list arrays") - } + let function_impl: Box = match datatype.physical_type() { + PhysicalType::UntypedNull => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Boolean => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int8 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int64 => 
Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Int128 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt8 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt64 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::UInt128 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float16 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float32 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Float64 => Box::new(MaxPrimitiveImpl::::new()), + PhysicalType::Interval => Box::new(MaxPrimitiveImpl::::new()), + // PhysicalType::Utf8 => Box::new(MaxImpl::::new()), + // PhysicalType::Binary => Box::new(MaxImpl::::new()), + other => not_implemented!("max for type {other:?}"), }; - Ok(PlannedAggregateFunction { - function: Box::new(*self), - return_type: datatype, - inputs, - function_impl, - }) + // let function_impl: Box = match datatype.physical_type2()? 
{ + // PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), + // PhysicalType2::Boolean => Box::new(MaxBoolImpl::new()), + // PhysicalType2::Float16 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float32 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Float64 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int8 => { + // Box::new(MaxPrimitiveImpl::::new(datatype.clone())) + // } + // PhysicalType2::Int16 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int32 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int64 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt8 => { + // Box::new(MaxPrimitiveImpl::::new(datatype.clone())) + // } + // PhysicalType2::UInt16 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt32 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt64 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( + // datatype.clone(), + // )), + // PhysicalType2::Interval => Box::new( + // MaxPrimitiveImpl::::new(datatype.clone()), + // ), + // PhysicalType2::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), + // PhysicalType2::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), + // PhysicalType2::List => { + // not_implemented!("MAX for list arrays") + // } + // }; + unimplemented!() + + // Ok(PlannedAggregateFunction { + // function: Box::new(*self), + // return_type: datatype, + // inputs, + // function_impl, + // }) } } @@ -250,7 +306,7 @@ impl AggregateFunctionImpl for MinMaxUntypedNull { } pub type MinBinaryImpl = MinMaxBinaryImpl; -pub 
type MaxBinaryImpl = MinMaxBinaryImpl; +pub type MaxBinaryImpl = MinMaxBinaryImpl; #[derive(Debug)] pub struct MinMaxBinaryImpl { @@ -291,7 +347,7 @@ impl Clone for MinMaxBinaryImpl { } pub type MinBoolImpl = MinMaxBoolImpl>; -pub type MaxBoolImpl = MinMaxBoolImpl>; +pub type MaxBoolImpl = MinMaxBoolImpl>; #[derive(Debug)] pub struct MinMaxBoolImpl { @@ -322,7 +378,7 @@ impl Clone for MinMaxBoolImpl { } pub type MinPrimitiveImpl = MinMaxPrimitiveImpl, S, T>; -pub type MaxPrimitiveImpl = MinMaxPrimitiveImpl, S, T>; +pub type MaxPrimitiveImpl2 = MinMaxPrimitiveImpl, S, T>; // TODO: Remove T #[derive(Debug)] @@ -444,13 +500,89 @@ impl AggregateState2<&[u8], Vec> for MinStateBinary { } } +#[derive(Debug, Clone, Copy)] +pub struct MaxPrimitiveImpl { + _s: PhantomData, +} + +impl MaxPrimitiveImpl { + const fn new() -> Self { + MaxPrimitiveImpl { _s: PhantomData } + } +} + +impl AggregateFunctionImpl for MaxPrimitiveImpl +where + S: MutablePhysicalStorage, + S::StorageType: Default + Debug + Sync + Send + PartialOrd + Copy, +{ + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MaxStatePrimitive::::default, + unary_update::, + drain::, + )) + } +} + +#[derive(Debug, Default)] +pub struct MaxStatePrimitive { + max: T, + valid: bool, +} + +impl AggregateState<&T, T> for MaxStatePrimitive +where + T: Debug + Sync + Send + PartialOrd + Copy, +{ + fn merge(&mut self, other: &mut Self) -> Result<()> { + if !self.valid { + self.valid = other.valid; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); + } + + Ok(()) + } + + fn update(&mut self, input: &T) -> Result<()> { + if !self.valid { + self.max = *input; + return Ok(()); + } + + if self.max.lt(input) { + self.max = *input; + } + + Ok(()) + } + + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.valid { + output.put(&self.max); + } else { + 
output.put_null(); + } + + Ok(()) + } +} + #[derive(Debug, Default)] -pub struct MaxState { +pub struct MaxState2 { max: T, valid: bool, } -impl AggregateState2 for MaxState +impl AggregateState2 for MaxState2 where T: PartialOrd + Debug + Default + Copy, { @@ -485,12 +617,12 @@ where } #[derive(Debug, Default)] -pub struct MaxStateBinary { +pub struct MaxStateBinary2 { max: Vec, valid: bool, } -impl AggregateState2<&[u8], Vec> for MaxStateBinary { +impl AggregateState2<&[u8], Vec> for MaxStateBinary2 { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; From edb8875f7a851be7d1cf41a154cd1090f3d8a8ed Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 12:24:43 -0600 Subject: [PATCH 49/59] spooky --- Cargo.lock | 10 +++---- crates/rayexec_execution/Cargo.toml | 2 +- .../rayexec_execution/src/arrays/array/exp.rs | 2 +- .../rayexec_execution/src/arrays/batch_exp.rs | 4 +-- .../src/arrays/compute/cast/array.rs | 4 +-- .../src/arrays/compute/date.rs | 2 +- .../arrays/executor_exp/aggregate/binary.rs | 4 +-- .../src/arrays/executor_exp/aggregate/mod.rs | 1 + .../arrays/executor_exp/aggregate/unary.rs | 4 +-- .../src/arrays/executor_exp/scalar/binary.rs | 4 +-- .../arrays/executor_exp/scalar/list_reduce.rs | 2 +- .../src/arrays/executor_exp/scalar/ternary.rs | 4 +-- .../src/arrays/executor_exp/scalar/unary.rs | 4 +-- .../src/arrays/executor_exp/scalar/uniform.rs | 4 +-- .../rayexec_execution/src/arrays/testutil.rs | 4 +-- .../src/expr/physical/case_expr.rs | 2 +- .../src/expr/physical/cast_expr.rs | 2 +- .../src/expr/physical/column_expr.rs | 2 +- .../src/expr/physical/literal_expr.rs | 2 +- .../src/functions/aggregate/states.rs | 26 ++++++++++++------- .../src/functions/scalar/builtin/arith/add.rs | 2 +- .../src/functions/scalar/builtin/arith/div.rs | 2 +- .../src/functions/scalar/builtin/arith/mul.rs | 2 +- .../src/functions/scalar/builtin/arith/rem.rs | 2 +- .../src/functions/scalar/builtin/arith/sub.rs | 2 +- 
.../src/functions/scalar/builtin/boolean.rs | 2 +- .../functions/scalar/builtin/comparison.rs | 2 +- .../scalar/builtin/datetime/epoch.rs | 2 +- .../src/functions/scalar/builtin/is.rs | 2 +- .../scalar/builtin/list/list_extract.rs | 4 +-- .../scalar/builtin/list/list_values.rs | 4 +-- .../functions/scalar/builtin/numeric/abs.rs | 2 +- .../functions/scalar/builtin/numeric/acos.rs | 2 +- .../functions/scalar/builtin/numeric/asin.rs | 2 +- .../functions/scalar/builtin/numeric/atan.rs | 2 +- .../functions/scalar/builtin/numeric/cbrt.rs | 2 +- .../functions/scalar/builtin/numeric/ceil.rs | 2 +- .../functions/scalar/builtin/numeric/cos.rs | 2 +- .../scalar/builtin/numeric/degrees.rs | 2 +- .../functions/scalar/builtin/numeric/exp.rs | 2 +- .../functions/scalar/builtin/numeric/floor.rs | 2 +- .../functions/scalar/builtin/numeric/ln.rs | 2 +- .../functions/scalar/builtin/numeric/log.rs | 2 +- .../functions/scalar/builtin/numeric/mod.rs | 2 +- .../scalar/builtin/numeric/radians.rs | 2 +- .../functions/scalar/builtin/numeric/sin.rs | 2 +- .../functions/scalar/builtin/numeric/sqrt.rs | 2 +- .../functions/scalar/builtin/numeric/tan.rs | 2 +- .../scalar/builtin/similarity/l2_distance.rs | 2 +- .../functions/scalar/builtin/string/case.rs | 2 +- crates/{iterutil => stdutil}/Cargo.toml | 2 +- .../src/lib.rs => stdutil/src/iter.rs} | 0 crates/stdutil/src/lib.rs | 4 +++ crates/stdutil/src/marker.rs | 22 ++++++++++++++++ 54 files changed, 108 insertions(+), 73 deletions(-) rename crates/{iterutil => stdutil}/Cargo.toml (80%) rename crates/{iterutil/src/lib.rs => stdutil/src/iter.rs} (100%) create mode 100644 crates/stdutil/src/lib.rs create mode 100644 crates/stdutil/src/marker.rs diff --git a/Cargo.lock b/Cargo.lock index 024eb4021..2f7d8207a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1390,10 +1390,6 @@ dependencies = [ "either", ] -[[package]] -name = "iterutil" -version = "0.0.94" - [[package]] name = "itoa" version = "1.0.10" @@ -2269,7 +2265,6 @@ dependencies = [ "half", 
"hashbrown 0.14.5", "indexmap", - "iterutil", "num", "num-traits", "num_cpus", @@ -2288,6 +2283,7 @@ dependencies = [ "serde_json", "similar-asserts", "smallvec", + "stdutil", "strsim", "textwrap", "tokio", @@ -3036,6 +3032,10 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "stdutil" +version = "0.0.94" + [[package]] name = "stringprep" version = "0.1.5" diff --git a/crates/rayexec_execution/Cargo.toml b/crates/rayexec_execution/Cargo.toml index f895cb44a..bb23dab95 100644 --- a/crates/rayexec_execution/Cargo.toml +++ b/crates/rayexec_execution/Cargo.toml @@ -10,7 +10,7 @@ rayexec_parser = { path = "../rayexec_parser" } # rayexec_bullet = { path = "../rayexec_bullet" } rayexec_io = { path = "../rayexec_io" } fmtutil = { path = "../fmtutil" } -iterutil = { path = "../iterutil" } +stdutil = { path = "../stdutil" } # stackutil = { path = "../stackutil" } TODO: psm hash issues when compiling to wasm on macos ahash = { workspace = true } diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index 3e106d0ba..adee68af3 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -1,6 +1,6 @@ use half::f16; -use iterutil::{IntoExactSizeIterator, TryFromExactSizeIterator}; use rayexec_error::{not_implemented, RayexecError, Result}; +use stdutil::iter::{IntoExactSizeIterator, TryFromExactSizeIterator}; use super::array_data::ArrayData; use super::flat::FlatArrayView; diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 7405a372e..0888785ff 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -1,5 +1,5 @@ -use iterutil::IntoExactSizeIterator; use rayexec_error::{RayexecError, Result}; +use 
stdutil::iter::IntoExactSizeIterator; use super::array::exp::Array; use super::array::selection::Selection; @@ -139,7 +139,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 2de9f9731..2156a127f 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -1,6 +1,6 @@ use std::ops::Mul; -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num::{CheckedDiv, CheckedMul, Float, NumCast, PrimInt, ToPrimitive}; use rayexec_error::{RayexecError, Result}; @@ -840,7 +840,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DecimalTypeMeta; diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index 255fec887..a9c4213a1 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -1,5 +1,5 @@ use chrono::{DateTime, Datelike, NaiveDate, Timelike, Utc}; -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; use crate::arrays::array::exp::Array; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs index 71d1c6b0e..139cb5652 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/binary.rs @@ -1,5 +1,5 @@ -use iterutil::IntoExactSizeIterator; use rayexec_error::Result; +use stdutil::iter::IntoExactSizeIterator; use super::AggregateState; 
use crate::arrays::array::exp::Array; @@ -62,7 +62,7 @@ impl BinaryNonNullUpdater { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalI32}; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs index f836c670d..443ad1874 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/mod.rs @@ -40,6 +40,7 @@ impl StateCombiner { ) -> Result<()> where State: AggregateState, + Output: ?Sized, { for (from, to) in mapping { let consume = &mut consume[from]; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs index 2532e9d65..463b568af 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs @@ -1,5 +1,5 @@ -use iterutil::IntoExactSizeIterator; use rayexec_error::Result; +use stdutil::iter::IntoExactSizeIterator; use super::AggregateState; use crate::arrays::array::exp::Array; @@ -94,7 +94,7 @@ impl UnaryNonNullUpdater { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs index 3764884f1..894a56487 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::Result; use 
crate::arrays::array::exp::Array; @@ -144,7 +144,7 @@ impl BinaryExecutor { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::array::validity::Validity; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs index a1860e6e2..c552c0161 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/list_reduce.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::{RayexecError, Result}; use crate::arrays::array::exp::Array; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs index 179be7a19..49599c903 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/ternary.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::Result; use crate::arrays::array::exp::Array; @@ -189,7 +189,7 @@ impl TernaryExecutor { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs index 67f6a39cc..3e2f62a92 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -1,5 +1,5 @@ -use iterutil::IntoExactSizeIterator; use rayexec_error::Result; +use stdutil::iter::IntoExactSizeIterator; use crate::arrays::array::exp::Array; use 
crate::arrays::array::flat::FlatArrayView; @@ -179,7 +179,7 @@ impl UnaryExecutor { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::array::validity::Validity; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs index 08092741a..d45f71741 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::Result; use crate::arrays::array::exp::Array; @@ -149,7 +149,7 @@ impl UniformExecutor { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index 8492dc81f..988e760ec 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -8,7 +8,7 @@ use std::collections::BTreeMap; use std::fmt::Debug; -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use super::array::exp::Array; use super::batch_exp::Batch; @@ -201,7 +201,7 @@ pub fn assert_batches_eq(batch1: &Batch, batch2: &Batch) { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs b/crates/rayexec_execution/src/expr/physical/case_expr.rs index 45b74cafe..48a4c78b4 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ 
-187,7 +187,7 @@ impl fmt::Display for PhysicalCaseExpr { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DataType; diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 2c4a56c41..729e3f7e8 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -101,7 +101,7 @@ impl DatabaseProtoConv for PhysicalCastExpr { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 6280eabb0..40537bc5f 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -70,7 +70,7 @@ impl DatabaseProtoConv for PhysicalColumnExpr { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DataType; diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index f220d8fbe..a35bced6e 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -68,7 +68,7 @@ impl DatabaseProtoConv for PhysicalLiteralExpr { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::datatype::DataType; diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index f1416fc07..808218e32 100644 --- 
a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -3,8 +3,9 @@ use std::any::Any; use std::fmt::Debug; use std::marker::PhantomData; -use iterutil::IntoExactSizeIterator; use rayexec_error::{RayexecError, Result}; +use stdutil::iter::IntoExactSizeIterator; +use stdutil::marker::PhantomCovariant; use super::ChunkGroupAddressIter; use crate::arrays::array::exp::Array; @@ -32,7 +33,14 @@ use crate::arrays::executor_exp::aggregate::{AggregateState, StateCombiner}; use crate::arrays::executor_exp::PutBuffer; use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; -pub struct TypedAggregateGroupStates { +pub struct TypedAggregateGroupStates< + State, + Input, + Output: ?Sized, + StateInit, + StateUpdate, + StateFinalize, +> { /// States being tracked. states: Vec, @@ -47,11 +55,11 @@ pub struct TypedAggregateGroupStates, - _output: PhantomData, + _input: PhantomCovariant, + _output: PhantomCovariant, } -impl +impl TypedAggregateGroupStates { pub fn new( @@ -65,8 +73,8 @@ impl state_init, state_update, state_finalize, - _input: PhantomData, - _output: PhantomData, + _input: PhantomCovariant::new(), + _output: PhantomCovariant::new(), } } } @@ -76,7 +84,7 @@ impl AggregateGroup where State: AggregateState + Sync + Send + 'static, Input: Sync + Send, - Output: Sync + Send, + Output: Sync + Send + ?Sized, StateInit: Fn() -> State + Sync + Send, StateUpdate: Fn(&[Array], Selection, &[usize], &mut [State]) -> Result<()> + Sync + Send, StateFinalize: Fn(&mut [State], &mut Array) -> Result<()> + Sync + Send, @@ -155,7 +163,7 @@ where Ok(()) } -impl fmt::Debug +impl fmt::Debug for TypedAggregateGroupStates { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index 4d97b33e4..fa67ea729 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -204,7 +204,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index 596dd83fd..76a56409c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -251,7 +251,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index a657354a4..4edbfbe1d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -317,7 +317,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index 7f151e9b4..f7c590704 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -200,7 +200,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git 
a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index 5157ac36c..fc00d326c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -212,7 +212,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index 4ec43590c..c9d3cc7e8 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -225,7 +225,7 @@ impl ScalarFunctionImpl for OrImpl { #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 7b6018a0a..99be81f0c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -854,7 +854,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs index 42b444978..3ce6a16ee 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/datetime/epoch.rs @@ 
-1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::Result; use crate::arrays::array::exp::Array; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index 9aab1b382..888bd0b2b 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -337,7 +337,7 @@ impl ScalarFunctionImpl for CheckBoolImpl( #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::physical_type::PhysicalStorage; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index d2ffb9b58..be949b68f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index cd3f9f61e..46b26bdec 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index 48aec0004..6c1ddc6d5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 57b043d97..61a4ab7c1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index 6869f7fc4..ee6979ac0 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index 5a2c5bd99..b439a0f02 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index 7be1dbf36..bc3c36af1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index 1024e2db1..b79f51c4a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index 7d3ff3fb5..649b8c3bf 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index b1b7d9611..c47e8ba09 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 38ab5ba16..5650abb5e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index 9c4cbdeb0..cb9c9ccec 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs index 95310d2ce..a28318491 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/mod.rs @@ -29,7 +29,7 @@ pub use degrees::*; pub use exp::*; pub use floor::*; pub use isnan::*; -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; pub use ln::*; pub use log::*; use num_traits::Float; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 41305b084..86e94812c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 1c6aefbe3..592a1a090 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index b2b33cc7c..c0c3f58e2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs index 0d92567d2..3392fe201 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/tan.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use num_traits::Float; use rayexec_error::Result; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 14f62a52a..857a093f7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -149,7 +149,7 @@ where #[cfg(test)] mod tests { - use iterutil::TryFromExactSizeIterator; + use stdutil::iter::TryFromExactSizeIterator; use super::*; use crate::arrays::buffer::buffer_manager::NopBufferManager; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs index 016a7ad79..47f4f47fd 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/case.rs @@ -1,4 +1,4 @@ -use iterutil::IntoExactSizeIterator; +use stdutil::iter::IntoExactSizeIterator; use rayexec_error::Result; use crate::arrays::array::exp::Array; diff --git a/crates/iterutil/Cargo.toml b/crates/stdutil/Cargo.toml similarity index 80% rename from crates/iterutil/Cargo.toml rename to crates/stdutil/Cargo.toml index 39c919663..7cb3e1417 100644 --- a/crates/iterutil/Cargo.toml +++ b/crates/stdutil/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "iterutil" +name = "stdutil" version.workspace = true edition.workspace = true diff --git a/crates/iterutil/src/lib.rs b/crates/stdutil/src/iter.rs similarity index 100% rename from crates/iterutil/src/lib.rs rename to crates/stdutil/src/iter.rs diff --git a/crates/stdutil/src/lib.rs b/crates/stdutil/src/lib.rs new file mode 100644 index 000000000..854431330 --- /dev/null +++ b/crates/stdutil/src/lib.rs @@ -0,0 +1,4 @@ +//! Utilities that are closely related to items found in std. + +pub mod iter; +pub mod marker; diff --git a/crates/stdutil/src/marker.rs b/crates/stdutil/src/marker.rs new file mode 100644 index 000000000..27a8868b4 --- /dev/null +++ b/crates/stdutil/src/marker.rs @@ -0,0 +1,22 @@ +use std::marker::PhantomData; + +/// Marker type that indicates covariance of `T` but does not inherit the bounds +/// of `T`. +/// +/// Has all the same properties of `PhantomData` minus the inherited trait +/// bounds. This lets us make structs and other types covariant to `T` but +/// without the potential inheritence of `?Sized` (or other undesired traits) in +/// the outer type. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PhantomCovariant(PhantomData T>) +where + T: ?Sized; + +impl PhantomCovariant +where + T: ?Sized, +{ + pub const fn new() -> Self { + PhantomCovariant(PhantomData) + } +} From 35de02d25c8430f6b1eb6dee9d17e9905a658a27 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 13:36:23 -0600 Subject: [PATCH 50/59] more --- .../src/functions/aggregate/builtin/first.rs | 170 ++---- .../src/functions/aggregate/builtin/minmax.rs | 577 ++++++++---------- .../functions/aggregate/builtin/regr_count.rs | 52 +- .../functions/aggregate/builtin/string_agg.rs | 39 +- crates/stdutil/src/marker.rs | 22 +- 5 files changed, 390 insertions(+), 470 deletions(-) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs index 435b4d848..12192f7da 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/first.rs @@ -1,53 +1,38 @@ -use std::borrow::Borrow; -use std::fmt::{self, Debug}; +use std::fmt::Debug; use std::marker::PhantomData; -use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData2; use crate::arrays::buffer::physical_type::{ AddressableMut, MutablePhysicalStorage, PhysicalBinary, + PhysicalBool, + PhysicalF16, + PhysicalF32, + PhysicalF64, + PhysicalI128, + PhysicalI16, + PhysicalI32, + PhysicalI64, + PhysicalI8, + PhysicalInterval, PhysicalType, + PhysicalU128, + PhysicalU16, + PhysicalU32, + PhysicalU64, + PhysicalU8, + PhysicalUntypedNull, + PhysicalUtf8, }; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalBinary_2, - PhysicalBool_2, - PhysicalF16_2, - PhysicalF32_2, 
- PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalInterval_2, - PhysicalStorage2, - PhysicalType2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, - PhysicalUntypedNull_2, -}; +use crate::arrays::datatype::DataTypeId; use crate::arrays::executor_exp::aggregate::AggregateState; use crate::arrays::executor_exp::PutBuffer; -use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ - boolean_finalize, drain, - new_unary_aggregate_states2, - primitive_finalize, unary_update, - untyped_null_finalize, AggregateGroupStates, TypedAggregateGroupStates, }; @@ -94,67 +79,27 @@ impl AggregateFunction for First { let datatype = inputs[0].datatype(table_list)?; let function_impl: Box = match datatype.physical_type() { - // PhysicalType::Boolean + PhysicalType::UntypedNull => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Boolean => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int8 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Int128 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt8 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::UInt128 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float16 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float32 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Float64 => Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Interval => 
Box::new(FirstPrimitiveImpl::::new()), + PhysicalType::Utf8 => Box::new(FirstStringImpl), + PhysicalType::Binary => Box::new(FirstBinaryImpl), other => not_implemented!("FIRST for physical type: {other}"), }; - // let function_impl: Box = match datatype.physical_type2()? { - // PhysicalType2::UntypedNull => Box::new(FirstUntypedNullImpl), - // PhysicalType2::Boolean => Box::new(FirstBoolImpl), - // PhysicalType2::Float16 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float32 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float64 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int8 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int16 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int32 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int64 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int128 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt8 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt16 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt32 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt64 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt128 => Box::new(FirstPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Interval => Box::new( - // FirstPrimitiveImpl::::new(datatype.clone()), - // ), - // PhysicalType2::Binary => Box::new(FirstBinaryImpl { - // datatype: datatype.clone(), - // }), - // PhysicalType2::Utf8 => Box::new(FirstBinaryImpl { - // datatype: datatype.clone(), - // }), - // PhysicalType2::List => { - // 
// TODO: Easy, clone underlying array and select. - // not_implemented!("FIRST for list arrays") - // } - // }; - Ok(PlannedAggregateFunction { function: Box::new(*self), return_type: datatype, @@ -169,6 +114,12 @@ pub struct FirstPrimitiveImpl { _s: PhantomData, } +impl FirstPrimitiveImpl { + const fn new() -> Self { + FirstPrimitiveImpl { _s: PhantomData } + } +} + impl AggregateFunctionImpl for FirstPrimitiveImpl where S: MutablePhysicalStorage, @@ -183,18 +134,31 @@ where } } -// #[derive(Debug, Clone, Copy)] -// pub struct FirstBinaryImpl; +#[derive(Debug, Clone, Copy)] +pub struct FirstBinaryImpl; -// impl AggregateFunctionImpl for FirstBinaryImpl { -// fn new_states(&self) -> Box { -// Box::new(TypedAggregateGroupStates::new( -// FirstBinaryState::default, -// unary_update::, -// drain::, -// )) -// } -// } +impl AggregateFunctionImpl for FirstBinaryImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + FirstBinaryState::default, + unary_update::, + drain::, + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct FirstStringImpl; + +impl AggregateFunctionImpl for FirstStringImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + FirstStringState::default, + unary_update::, + drain::, + )) + } +} #[derive(Debug, Default)] pub struct FirstPrimitiveState { diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs index 448e18fc1..1b242f11a 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/minmax.rs @@ -1,17 +1,13 @@ -use std::borrow::Borrow; use std::fmt::Debug; use std::marker::PhantomData; -use half::f16; use rayexec_error::{not_implemented, Result}; -use crate::arrays::array::ArrayData2; use crate::arrays::buffer::physical_type::{ AddressableMut, MutablePhysicalStorage, PhysicalBinary, PhysicalBool, - PhysicalDictionary, 
PhysicalF16, PhysicalF32, PhysicalF64, @@ -21,8 +17,6 @@ use crate::arrays::buffer::physical_type::{ PhysicalI64, PhysicalI8, PhysicalInterval, - PhysicalList, - PhysicalStorage, PhysicalType, PhysicalU128, PhysicalU16, @@ -32,42 +26,13 @@ use crate::arrays::buffer::physical_type::{ PhysicalUntypedNull, PhysicalUtf8, }; -use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{ - PhysicalBinary_2, - PhysicalBool_2, - PhysicalF16_2, - PhysicalF32_2, - PhysicalF64_2, - PhysicalI128_2, - PhysicalI16_2, - PhysicalI32_2, - PhysicalI64_2, - PhysicalI8_2, - PhysicalInterval_2, - PhysicalStorage2, - PhysicalType2, - PhysicalU128_2, - PhysicalU16_2, - PhysicalU32_2, - PhysicalU64_2, - PhysicalU8_2, - PhysicalUntypedNull_2, -}; +use crate::arrays::datatype::DataTypeId; use crate::arrays::executor_exp::aggregate::AggregateState; use crate::arrays::executor_exp::PutBuffer; -use crate::arrays::scalar::interval::Interval; -use crate::arrays::storage::{PrimitiveStorage, UntypedNull}; use crate::expr::Expression; use crate::functions::aggregate::states::{ - boolean_finalize, drain, - new_unary_aggregate_states2, - primitive_finalize, unary_update, - untyped_null_finalize, AggregateGroupStates, TypedAggregateGroupStates, }; @@ -113,65 +78,34 @@ impl AggregateFunction for Min { let datatype = inputs[0].datatype(table_list)?; - unimplemented!() - // let function_impl: Box = match datatype.physical_type2()? 
{ - // PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), - // PhysicalType2::Boolean => Box::new(MinBoolImpl::new()), - // PhysicalType2::Float16 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float32 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float64 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int8 => { - // Box::new(MinPrimitiveImpl::::new(datatype.clone())) - // } - // PhysicalType2::Int16 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int32 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int64 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int128 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt8 => { - // Box::new(MinPrimitiveImpl::::new(datatype.clone())) - // } - // PhysicalType2::UInt16 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt32 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt64 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt128 => Box::new(MinPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Interval => Box::new( - // MinPrimitiveImpl::::new(datatype.clone()), - // ), - // PhysicalType2::Binary => Box::new(MinBinaryImpl::new(datatype.clone())), - // PhysicalType2::Utf8 => Box::new(MinBinaryImpl::new(datatype.clone())), - // PhysicalType2::List => { - // not_implemented!("MIN for list arrays") - // } - // }; - - // Ok(PlannedAggregateFunction { - // function: Box::new(*self), - // return_type: datatype, - // inputs, - // function_impl, - // }) + let function_impl: Box = match datatype.physical_type() { + PhysicalType::UntypedNull => Box::new(MinPrimitiveImpl::::new()), + 
PhysicalType::Boolean => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Int8 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Int16 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Int32 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Int64 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Int128 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::UInt8 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::UInt16 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::UInt32 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::UInt64 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::UInt128 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Float16 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Float32 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Float64 => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Interval => Box::new(MinPrimitiveImpl::::new()), + PhysicalType::Utf8 => Box::new(MinStringImpl), + PhysicalType::Binary => Box::new(MinBinaryImpl), + other => not_implemented!("max for type {other:?}"), + }; + + Ok(PlannedAggregateFunction { + function: Box::new(*self), + return_type: datatype, + inputs, + function_impl, + }) } } @@ -225,325 +159,288 @@ impl AggregateFunction for Max { PhysicalType::Float32 => Box::new(MaxPrimitiveImpl::::new()), PhysicalType::Float64 => Box::new(MaxPrimitiveImpl::::new()), PhysicalType::Interval => Box::new(MaxPrimitiveImpl::::new()), - // PhysicalType::Utf8 => Box::new(MaxImpl::::new()), - // PhysicalType::Binary => Box::new(MaxImpl::::new()), + PhysicalType::Utf8 => Box::new(MaxStringImpl), + PhysicalType::Binary => Box::new(MaxBinaryImpl), other => not_implemented!("max for type {other:?}"), }; - // let function_impl: Box = match datatype.physical_type2()? 
{ - // PhysicalType2::UntypedNull => Box::new(MinMaxUntypedNull), - // PhysicalType2::Boolean => Box::new(MaxBoolImpl::new()), - // PhysicalType2::Float16 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float32 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Float64 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int8 => { - // Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - // } - // PhysicalType2::Int16 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int32 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int64 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Int128 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt8 => { - // Box::new(MaxPrimitiveImpl::::new(datatype.clone())) - // } - // PhysicalType2::UInt16 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt32 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt64 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::UInt128 => Box::new(MaxPrimitiveImpl::::new( - // datatype.clone(), - // )), - // PhysicalType2::Interval => Box::new( - // MaxPrimitiveImpl::::new(datatype.clone()), - // ), - // PhysicalType2::Binary => Box::new(MaxBinaryImpl::new(datatype.clone())), - // PhysicalType2::Utf8 => Box::new(MaxBinaryImpl::new(datatype.clone())), - // PhysicalType2::List => { - // not_implemented!("MAX for list arrays") - // } - // }; - unimplemented!() - - // Ok(PlannedAggregateFunction { - // function: Box::new(*self), - // return_type: datatype, - // inputs, - // function_impl, - // }) - } -} - -#[derive(Debug, Clone)] -pub struct MinMaxUntypedNull; - -impl AggregateFunctionImpl for MinMaxUntypedNull { - fn 
new_states(&self) -> Box { - // Note min vs max doesn't matter. Everything is null. - new_unary_aggregate_states2::( - MinState::::default, - untyped_null_finalize, - ) + Ok(PlannedAggregateFunction { + function: Box::new(*self), + return_type: datatype, + inputs, + function_impl, + }) } } -pub type MinBinaryImpl = MinMaxBinaryImpl; -pub type MaxBinaryImpl = MinMaxBinaryImpl; - -#[derive(Debug)] -pub struct MinMaxBinaryImpl { - datatype: DataType, - _m: PhantomData, +#[derive(Debug, Clone, Copy)] +pub struct MaxPrimitiveImpl { + _s: PhantomData, } -impl MinMaxBinaryImpl { - fn new(datatype: DataType) -> Self { - MinMaxBinaryImpl { - datatype, - _m: PhantomData, - } +impl MaxPrimitiveImpl { + const fn new() -> Self { + MaxPrimitiveImpl { _s: PhantomData } } } -impl AggregateFunctionImpl for MinMaxBinaryImpl +impl AggregateFunctionImpl for MaxPrimitiveImpl where - M: for<'a> AggregateState2<&'a [u8], Vec> + Default + Sync + Send + 'static, + S: MutablePhysicalStorage, + S::StorageType: Default + Debug + Sync + Send + PartialOrd + Copy, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - new_unary_aggregate_states2::(M::default, move |states| { - let builder = ArrayBuilder { - datatype: datatype.clone(), - buffer: GermanVarlenBuffer::<[u8]>::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }) + Box::new(TypedAggregateGroupStates::new( + MaxStatePrimitive::::default, + unary_update::, + drain::, + )) } } -impl Clone for MinMaxBinaryImpl { - fn clone(&self) -> Self { - Self::new(self.datatype.clone()) +#[derive(Debug, Clone, Copy)] +pub struct MaxBinaryImpl; + +impl AggregateFunctionImpl for MaxBinaryImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MaxStateBinary::default, + unary_update::, + drain::, + )) } } -pub type MinBoolImpl = MinMaxBoolImpl>; -pub type MaxBoolImpl = MinMaxBoolImpl>; +#[derive(Debug, Clone, Copy)] +pub struct MaxStringImpl; -#[derive(Debug)] -pub struct 
MinMaxBoolImpl { - _m: PhantomData, +impl AggregateFunctionImpl for MaxStringImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MaxStateString::default, + unary_update::, + drain::, + )) + } } -impl MinMaxBoolImpl { - fn new() -> Self { - MinMaxBoolImpl { _m: PhantomData } - } +#[derive(Debug, Default)] +pub struct MaxStatePrimitive { + max: T, + valid: bool, } -impl AggregateFunctionImpl for MinMaxBoolImpl +impl AggregateState<&T, T> for MaxStatePrimitive where - M: AggregateState2 + Default + Sync + Send + 'static, + T: Debug + Sync + Send + PartialOrd + Copy, { - fn new_states(&self) -> Box { - new_unary_aggregate_states2::(M::default, move |states| { - boolean_finalize(DataType::Boolean, states) - }) - } -} - -impl Clone for MinMaxBoolImpl { - fn clone(&self) -> Self { - Self::new() - } -} + fn merge(&mut self, other: &mut Self) -> Result<()> { + if !self.valid { + self.valid = other.valid; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } -pub type MinPrimitiveImpl = MinMaxPrimitiveImpl, S, T>; -pub type MaxPrimitiveImpl2 = MinMaxPrimitiveImpl, S, T>; + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); + } -// TODO: Remove T -#[derive(Debug)] -pub struct MinMaxPrimitiveImpl { - datatype: DataType, - _m: PhantomData, - _s: PhantomData, - _t: PhantomData, -} + Ok(()) + } -impl MinMaxPrimitiveImpl { - fn new(datatype: DataType) -> Self { - MinMaxPrimitiveImpl { - datatype, - _m: PhantomData, - _s: PhantomData, - _t: PhantomData, + fn update(&mut self, input: &T) -> Result<()> { + if !self.valid { + self.max = *input; + return Ok(()); } - } -} -impl AggregateFunctionImpl for MinMaxPrimitiveImpl -where - for<'a> S: PhysicalStorage2 = T>, - T: PartialOrd + Debug + Default + Sync + Send + Copy + 'static, - M: AggregateState2 + Default + Sync + Send + 'static, - ArrayData2: From>, -{ - fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); + if self.max.lt(input) { + 
self.max = *input; + } - new_unary_aggregate_states2::(M::default, move |states| { - primitive_finalize(datatype.clone(), states) - }) + Ok(()) } -} -impl Clone for MinMaxPrimitiveImpl { - fn clone(&self) -> Self { - Self::new(self.datatype.clone()) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + if self.valid { + output.put(&self.max); + } else { + output.put_null(); + } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MinState { - min: T, +pub struct MaxStateBinary { + max: Vec, valid: bool, } -impl AggregateState2 for MinState -where - T: PartialOrd + Debug + Default + Copy, -{ +impl AggregateState<&[u8], [u8]> for MaxStateBinary { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - self.min = other.min; - } else if other.valid && other.min < self.min { - self.min = other.min; + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); } Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, input: &[u8]) -> Result<()> { if !self.valid { - self.valid = true; - self.min = input; - } else if input < self.min { - self.min = input + self.max = input.to_vec(); + return Ok(()); } + + if self.max.as_slice().lt(input) { + self.max = input.to_vec(); + } + Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.min, true)) + output.put(&self.max); } else { - Ok((T::default(), false)) + output.put_null(); } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MinStateBinary { - min: Vec, +pub struct MaxStateString { + max: String, valid: bool, } -impl AggregateState2<&[u8], Vec> for MinStateBinary { +impl AggregateState<&str, str> for MaxStateString { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; 
- std::mem::swap(&mut self.min, &mut other.min); - } else if other.valid && other.min < self.min { - std::mem::swap(&mut self.min, &mut other.min); + std::mem::swap(&mut self.max, &mut other.max); + return Ok(()); + } + + if self.max.lt(&other.max) { + std::mem::swap(&mut self.max, &mut other.max); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if !self.valid { - self.valid = true; - self.min = input.into(); - } else if input < self.min.as_slice() { - self.min = input.into(); + self.max = input.to_string(); + return Ok(()); + } + + if self.max.as_str().lt(input) { + self.max = input.to_string(); } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((std::mem::take(&mut self.min), true)) + output.put(&self.max); } else { - Ok((Vec::new(), false)) + output.put_null(); } + + Ok(()) } } #[derive(Debug, Clone, Copy)] -pub struct MaxPrimitiveImpl { +pub struct MinPrimitiveImpl { _s: PhantomData, } -impl MaxPrimitiveImpl { +impl MinPrimitiveImpl { const fn new() -> Self { - MaxPrimitiveImpl { _s: PhantomData } + MinPrimitiveImpl { _s: PhantomData } } } -impl AggregateFunctionImpl for MaxPrimitiveImpl +impl AggregateFunctionImpl for MinPrimitiveImpl where S: MutablePhysicalStorage, S::StorageType: Default + Debug + Sync + Send + PartialOrd + Copy, { fn new_states(&self) -> Box { Box::new(TypedAggregateGroupStates::new( - MaxStatePrimitive::::default, + MinStatePrimitive::::default, unary_update::, drain::, )) } } +#[derive(Debug, Clone, Copy)] +pub struct MinBinaryImpl; + +impl AggregateFunctionImpl for MinBinaryImpl { + fn new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MinStateBinary::default, + unary_update::, + drain::, + )) + } +} + +#[derive(Debug, Clone, Copy)] +pub struct MinStringImpl; + +impl AggregateFunctionImpl for MinStringImpl { + fn 
new_states(&self) -> Box { + Box::new(TypedAggregateGroupStates::new( + MinStateString::default, + unary_update::, + drain::, + )) + } +} + #[derive(Debug, Default)] -pub struct MaxStatePrimitive { - max: T, +pub struct MinStatePrimitive { + min: T, valid: bool, } -impl AggregateState<&T, T> for MaxStatePrimitive +impl AggregateState<&T, T> for MinStatePrimitive where T: Debug + Sync + Send + PartialOrd + Copy, { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - std::mem::swap(&mut self.max, &mut other.max); + std::mem::swap(&mut self.min, &mut other.min); return Ok(()); } - if self.max.lt(&other.max) { - std::mem::swap(&mut self.max, &mut other.max); + if self.min.gt(&other.min) { + std::mem::swap(&mut self.min, &mut other.min); } Ok(()) @@ -551,12 +448,12 @@ where fn update(&mut self, input: &T) -> Result<()> { if !self.valid { - self.max = *input; + self.min = *input; return Ok(()); } - if self.max.lt(input) { - self.max = *input; + if self.min.gt(input) { + self.min = *input; } Ok(()) @@ -567,7 +464,7 @@ where M: AddressableMut, { if self.valid { - output.put(&self.max); + output.put(&self.min); } else { output.put_null(); } @@ -577,79 +474,97 @@ where } #[derive(Debug, Default)] -pub struct MaxState2 { - max: T, +pub struct MinStateBinary { + min: Vec, valid: bool, } -impl AggregateState2 for MaxState2 -where - T: PartialOrd + Debug + Default + Copy, -{ +impl AggregateState<&[u8], [u8]> for MinStateBinary { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - self.max = other.max; - } else if other.valid && other.max > self.max { - self.max = other.max; + std::mem::swap(&mut self.min, &mut other.min); + return Ok(()); } + + if self.min.gt(&other.min) { + std::mem::swap(&mut self.min, &mut other.min); + } + Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, input: &[u8]) -> Result<()> { if !self.valid { - self.valid = true; - self.max = 
input; - } else if input > self.max { - self.max = input + self.min = input.to_vec(); + return Ok(()); + } + + if self.min.as_slice().gt(input) { + self.min = input.to_vec(); } Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.max, true)) + output.put(&self.min); } else { - Ok((T::default(), false)) + output.put_null(); } + + Ok(()) } } #[derive(Debug, Default)] -pub struct MaxStateBinary2 { - max: Vec, +pub struct MinStateString { + min: String, valid: bool, } -impl AggregateState2<&[u8], Vec> for MaxStateBinary2 { +impl AggregateState<&str, str> for MinStateString { fn merge(&mut self, other: &mut Self) -> Result<()> { if !self.valid { self.valid = other.valid; - std::mem::swap(&mut self.max, &mut other.max); - } else if other.valid && other.max > self.max { - std::mem::swap(&mut self.max, &mut other.max); + std::mem::swap(&mut self.min, &mut other.min); + return Ok(()); + } + + if self.min.gt(&other.min) { + std::mem::swap(&mut self.min, &mut other.min); } Ok(()) } - fn update(&mut self, input: &[u8]) -> Result<()> { + fn update(&mut self, input: &str) -> Result<()> { if !self.valid { - self.valid = true; - self.max = input.into(); - } else if input > self.max.as_slice() { - self.max = input.into(); + self.min = input.to_string(); + return Ok(()); + } + + if self.min.as_str().gt(input) { + self.min = input.to_string(); } Ok(()) } - fn finalize(&mut self) -> Result<(Vec, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((std::mem::take(&mut self.max), true)) + output.put(&self.min); } else { - Ok((Vec::new(), false)) + output.put_null(); } + + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs index 29914d218..ddad9d65f 100644 --- 
a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_count.rs @@ -1,16 +1,14 @@ use std::fmt::Debug; +use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalStorage}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalAny; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; -use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, - AggregateGroupStates, -}; +use crate::functions::aggregate::states::AggregateGroupStates; use crate::functions::aggregate::{ AggregateFunction, AggregateFunctionImpl, @@ -59,7 +57,7 @@ impl AggregateFunction for RegrCount { function: Box::new(*self), return_type: DataType::Float64, inputs, - function_impl: Box::new(RegrCountImpl), + function_impl: Box::new(RegrCountImpl::::new()), }), (a, b) => Err(invalid_input_types_error(self, &[a, b])), } @@ -67,14 +65,22 @@ impl AggregateFunction for RegrCount { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct RegrCountImpl; +pub struct RegrCountImpl { + _s: PhantomData, +} + +impl RegrCountImpl { + const fn new() -> Self { + RegrCountImpl { _s: PhantomData } + } +} -impl AggregateFunctionImpl for RegrCountImpl { +impl AggregateFunctionImpl for RegrCountImpl +where + S: PhysicalStorage, +{ fn new_states(&self) -> Box { - new_binary_aggregate_states2::( - RegrCountState::default, - move |states| primitive_finalize(DataType::Int64, states), - ) + unimplemented!() } } @@ -83,22 +89,30 @@ impl AggregateFunctionImpl for RegrCountImpl { /// Note that this can be used for any input type, but the sql function we /// expose only accepts f64 (to match Postgres). 
#[derive(Debug, Clone, Copy, Default)] -pub struct RegrCountState { +pub struct RegrCountState { count: i64, + _s: PhantomData, } -impl AggregateState2<((), ()), i64> for RegrCountState { +impl AggregateState<&S::StorageType, i64> for RegrCountState +where + S: PhysicalStorage, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.count += other.count; Ok(()) } - fn update(&mut self, _input: ((), ())) -> Result<()> { + fn update(&mut self, _input: &S::StorageType) -> Result<()> { self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(i64, bool)> { - Ok((self.count, true)) + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + output.put(&self.count); + Ok(()) } } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs index cd1ec0d65..b1407c091 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/string_agg.rs @@ -2,13 +2,18 @@ use std::fmt::Debug; use rayexec_error::{RayexecError, Result}; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::{AggregateState2, StateFinalizer}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::ScalarValue; use crate::expr::Expression; -use crate::functions::aggregate::states::{new_unary_aggregate_states2, AggregateGroupStates}; +use crate::functions::aggregate::states::{ + drain, + unary_update, + AggregateGroupStates, + TypedAggregateGroupStates, +}; use crate::functions::aggregate::{ AggregateFunction, AggregateFunctionImpl, @@ -99,13 +104,11 @@ impl 
AggregateFunctionImpl for StringAggImpl { string: None, }; - new_unary_aggregate_states2::(state_init, move |states| { - let builder = ArrayBuilder { - datatype: DataType::Utf8, - buffer: GermanVarlenBuffer::::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) - }) + Box::new(TypedAggregateGroupStates::new( + state_init, + unary_update::, + drain::, + )) } } @@ -119,7 +122,7 @@ pub struct StringAggState { string: Option, } -impl AggregateState2<&str, String> for StringAggState { +impl AggregateState<&str, str> for StringAggState { fn merge(&mut self, other: &mut Self) -> Result<()> { if self.string.is_none() { std::mem::swap(self, other); @@ -148,10 +151,14 @@ impl AggregateState2<&str, String> for StringAggState { Ok(()) } - fn finalize(&mut self) -> Result<(String, bool)> { - match self.string.take() { - Some(s) => Ok((s, true)), - None => Ok((String::new(), false)), + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { + match &self.string { + Some(s) => output.put(s), + None => output.put_null(), } + Ok(()) } } diff --git a/crates/stdutil/src/marker.rs b/crates/stdutil/src/marker.rs index 27a8868b4..956ddfd7e 100644 --- a/crates/stdutil/src/marker.rs +++ b/crates/stdutil/src/marker.rs @@ -7,7 +7,7 @@ use std::marker::PhantomData; /// bounds. This lets us make structs and other types covariant to `T` but /// without the potential inheritence of `?Sized` (or other undesired traits) in /// the outer type. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct PhantomCovariant(PhantomData T>) where T: ?Sized; @@ -20,3 +20,23 @@ where PhantomCovariant(PhantomData) } } + +impl Clone for PhantomCovariant +where + T: ?Sized, +{ + fn clone(&self) -> Self { + Self::new() + } +} + +impl Copy for PhantomCovariant where T: ?Sized {} + +impl Default for PhantomCovariant +where + T: ?Sized, +{ + fn default() -> Self { + Self::new() + } +} From 8573d07258ba7a746dc10db606d9a39d29a68af2 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 14:49:33 -0600 Subject: [PATCH 51/59] update sum --- .../operators/hash_aggregate/chunk.rs | 11 +- .../operators/hash_aggregate/distinct.rs | 10 +- .../operators/hash_aggregate/hash_table.rs | 2 +- .../operators/ungrouped_aggregate.rs | 11 +- .../src/functions/aggregate/builtin/sum.rs | 408 ++++++------------ .../src/functions/aggregate/states.rs | 79 +--- 6 files changed, 173 insertions(+), 348 deletions(-) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 805614f39..94e9522b0 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -69,7 +69,7 @@ impl GroupChunk { self.hashes.extend(hashes); for states in &mut self.aggregate_states { - states.states.new_states(new_groups); + states.states.new_groups(new_groups); } self.num_groups += new_groups; @@ -110,10 +110,11 @@ impl GroupChunk { let own_state = &mut self.aggregate_states[agg_idx]; let other_state = &mut other.aggregate_states[agg_idx]; - own_state.states.combine( - &mut other_state.states, - ChunkGroupAddressIter::new(self.chunk_idx, addrs), - )?; + unimplemented!() + // own_state.states.combine( + // &mut other_state.states, + // ChunkGroupAddressIter::new(self.chunk_idx, 
addrs), + // )?; } Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 7b64bd74c..0f1bb4b83 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use rayexec_error::Result; use super::hash_table::HashTable; +use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; use crate::arrays::executor::scalar::HashExecutor; use crate::arrays::selection::SelectionVector; @@ -40,7 +41,7 @@ impl AggregateGroupStates for DistinctGroupedStates { OpaqueStatesMut(&mut self.distinct_inputs) } - fn new_states(&mut self, count: usize) { + fn new_groups(&mut self, count: usize) { // Hash tables created with empty aggregates. self.distinct_inputs .extend((0..count).map(|_| Some(HashTable::new(16, Vec::new())))); @@ -99,13 +100,14 @@ impl AggregateGroupStates for DistinctGroupedStates { fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()> { let other_distinct_inputs = consume .opaque_states_mut() .downcast::>>()?; - for (from, to) in mapping { + for (from, to) in selection.iter().zip(mapping.iter().copied()) { let consume = other_distinct_inputs[from].as_mut().unwrap(); let target = self.distinct_inputs[to].as_mut().unwrap(); target.merge(consume)?; @@ -116,7 +118,7 @@ impl AggregateGroupStates for DistinctGroupedStates { fn finalize2(&mut self) -> Result { // And now we actually create the states we need. 
- self.states.new_states(self.distinct_inputs.len()); + self.states.new_groups(self.distinct_inputs.len()); let mut addresses_buf = Vec::new(); diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs index b30a8a6dd..43b2e655d 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/hash_table.rs @@ -308,7 +308,7 @@ impl HashTable { // Initialize the states. for state in &mut states { - state.states.new_states(num_new_groups); + state.states.new_groups(num_new_groups); } let chunk = GroupChunk { diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index dcad66f79..623e7bd45 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -16,6 +16,7 @@ use super::{ PollPull, PollPush, }; +use crate::arrays::array::selection::Selection; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::InputOutputStates; @@ -93,7 +94,7 @@ impl PhysicalUngroupedAggregate { } else { agg.function.function_impl.new_states() }; - state.new_states(1); + state.new_groups(1); states.push(state); } @@ -201,17 +202,13 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { }; // Everything maps to the same group (group 0) - let mapping = [GroupAddress { - chunk_idx: 0, - row_idx: 0, - }]; - for (mut local_agg_state, global_agg_state) in agg_states.into_iter().zip(shared.agg_states.iter_mut()) { global_agg_state.combine( &mut local_agg_state, - ChunkGroupAddressIter::new(0, &mapping), + Selection::selection(&[0]), + &[0], )?; } diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs 
b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index 26d00f5fa..d6f766750 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -5,20 +5,17 @@ use std::ops::AddAssign; use num_traits::CheckedAdd; use rayexec_error::Result; -use crate::arrays::array::ArrayData2; -use crate::arrays::buffer::physical_type::AddressableMut; +use crate::arrays::buffer::physical_type::{AddressableMut, PhysicalF64, PhysicalI64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::{PhysicalF64_2, PhysicalI64_2}; use crate::arrays::executor_exp::aggregate::AggregateState; use crate::arrays::executor_exp::PutBuffer; use crate::arrays::scalar::decimal::{Decimal128Type, Decimal64Type, DecimalType}; -use crate::arrays::storage::PrimitiveStorage; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_unary_aggregate_states2, - primitive_finalize, + drain, + unary_update, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -88,17 +85,11 @@ impl AggregateFunction for Sum { DataType::Float64 => (Box::new(SumFloat64Impl), DataType::Float64), DataType::Decimal64(m) => { let datatype = DataType::Decimal64(m); - ( - Box::new(SumDecimalImpl::::new(datatype.clone())), - datatype, - ) + (Box::new(SumDecimalImpl::::new()), datatype) } DataType::Decimal128(m) => { let datatype = DataType::Decimal128(m); - ( - Box::new(SumDecimalImpl::::new(datatype.clone())), - datatype, - ) + (Box::new(SumDecimalImpl::::new()), datatype) } other => return Err(invalid_input_types_error(self, &[other])), }; @@ -117,10 +108,11 @@ pub struct SumInt64Impl; impl AggregateFunctionImpl for SumInt64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( 
SumStateCheckedAdd::::default, - move |states| primitive_finalize(DataType::Int64, states), - ) + unary_update::, + drain::, + )) } } @@ -129,59 +121,57 @@ pub struct SumFloat64Impl; impl AggregateFunctionImpl for SumFloat64Impl { fn new_states(&self) -> Box { - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( SumStateAdd::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + unary_update::, + drain::, + )) } } #[derive(Debug, Clone)] pub struct SumDecimalImpl { - datatype: DataType, _d: PhantomData, } impl SumDecimalImpl { - fn new(datatype: DataType) -> Self { - SumDecimalImpl { - datatype, - _d: PhantomData, - } + const fn new() -> Self { + SumDecimalImpl { _d: PhantomData } } } impl AggregateFunctionImpl for SumDecimalImpl where D: DecimalType, - ArrayData2: From>, { fn new_states(&self) -> Box { - let datatype = self.datatype.clone(); - - new_unary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( SumStateCheckedAdd::::default, - move |states| primitive_finalize(datatype.clone(), states), - ) + unary_update::, + drain::, + )) } } #[derive(Debug, Default)] pub struct SumStateCheckedAdd { sum: T, - set: bool, + valid: bool, } -impl AggregateState for SumStateCheckedAdd { +impl AggregateState<&T, T> for SumStateCheckedAdd +where + T: CheckedAdd + Default + Debug + Copy, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum = self.sum.checked_add(&other.sum).unwrap_or_default(); // TODO - self.set = self.set || other.set; + self.valid = self.valid || other.valid; Ok(()) } - fn update(&mut self, input: T) -> Result<()> { - self.sum = self.sum.checked_add(&input).unwrap_or_default(); // TODO - self.set = true; + fn update(&mut self, input: &T) -> Result<()> { + self.sum = self.sum.checked_add(input).unwrap_or_default(); // TODO + self.valid = true; Ok(()) } @@ -189,34 +179,12 @@ impl AggregateState for SumStateCh where M: AddressableMut, { - if self.set { - output.put_null(); - } else { + 
if self.valid { output.put(&self.sum); - } - Ok(()) - } -} - -impl AggregateState2 for SumStateCheckedAdd { - fn merge(&mut self, other: &mut Self) -> Result<()> { - self.sum = self.sum.checked_add(&other.sum).unwrap_or_default(); // TODO - self.set = self.set || other.set; - Ok(()) - } - - fn update(&mut self, input: T) -> Result<()> { - self.sum = self.sum.checked_add(&input).unwrap_or_default(); // TODO - self.set = true; - Ok(()) - } - - fn finalize(&mut self) -> Result<(T, bool)> { - if self.set { - Ok((self.sum, true)) } else { - Ok((T::default(), false)) + output.put_null(); } + Ok(()) } } @@ -226,33 +194,46 @@ pub struct SumStateAdd { valid: bool, } -impl AggregateState2 for SumStateAdd { +impl AggregateState<&T, T> for SumStateAdd +where + T: AddAssign + Default + Debug + Copy, +{ fn merge(&mut self, other: &mut Self) -> Result<()> { self.sum += other.sum; self.valid = self.valid || other.valid; Ok(()) } - fn update(&mut self, input: T) -> Result<()> { + fn update(&mut self, &input: &T) -> Result<()> { self.sum += input; self.valid = true; Ok(()) } - fn finalize(&mut self) -> Result<(T, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.valid { - Ok((self.sum, true)) + output.put(&self.sum); } else { - Ok((T::default(), false)) + output.put_null(); } + Ok(()) } } #[cfg(test)] mod tests { + use stdutil::iter::TryFromExactSizeIterator; + use super::*; + use crate::arrays::array::exp::Array; + use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; + use crate::arrays::buffer::buffer_manager::NopBufferManager; use crate::arrays::scalar::ScalarValue; + use crate::arrays::testutil::{assert_arrays_eq, assert_arrays_eq_sel}; use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; use crate::expr; use crate::functions::aggregate::ChunkGroupAddressIter; @@ -261,8 +242,8 @@ mod tests { fn sum_i64_single_group_two_partitions() { // Single group, two partitions, 
'SELECT SUM(a) FROM table' - let partition_1_vals = &Array2::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array2::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = Array::try_from_iter::<[i64; 3]>([1, 2, 3]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 3]>([4, 5, 6]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -276,50 +257,31 @@ mod tests { let mut states_1 = specialized.function_impl.new_states(); let mut states_2 = specialized.function_impl.new_states(); - states_1.new_states(1); - states_2.new_states(1); + states_1.new_groups(1); + states_2.new_groups(1); // All inputs map to the same group (no GROUP BY clause) - let addrs_1: Vec<_> = (0..partition_1_vals.logical_len()) - .map(|_| GroupAddress { - chunk_idx: 0, - row_idx: 0, - }) - .collect(); - let addrs_2: Vec<_> = (0..partition_2_vals.logical_len()) - .map(|_| GroupAddress { - chunk_idx: 0, - row_idx: 0, - }) - .collect(); - states_1 - .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(3), &[0, 0, 0]) .unwrap(); states_2 - .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(3), &[0, 0, 0]) .unwrap(); // Combine states. // // Both partitions hold a single state (representing a single group), // and those states map to each other. - let combine_mapping = vec![GroupAddress { - chunk_idx: 0, - row_idx: 0, - }]; states_1 - .combine( - &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), - ) + .combine(&mut states_2, Selection::selection(&[0]), &[0]) .unwrap(); // Get final output. 
- let out = states_1.finalize2().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 1).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(1, out.logical_len()); - assert_eq!(ScalarValue::Int64(21), out.logical_value(0).unwrap()); + let expected = Array::try_from_iter([21_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] @@ -338,8 +300,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. - let partition_1_vals = &Array2::from_iter::<[i64; 3]>([1, 2, 3]); - let partition_2_vals = &Array2::from_iter::<[i64; 3]>([4, 5, 6]); + let partition_1_vals = Array::try_from_iter::<[i64; 3]>([1, 2, 3]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 3]>([4, 5, 6]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -358,48 +320,19 @@ mod tests { let mut states_2 = specialized.function_impl.new_states(); // Both partitions are operating on two groups ('a' and 'b'). - states_1.new_states(1); - states_1.new_states(1); + states_1.new_groups(1); + states_1.new_groups(1); - states_2.new_states(1); - states_2.new_states(1); + states_2.new_groups(1); + states_2.new_groups(1); // Mapping corresponding to the above table. Group 'a' == 0 and group // 'b' == 1. 
- let addrs_1 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; - let addrs_2 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - ]; - states_1 - .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(3), &[0, 0, 1]) .unwrap(); states_2 - .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(3), &[1, 1, 0]) .unwrap(); // Combine states. @@ -411,29 +344,20 @@ mod tests { // The mapping here indicates the the 0th state for both partitions // should be combined, and the 1st state for both partitions should be // combined. - let combine_mapping = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; states_1 .combine( &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), + Selection::linear(2), // States 0 ('a') and 1 ('b') + &[0, 1], ) .unwrap(); // Get final output. - let out = states_1.finalize2().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 2).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(2, out.logical_len()); - assert_eq!(ScalarValue::Int64(9), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Int64(12), out.logical_value(1).unwrap()); + let expected = Array::try_from_iter([9_i64, 12_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } #[test] @@ -460,8 +384,8 @@ mod tests { // Partition values and mappings represent the positions of the above // table. The actual grouping values are stored in the operator, and // operator is what computes the mappings. 
- let partition_1_vals = &Array2::from_iter::<[i64; 4]>([1, 2, 3, 4]); - let partition_2_vals = &Array2::from_iter::<[i64; 4]>([5, 6, 7, 8]); + let partition_1_vals = Array::try_from_iter::<[i64; 4]>([1, 2, 3, 4]).unwrap(); + let partition_2_vals = Array::try_from_iter::<[i64; 4]>([5, 6, 7, 8]).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -480,58 +404,21 @@ mod tests { let mut states_2 = specialized.function_impl.new_states(); // Partition 1 sees groups 'x', 'y', and 'z'. - states_1.new_states(1); - states_1.new_states(1); - states_1.new_states(1); + states_1.new_groups(1); + states_1.new_groups(1); + states_1.new_groups(1); // Partition 2 see groups 'x' and 'z' (no 'y'). - states_2.new_states(1); - states_2.new_states(1); + states_2.new_groups(1); + states_2.new_groups(1); // For partition 1: 'x' == 0, 'y' == 1, 'z' == 2 - let addrs_1 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 2, - }, - ]; - // For partition 2: 'x' == 0, 'z' == 1 - let addrs_2 = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 1, - }, - ]; - states_1 - .update_states2(&[partition_1_vals], ChunkGroupAddressIter::new(0, &addrs_1)) + .update_group_states(&[partition_1_vals], Selection::linear(4), &[0, 0, 1, 2]) .unwrap(); + // For partition 2: 'x' == 0, 'z' == 1 states_2 - .update_states2(&[partition_2_vals], ChunkGroupAddressIter::new(0, &addrs_2)) + .update_group_states(&[partition_2_vals], Selection::linear(4), &[0, 1, 1, 1]) .unwrap(); // Combine states. @@ -539,86 +426,61 @@ mod tests { // States for 'x' both at the same position. 
// // States for 'y' at different positions, partition_2_state[1] => partition_1_state[2] - let combine_mapping = vec![ - GroupAddress { - chunk_idx: 0, - row_idx: 0, - }, - GroupAddress { - chunk_idx: 0, - row_idx: 2, - }, - ]; states_1 - .combine( - &mut states_2, - ChunkGroupAddressIter::new(0, &combine_mapping), - ) + .combine(&mut states_2, Selection::selection(&[0, 1]), &[0, 2]) .unwrap(); // Get final output. - let out = states_1.finalize2().unwrap(); + let mut out = Array::new(&NopBufferManager, DataType::Int64, 3).unwrap(); + states_1.drain(&mut out).unwrap(); - assert_eq!(3, out.logical_len()); - assert_eq!(ScalarValue::Int64(8), out.logical_value(0).unwrap()); - assert_eq!(ScalarValue::Int64(3), out.logical_value(1).unwrap()); - assert_eq!(ScalarValue::Int64(25), out.logical_value(2).unwrap()); + let expected = Array::try_from_iter([8_i64, 3_i64, 25_i64]).unwrap(); + assert_arrays_eq(&expected, &out); } - // #[test] - // fn sum_i64_drain_multiple() { - // // Three groups, single partition, test that drain can be called - // // multiple times until states are exhausted. 
- // let vals = &Array::from_iter::<[i64; 6]>([1, 2, 3, 4, 5, 6]); - - // let specialized = Sum.plan_from_datatypes(&[DataType::Int64]).unwrap(); - // let mut states = specialized.new_grouped_state(); - - // states.new_group(); - // states.new_group(); - // states.new_group(); - - // let addrs = vec![ - // GroupAddress { - // chunk_idx: 0, - // row_idx: 0, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 0, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 1, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 1, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 2, - // }, - // GroupAddress { - // chunk_idx: 0, - // row_idx: 2, - // }, - // ]; - - // states - // .update_states(&[vals], ChunkGroupAddressIter::new(0, &addrs)) - // .unwrap(); - - // let out_1 = states.drain_next(2).unwrap().unwrap(); - // assert_eq!(2, out_1.logical_len()); - // assert_eq!(ScalarValue::Int64(3), out_1.logical_value(0).unwrap()); - // assert_eq!(ScalarValue::Int64(7), out_1.logical_value(1).unwrap()); - - // let out_2 = states.drain_next(2).unwrap().unwrap(); - // assert_eq!(1, out_2.logical_len()); - // assert_eq!(ScalarValue::Int64(11), out_2.logical_value(0).unwrap()); - - // let out_3 = states.drain_next(2).unwrap(); - // assert_eq!(None, out_3); - // } + #[test] + fn sum_i64_drain_multiple() { + // Three groups, single partition, test that drain can be called + // multiple times until states are exhausted. 
+ let vals = Array::try_from_iter::<[i64; 6]>([1, 2, 3, 4, 5, 6]).unwrap(); + + let mut table_list = TableList::empty(); + let table_ref = table_list + .push_table( + None, + vec![DataType::Utf8, DataType::Int64], + vec!["col1".to_string(), "col2".to_string()], + ) + .unwrap(); + + let specialized = Sum + .plan(&table_list, vec![expr::col_ref(table_ref, 1)]) + .unwrap(); + let mut states = specialized.function_impl.new_states(); + + states.new_groups(3); + + states + .update_group_states(&[vals], Selection::linear(6), &[0, 0, 1, 1, 2, 2]) + .unwrap(); + + let mut out = Array::new(&NopBufferManager, DataType::Int64, 2).unwrap(); + + let n = states.drain(&mut out).unwrap(); + assert_eq!(2, n); + + let expected = Array::try_from_iter([3_i64, 7]).unwrap(); + assert_arrays_eq(&expected, &out); + + out.reset_for_write(&NopBufferManager).unwrap(); + let n = states.drain(&mut out).unwrap(); + assert_eq!(1, n); + + let expected = Array::try_from_iter([11_i64]).unwrap(); + assert_arrays_eq_sel(&expected, 0..1, &out, 0..1); + + out.reset_for_write(&NopBufferManager).unwrap(); + let n = states.drain(&mut out).unwrap(); + assert_eq!(0, n); + } } diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index 808218e32..c3c70da52 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -11,12 +11,7 @@ use super::ChunkGroupAddressIter; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::array::{Array2, ArrayData2}; -use crate::arrays::buffer::physical_type::{ - MutablePhysicalStorage, - PhysicalBool, - PhysicalStorage, - PhysicalType, -}; +use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalStorage}; use crate::arrays::datatype::DataType; use crate::arrays::executor::aggregate::{ AggregateState2, @@ -94,7 +89,7 @@ where OpaqueStatesMut(&mut 
self.states) } - fn new_states(&mut self, count: usize) { + fn new_groups(&mut self, count: usize) { debug_assert_eq!(0, self.drain_idx); self.states.extend((0..count).map(|_| (self.state_init)())) } @@ -107,7 +102,7 @@ where unimplemented!() } - fn update_states( + fn update_group_states( &mut self, inputs: &[Array], selection: Selection, @@ -122,11 +117,19 @@ where fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()> { debug_assert_eq!(0, self.drain_idx); + debug_assert_eq!(selection.len(), mapping.len()); + let consume_states = consume.opaque_states_mut().downcast::>()?; - StateCombiner::combine(consume_states, mapping, &mut self.states) + + StateCombiner::combine( + consume_states, + selection.iter().zip(mapping.iter().copied()), + &mut self.states, + ) } fn finalize2(&mut self) -> Result { @@ -203,33 +206,6 @@ impl } } -/// Helper for create an `AggregateGroupStates` that accepts one input. -pub fn new_unary_aggregate_states2( - state_init: StateInit, - state_finalize: StateFinalize, -) -> Box -where - Storage: PhysicalStorage2, - State: for<'a> AggregateState2< - <::Storage<'a> as AddressableStorage>::T, - Output, - > + Sync - + Send - + 'static, - Output: Sync + Send + 'static, - StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, -{ - Box::new(TypedAggregateGroupStates2 { - states: Vec::::new(), - state_init, - state_update: unary_update2::, - state_finalize, - _input: PhantomData, - _output: PhantomData, - }) -} - /// Helper for create an `AggregateGroupStates` that accepts two inputs. 
pub fn new_binary_aggregate_states2( state_init: StateInit, @@ -270,7 +246,7 @@ where OpaqueStatesMut(&mut self.states) } - fn new_states(&mut self, count: usize) { + fn new_groups(&mut self, count: usize) { self.states.extend((0..count).map(|_| (self.state_init)())) } @@ -285,9 +261,10 @@ where fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()> { - let consume_states = consume.opaque_states_mut().downcast::>()?; + // let consume_states = consume.opaque_states_mut().downcast::>()?; // StateCombiner2::combine(consume_states, mapping, &mut self.states) unimplemented!() } @@ -315,7 +292,7 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn opaque_states_mut(&mut self) -> OpaqueStatesMut<'_>; /// Create `count` number of new states. - fn new_states(&mut self, count: usize); + fn new_groups(&mut self, count: usize); /// Returns the number of states being tracked. fn num_states(&self) -> usize; @@ -323,7 +300,7 @@ pub trait AggregateGroupStates: Debug + Sync + Send { /// Update states from inputs using some mapping. fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; - fn update_states( + fn update_group_states( &mut self, inputs: &[Array], selection: Selection, @@ -336,7 +313,8 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn combine( &mut self, consume: &mut Box, - mapping: ChunkGroupAddressIter, + selection: Selection, + mapping: &[usize], ) -> Result<()>; /// Finalize the states and return an array. 
@@ -437,21 +415,6 @@ where // ) } -pub fn untyped_null_finalize(states: &mut [State]) -> Result { - Ok(Array2::new_untyped_null_array(states.len())) -} - -pub fn boolean_finalize(datatype: DataType, states: &mut [State]) -> Result -where - State: AggregateState2, -{ - let builder = ArrayBuilder { - datatype, - buffer: BooleanBuffer::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) -} - pub fn primitive_finalize( datatype: DataType, states: &mut [State], From 60a1af185aee440b055db23f47ef338781ab7638 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 15:00:36 -0600 Subject: [PATCH 52/59] some unimplementeds --- .../operators/hash_aggregate/chunk.rs | 9 +- .../operators/hash_aggregate/distinct.rs | 181 +++++++++-------- .../operators/hash_aggregate/drain.rs | 23 ++- .../operators/ungrouped_aggregate.rs | 24 ++- .../functions/aggregate/builtin/regr_avg.rs | 41 ++-- .../src/functions/aggregate/states.rs | 191 +----------------- 6 files changed, 156 insertions(+), 313 deletions(-) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 94e9522b0..05d6d0c9a 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -91,10 +91,11 @@ impl GroupChunk { .filter_map(|(selected, arr)| if selected { Some(arr) } else { None }) .collect(); - agg_states.states.update_states2( - &input_cols, - ChunkGroupAddressIter::new(self.chunk_idx, addrs), - )?; + unimplemented!() + // agg_states.states.update_states2( + // &input_cols, + // ChunkGroupAddressIter::new(self.chunk_idx, addrs), + // )?; } Ok(()) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 0f1bb4b83..01f9b943c 100644 --- 
a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use rayexec_error::Result; use super::hash_table::HashTable; +use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::array::Array2; use crate::arrays::executor::scalar::HashExecutor; @@ -18,6 +19,8 @@ use crate::functions::aggregate::ChunkGroupAddressIter; pub struct DistinctGroupedStates { /// Distinct inputs per group. distinct_inputs: Vec>, + /// Index to begin draining at. + drain_idx: usize, /// The underlying states. /// /// These won't be initialized until we've received all distinct input. @@ -32,6 +35,7 @@ impl DistinctGroupedStates { distinct_inputs: Vec::new(), states, hash_buf: Vec::new(), + drain_idx: 0, } } } @@ -51,52 +55,61 @@ impl AggregateGroupStates for DistinctGroupedStates { self.distinct_inputs.len() } - fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { - // TODO: Would be cool not needing to do this. - let mappings: Vec<_> = mapping.collect(); - - // For each group we're tracking, select the rows from the input and - // insert into the group specific hash table. - for state_idx in 0..self.distinct_inputs.len() { - let row_sel = Arc::new(SelectionVector::from_iter(mappings.iter().filter_map( - |&(from, to)| { - if to == state_idx { - Some(from) - } else { - None - } - }, - ))); - - let inputs: Vec<_> = inputs - .iter() - .map(|&arr| { - let mut arr = arr.clone(); - arr.select_mut(row_sel.clone()); - arr - }) - .collect(); - - let len = match inputs.first() { - Some(arr) => arr.logical_len(), - None => return Ok(()), - }; - - self.hash_buf.clear(); - self.hash_buf.resize(len, 0); - - HashExecutor::hash_many(&inputs, &mut self.hash_buf)?; - - // Insert into hash map with empty inputs. 
- self.distinct_inputs[state_idx] - .as_mut() - .expect("hash table to exist") - .insert(&inputs, &self.hash_buf, &[])?; - } - - Ok(()) + fn update_group_states( + &mut self, + inputs: &[Array], + selection: Selection, + mapping: &[usize], + ) -> Result<()> { + unimplemented!() } + // fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { + // // TODO: Would be cool not needing to do this. + // let mappings: Vec<_> = mapping.collect(); + + // // For each group we're tracking, select the rows from the input and + // // insert into the group specific hash table. + // for state_idx in 0..self.distinct_inputs.len() { + // let row_sel = Arc::new(SelectionVector::from_iter(mappings.iter().filter_map( + // |&(from, to)| { + // if to == state_idx { + // Some(from) + // } else { + // None + // } + // }, + // ))); + + // let inputs: Vec<_> = inputs + // .iter() + // .map(|&arr| { + // let mut arr = arr.clone(); + // arr.select_mut(row_sel.clone()); + // arr + // }) + // .collect(); + + // let len = match inputs.first() { + // Some(arr) => arr.logical_len(), + // None => return Ok(()), + // }; + + // self.hash_buf.clear(); + // self.hash_buf.resize(len, 0); + + // HashExecutor::hash_many(&inputs, &mut self.hash_buf)?; + + // // Insert into hash map with empty inputs. + // self.distinct_inputs[state_idx] + // .as_mut() + // .expect("hash table to exist") + // .insert(&inputs, &self.hash_buf, &[])?; + // } + + // Ok(()) + // } + fn combine( &mut self, consume: &mut Box, @@ -116,43 +129,47 @@ impl AggregateGroupStates for DistinctGroupedStates { Ok(()) } - fn finalize2(&mut self) -> Result { - // And now we actually create the states we need. - self.states.new_groups(self.distinct_inputs.len()); - - let mut addresses_buf = Vec::new(); - - for (group_idx, hash_table) in self.distinct_inputs.iter_mut().enumerate() { - // Drain the hash table and inserting them into the newly created - // states. 
- let drain = hash_table.take().unwrap().into_drain(); - - for result in drain { - let batch = result?; - let len = batch.num_rows(); - // TODO: Prune group id column? - let arrays = batch.into_arrays(); - - // TODO: Bit jank, but works. We just assume we're working with - // chunk 0 always. - // - // I would like to have `GroupStates` be able to accept any - // iterator that produce row mappings, but can't really do that - // with dynamic dispatch. - addresses_buf.clear(); - addresses_buf.extend((0..len).map(|_| GroupAddress { - chunk_idx: 0, - row_idx: group_idx as u16, - })); - - let chunk_iter = ChunkGroupAddressIter::new(0, &addresses_buf); - - let inputs: Vec<_> = arrays.iter().collect(); // TODO - self.states.update_states2(&inputs, chunk_iter)?; - } - } - - // Now we can actually drain the states. - self.states.finalize2() + // fn finalize2(&mut self) -> Result { + // // And now we actually create the states we need. + // self.states.new_groups(self.distinct_inputs.len()); + + // let mut addresses_buf = Vec::new(); + + // for (group_idx, hash_table) in self.distinct_inputs.iter_mut().enumerate() { + // // Drain the hash table and inserting them into the newly created + // // states. + // let drain = hash_table.take().unwrap().into_drain(); + + // for result in drain { + // let batch = result?; + // let len = batch.num_rows(); + // // TODO: Prune group id column? + // let arrays = batch.into_arrays(); + + // // TODO: Bit jank, but works. We just assume we're working with + // // chunk 0 always. + // // + // // I would like to have `GroupStates` be able to accept any + // // iterator that produce row mappings, but can't really do that + // // with dynamic dispatch. 
+ // addresses_buf.clear(); + // addresses_buf.extend((0..len).map(|_| GroupAddress { + // chunk_idx: 0, + // row_idx: group_idx as u16, + // })); + + // let chunk_iter = ChunkGroupAddressIter::new(0, &addresses_buf); + + // let inputs: Vec<_> = arrays.iter().collect(); // TODO + // self.states.update_states2(&inputs, chunk_iter)?; + // } + // } + + // // Now we can actually drain the states. + // self.states.finalize2() + // } + + fn drain(&mut self, output: &mut Array) -> Result { + unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs index 62b4021c0..c71829984 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/drain.rs @@ -23,17 +23,18 @@ impl HashTableDrain { let chunk = &mut self.table.chunks[self.drain_idx]; self.drain_idx += 1; - // Computed aggregate columns. - let results = chunk - .aggregate_states - .iter_mut() - .map(|s| s.states.finalize2()) - .collect::>>()?; - - // Chunk arrays includes the GROUP ID column (last). - let batch = Batch2::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; - - Ok(Some(batch)) + unimplemented!() + // // Computed aggregate columns. + // let results = chunk + // .aggregate_states + // .iter_mut() + // .map(|s| s.states.finalize2()) + // .collect::>>()?; + + // // Chunk arrays includes the GROUP ID column (last). 
+ // let batch = Batch2::try_new(results.into_iter().chain(chunk.arrays.drain(..)))?; + + // Ok(Some(batch)) } } diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 623e7bd45..8b1bb3b38 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -165,8 +165,9 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { .map(|expr| batch.column(expr.idx).expect("column to exist")) .collect(); - agg_states[agg_idx] - .update_states2(&cols, ChunkGroupAddressIter::new(0, &addrs))?; + unimplemented!() + // agg_states[agg_idx] + // .update_states2(&cols, ChunkGroupAddressIter::new(0, &addrs))?; } // Keep pushing. @@ -229,17 +230,18 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { // Lock no longer needed. std::mem::drop(shared); - let arrays = final_states - .iter_mut() - .map(|s| s.finalize2()) - .collect::>>()?; + unimplemented!() + // let arrays = final_states + // .iter_mut() + // .map(|s| s.finalize2()) + // .collect::>>()?; - let batch = Batch2::try_new(arrays)?; + // let batch = Batch2::try_new(arrays)?; - *state = UngroupedAggregatePartitionState::Producing { - partition_idx: *partition_idx, - batches: vec![batch], - } + // *state = UngroupedAggregatePartitionState::Producing { + // partition_idx: *partition_idx, + // batches: vec![batch], + // } } Ok(PollFinalize::Finalized) diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs index 4efe5fa3e..2966e1bc4 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/regr_avg.rs @@ -3,14 +3,16 @@ use std::marker::PhantomData; use rayexec_error::Result; +use crate::arrays::buffer::physical_type::{AddressableMut, 
PhysicalF64}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::aggregate::AggregateState2; -use crate::arrays::executor::physical_type::PhysicalF64_2; +use crate::arrays::executor_exp::aggregate::AggregateState; +use crate::arrays::executor_exp::PutBuffer; use crate::expr::Expression; use crate::functions::aggregate::states::{ - new_binary_aggregate_states2, - primitive_finalize, + binary_update, + drain, AggregateGroupStates, + TypedAggregateGroupStates, }; use crate::functions::aggregate::{ AggregateFunction, @@ -72,10 +74,11 @@ pub struct RegrAvgYImpl; impl AggregateFunctionImpl for RegrAvgYImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( RegrAvgState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -137,10 +140,11 @@ pub struct RegrAvgXImpl; impl AggregateFunctionImpl for RegrAvgXImpl { fn new_states(&self) -> Box { - new_binary_aggregate_states2::( + Box::new(TypedAggregateGroupStates::new( RegrAvgState::::default, - move |states| primitive_finalize(DataType::Float64, states), - ) + binary_update::, + drain::, + )) } } @@ -165,7 +169,7 @@ where _input: PhantomData, } -impl AggregateState2<(f64, f64), f64> for RegrAvgState +impl AggregateState<(&f64, &f64), f64> for RegrAvgState where F: RegrAvgInput, { @@ -175,17 +179,22 @@ where Ok(()) } - fn update(&mut self, input: (f64, f64)) -> Result<()> { - self.sum += F::input(input); + fn update(&mut self, (&y, &x): (&f64, &f64)) -> Result<()> { + self.sum += F::input((y, x)); self.count += 1; Ok(()) } - fn finalize(&mut self) -> Result<(f64, bool)> { + fn finalize(&mut self, output: PutBuffer) -> Result<()> + where + M: AddressableMut, + { if self.count == 0 { - Ok((0.0, false)) + output.put_null(); } else { - Ok((self.sum / self.count as f64, true)) + let v = self.sum / self.count as f64; + output.put(&v); } + Ok(()) } } diff --git 
a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index c3c70da52..8d1d3f84a 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -1,32 +1,17 @@ use core::fmt; use std::any::Any; use std::fmt::Debug; -use std::marker::PhantomData; use rayexec_error::{RayexecError, Result}; -use stdutil::iter::IntoExactSizeIterator; use stdutil::marker::PhantomCovariant; -use super::ChunkGroupAddressIter; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::{MutablePhysicalStorage, PhysicalStorage}; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::aggregate::{ - AggregateState2, - BinaryNonNullUpdater2, - StateCombiner2, - StateFinalizer, - UnaryNonNullUpdater2, -}; -use crate::arrays::executor::builder::{ArrayBuilder, BooleanBuffer, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; use crate::arrays::executor_exp::aggregate::binary::BinaryNonNullUpdater; use crate::arrays::executor_exp::aggregate::unary::UnaryNonNullUpdater; use crate::arrays::executor_exp::aggregate::{AggregateState, StateCombiner}; use crate::arrays::executor_exp::PutBuffer; -use crate::arrays::storage::{AddressableStorage, PrimitiveStorage}; pub struct TypedAggregateGroupStates< State, @@ -98,10 +83,6 @@ where self.states.len() } - fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()> { - unimplemented!() - } - fn update_group_states( &mut self, inputs: &[Array], @@ -132,10 +113,6 @@ where ) } - fn finalize2(&mut self) -> Result { - unimplemented!() - } - fn drain(&mut self, output: &mut Array) -> Result { let num_drain = usize::min(self.states.len() - self.drain_idx, output.capacity()); let drain_states = &mut 
self.states[self.drain_idx..self.drain_idx + num_drain]; @@ -176,114 +153,6 @@ impl fmt::D } } -pub struct TypedAggregateGroupStates2 { - states: Vec, - - state_init: StateInit, - state_update: StateUpdate, - state_finalize: StateFinalize, - - _input: PhantomData, - _output: PhantomData, -} - -impl - TypedAggregateGroupStates2 -{ - pub fn new( - state_init: StateInit, - state_update: StateUpdate, - state_finalize: StateFinalize, - ) -> Self { - TypedAggregateGroupStates2 { - states: Vec::::new(), - state_init, - state_update, - state_finalize, - _input: PhantomData, - _output: PhantomData, - } - } -} - -/// Helper for create an `AggregateGroupStates` that accepts two inputs. -pub fn new_binary_aggregate_states2( - state_init: StateInit, - state_finalize: StateFinalize, -) -> Box -where - Storage1: PhysicalStorage2, - Storage2: PhysicalStorage2, - State: for<'a> AggregateState2<(Storage1::Type<'a>, Storage2::Type<'a>), Output> - + Sync - + Send - + 'static, - Output: Sync + Send + 'static, - StateInit: Fn() -> State + Sync + Send + 'static, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send + 'static, -{ - Box::new(TypedAggregateGroupStates2 { - states: Vec::::new(), - state_init, - state_update: binary_update2::, - state_finalize, - _input: PhantomData, - _output: PhantomData, - }) -} - -impl AggregateGroupStates - for TypedAggregateGroupStates2 -where - State: AggregateState2 + Sync + Send + 'static, - Input: Sync + Send, - Output: Sync + Send, - StateInit: Fn() -> State + Sync + Send, - StateUpdate: Fn(&[&Array2], ChunkGroupAddressIter, &mut [State]) -> Result<()> + Sync + Send, - StateFinalize: Fn(&mut [State]) -> Result + Sync + Send, -{ - fn opaque_states_mut(&mut self) -> OpaqueStatesMut<'_> { - OpaqueStatesMut(&mut self.states) - } - - fn new_groups(&mut self, count: usize) { - self.states.extend((0..count).map(|_| (self.state_init)())) - } - - fn num_states(&self) -> usize { - self.states.len() - } - - fn update_states2(&mut self, inputs: &[&Array2], 
mapping: ChunkGroupAddressIter) -> Result<()> { - (self.state_update)(inputs, mapping, &mut self.states) - } - - fn combine( - &mut self, - consume: &mut Box, - selection: Selection, - mapping: &[usize], - ) -> Result<()> { - // let consume_states = consume.opaque_states_mut().downcast::>()?; - // StateCombiner2::combine(consume_states, mapping, &mut self.states) - unimplemented!() - } - - fn finalize2(&mut self) -> Result { - (self.state_finalize)(&mut self.states) - } -} - -impl fmt::Debug - for TypedAggregateGroupStates2 -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("TypedAggregateGroupedStates") - .field("num_states", &self.states.len()) - .finish_non_exhaustive() - } -} - pub trait AggregateGroupStates: Debug + Sync + Send { /// Get a mutable reference to the underlying states. /// @@ -297,17 +166,12 @@ pub trait AggregateGroupStates: Debug + Sync + Send { /// Returns the number of states being tracked. fn num_states(&self) -> usize; - /// Update states from inputs using some mapping. - fn update_states2(&mut self, inputs: &[&Array2], mapping: ChunkGroupAddressIter) -> Result<()>; - fn update_group_states( &mut self, inputs: &[Array], selection: Selection, mapping: &[usize], - ) -> Result<()> { - unimplemented!() - } + ) -> Result<()>; /// Combine states from another partition into self using some mapping. fn combine( @@ -317,16 +181,11 @@ pub trait AggregateGroupStates: Debug + Sync + Send { mapping: &[usize], ) -> Result<()>; - /// Finalize the states and return an array. - fn finalize2(&mut self) -> Result; - /// Finalize and drain state into `output`. /// /// Returns the number of states drained. If the number of states drained is /// less than the capacity of the output arrays, then draining is finished. 
- fn drain(&mut self, output: &mut Array) -> Result { - unimplemented!() - } + fn drain(&mut self, output: &mut Array) -> Result; } #[derive(Debug)] @@ -384,49 +243,3 @@ where states, ) } - -/// Update function for a unary aggregate. -pub fn unary_update2( - arrays: &[&Array2], - mapping: ChunkGroupAddressIter, - states: &mut [State], -) -> Result<()> -where - Storage: PhysicalStorage2, - State: for<'a> AggregateState2, Output>, -{ - unimplemented!() - // UnaryNonNullUpdater::update::(arrays[0], mapping, states) -} - -pub fn binary_update2( - arrays: &[&Array2], - mapping: ChunkGroupAddressIter, - states: &mut [State], -) -> Result<()> -where - Storage1: PhysicalStorage2, - Storage2: PhysicalStorage2, - State: for<'a> AggregateState2<(Storage1::Type<'a>, Storage2::Type<'a>), Output>, -{ - unimplemented!() - // BinaryNonNullUpdater::update::( - // arrays[0], arrays[1], mapping, states, - // ) -} - -pub fn primitive_finalize( - datatype: DataType, - states: &mut [State], -) -> Result -where - State: AggregateState2, - Output: Copy + Default, - ArrayData2: From>, -{ - let builder = ArrayBuilder { - datatype, - buffer: PrimitiveBuffer::with_len(states.len()), - }; - StateFinalizer::finalize(states, builder) -} From b9aa85dd6374db72fcc5a403b6afe33589c2a6d8 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 15:14:06 -0600 Subject: [PATCH 53/59] rename --- .../execution/operators/hash_aggregate/distinct.rs | 2 +- .../src/functions/aggregate/states.rs | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 01f9b943c..146ea5fb7 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -51,7 +51,7 @@ impl AggregateGroupStates for DistinctGroupedStates { 
.extend((0..count).map(|_| Some(HashTable::new(16, Vec::new())))); } - fn num_states(&self) -> usize { + fn num_groups(&self) -> usize { self.distinct_inputs.len() } diff --git a/crates/rayexec_execution/src/functions/aggregate/states.rs b/crates/rayexec_execution/src/functions/aggregate/states.rs index 8d1d3f84a..e6ea716c5 100644 --- a/crates/rayexec_execution/src/functions/aggregate/states.rs +++ b/crates/rayexec_execution/src/functions/aggregate/states.rs @@ -79,7 +79,7 @@ where self.states.extend((0..count).map(|_| (self.state_init)())) } - fn num_states(&self) -> usize { + fn num_groups(&self) -> usize { self.states.len() } @@ -164,8 +164,13 @@ pub trait AggregateGroupStates: Debug + Sync + Send { fn new_groups(&mut self, count: usize); /// Returns the number of states being tracked. - fn num_states(&self) -> usize; + fn num_groups(&self) -> usize; + /// Updates group states from array inputs. + /// + /// Selection indicates which rows from the input array to use during state + /// updates, and `mapping` provides the state index to use for each row. + /// The selection and `mapping` must have the same length. fn update_group_states( &mut self, inputs: &[Array], @@ -174,6 +179,9 @@ pub trait AggregateGroupStates: Debug + Sync + Send { ) -> Result<()>; /// Combine states from another partition into self using some mapping. + /// + /// Selection indicates which states to use from `consume`, and mapping + /// indicates the target state to merge into for each selected state.
fn combine( &mut self, consume: &mut Box, From 4b745fdcd81d0f0820e4193006613056923e0560 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 15:17:49 -0600 Subject: [PATCH 54/59] some cleanup --- .../src/arrays/array/array_data.rs | 2 +- .../src/arrays/array/selection.rs | 4 ++-- .../rayexec_execution/src/arrays/array/validity.rs | 2 +- crates/rayexec_execution/src/arrays/buffer/any.rs | 2 +- crates/rayexec_execution/src/arrays/buffer/mod.rs | 1 - crates/rayexec_execution/src/arrays/buffer/raw.rs | 4 ++-- .../src/arrays/buffer/string_view.rs | 14 ++++++++++---- .../src/arrays/compute/cast/array.rs | 4 +--- .../rayexec_execution/src/arrays/compute/date.rs | 2 +- .../src/arrays/executor_exp/aggregate/unary.rs | 4 ++-- .../src/arrays/executor_exp/scalar/binary.rs | 4 ++-- .../src/arrays/executor_exp/scalar/unary.rs | 14 +++++++------- .../src/arrays/executor_exp/scalar/uniform.rs | 2 +- crates/rayexec_execution/src/arrays/testutil.rs | 4 ++-- .../execution/operators/hash_aggregate/chunk.rs | 1 - .../execution/operators/hash_aggregate/distinct.rs | 6 ------ .../src/execution/operators/ungrouped_aggregate.rs | 5 ++--- .../src/expr/physical/evaluator.rs | 2 +- .../src/expr/physical/literal_expr.rs | 1 - .../src/functions/aggregate/builtin/sum.rs | 8 ++++---- .../src/functions/aggregate/mod.rs | 1 - .../src/functions/scalar/builtin/comparison.rs | 3 +-- .../functions/scalar/builtin/list/list_extract.rs | 2 +- .../src/functions/scalar/builtin/numeric/abs.rs | 6 ------ .../src/functions/scalar/builtin/numeric/acos.rs | 6 ------ .../src/functions/scalar/builtin/numeric/asin.rs | 6 ------ .../src/functions/scalar/builtin/numeric/atan.rs | 6 ------ .../src/functions/scalar/builtin/numeric/cbrt.rs | 6 ------ .../src/functions/scalar/builtin/numeric/ceil.rs | 6 ------ .../src/functions/scalar/builtin/numeric/cos.rs | 6 ------ .../functions/scalar/builtin/numeric/degrees.rs | 6 ------ .../src/functions/scalar/builtin/numeric/exp.rs | 6 ------ 
.../src/functions/scalar/builtin/numeric/floor.rs | 6 ------ .../src/functions/scalar/builtin/numeric/ln.rs | 6 ------ .../src/functions/scalar/builtin/numeric/log.rs | 6 ------ .../functions/scalar/builtin/numeric/radians.rs | 6 ------ .../src/functions/scalar/builtin/numeric/sin.rs | 6 ------ .../src/functions/scalar/builtin/numeric/sqrt.rs | 6 ------ .../scalar/builtin/similarity/l2_distance.rs | 1 - .../functions/scalar/builtin/string/substring.rs | 4 ---- .../src/functions/scalar/builtin/string/trim.rs | 4 ---- crates/rayexec_parquet/src/reader/primitive.rs | 4 +--- crates/stdutil/src/marker.rs | 2 +- 43 files changed, 44 insertions(+), 153 deletions(-) diff --git a/crates/rayexec_execution/src/arrays/array/array_data.rs b/crates/rayexec_execution/src/arrays/array/array_data.rs index 1d4ef56ca..8337bebd6 100644 --- a/crates/rayexec_execution/src/arrays/array/array_data.rs +++ b/crates/rayexec_execution/src/arrays/array/array_data.rs @@ -152,6 +152,6 @@ where type Target = ArrayBuffer; fn deref(&self) -> &Self::Target { - ArrayData::as_ref(&self) + ArrayData::as_ref(self) } } diff --git a/crates/rayexec_execution/src/arrays/array/selection.rs b/crates/rayexec_execution/src/arrays/array/selection.rs index 2eade31da..fe40a5588 100644 --- a/crates/rayexec_execution/src/arrays/array/selection.rs +++ b/crates/rayexec_execution/src/arrays/array/selection.rs @@ -82,7 +82,7 @@ pub struct FlatSelectionIter<'a> { sel: Selection<'a>, } -impl<'a> Iterator for FlatSelectionIter<'a> { +impl Iterator for FlatSelectionIter<'_> { type Item = usize; fn next(&mut self) -> Option { @@ -107,4 +107,4 @@ impl<'a> Iterator for FlatSelectionIter<'a> { } } -impl<'a> ExactSizeIterator for FlatSelectionIter<'a> {} +impl ExactSizeIterator for FlatSelectionIter<'_> {} diff --git a/crates/rayexec_execution/src/arrays/array/validity.rs b/crates/rayexec_execution/src/arrays/array/validity.rs index 13464a7c4..beaa15ff9 100644 --- a/crates/rayexec_execution/src/arrays/array/validity.rs +++ 
b/crates/rayexec_execution/src/arrays/array/validity.rs @@ -94,7 +94,7 @@ pub struct ValidityIter<'a> { validity: &'a Validity, } -impl<'a> Iterator for ValidityIter<'a> { +impl Iterator for ValidityIter<'_> { type Item = bool; fn next(&mut self) -> Option { diff --git a/crates/rayexec_execution/src/arrays/buffer/any.rs b/crates/rayexec_execution/src/arrays/buffer/any.rs index 3da75fb1d..be7b4b39d 100644 --- a/crates/rayexec_execution/src/arrays/buffer/any.rs +++ b/crates/rayexec_execution/src/arrays/buffer/any.rs @@ -15,7 +15,7 @@ pub struct AnyAddressable<'a, B: BufferManager> { pub(crate) buffer: &'a ArrayBuffer, } -impl<'a, B> Addressable for AnyAddressable<'a, B> +impl Addressable for AnyAddressable<'_, B> where B: BufferManager, { diff --git a/crates/rayexec_execution/src/arrays/buffer/mod.rs b/crates/rayexec_execution/src/arrays/buffer/mod.rs index 3686517bb..36b473300 100644 --- a/crates/rayexec_execution/src/arrays/buffer/mod.rs +++ b/crates/rayexec_execution/src/arrays/buffer/mod.rs @@ -23,7 +23,6 @@ use string_view::{ use super::array::array_data::ArrayData; use super::array::exp::Array; use super::array::validity::Validity; -use super::scalar::ScalarValue; /// Buffer for arrays. /// diff --git a/crates/rayexec_execution/src/arrays/buffer/raw.rs b/crates/rayexec_execution/src/arrays/buffer/raw.rs index d55189f74..65fd91a61 100644 --- a/crates/rayexec_execution/src/arrays/buffer/raw.rs +++ b/crates/rayexec_execution/src/arrays/buffer/raw.rs @@ -5,7 +5,7 @@ use super::buffer_manager::BufferManager; #[derive(Debug)] pub struct RawBufferParts { /// Memory reservation for this buffer. - pub(crate) reservation: B::Reservation, + pub(crate) _reservation: B::Reservation, /// Raw pointer to start of vec. pub(crate) ptr: *mut u8, /// Number of elements `T` in the vec, not bytes. 
@@ -40,7 +40,7 @@ impl RawBufferParts { std::mem::forget(data); Ok(RawBufferParts { - reservation, + _reservation: reservation, ptr: ptr.cast(), len, cap, diff --git a/crates/rayexec_execution/src/arrays/buffer/string_view.rs b/crates/rayexec_execution/src/arrays/buffer/string_view.rs index 6240c148d..2bfb476cd 100644 --- a/crates/rayexec_execution/src/arrays/buffer/string_view.rs +++ b/crates/rayexec_execution/src/arrays/buffer/string_view.rs @@ -8,7 +8,7 @@ pub struct StringViewAddressable<'a> { pub(crate) heap: &'a StringViewHeap, } -impl<'a> Addressable for StringViewAddressable<'a> { +impl Addressable for StringViewAddressable<'_> { type T = str; fn len(&self) -> usize { @@ -28,7 +28,7 @@ pub struct StringViewAddressableMut<'a> { pub(crate) heap: &'a mut StringViewHeap, } -impl<'a> AddressableMut for StringViewAddressableMut<'a> { +impl AddressableMut for StringViewAddressableMut<'_> { type T = str; fn len(&self) -> usize { @@ -54,7 +54,7 @@ pub struct BinaryViewAddressable<'a> { pub(crate) heap: &'a StringViewHeap, } -impl<'a> Addressable for BinaryViewAddressable<'a> { +impl Addressable for BinaryViewAddressable<'_> { type T = [u8]; fn len(&self) -> usize { @@ -73,7 +73,7 @@ pub struct BinaryViewAddressableMut<'a> { pub(crate) heap: &'a mut StringViewHeap, } -impl<'a> AddressableMut for BinaryViewAddressableMut<'a> { +impl AddressableMut for BinaryViewAddressableMut<'_> { type T = [u8]; fn len(&self) -> usize { @@ -204,6 +204,12 @@ pub struct StringViewHeap { buffer: Vec, } +impl Default for StringViewHeap { + fn default() -> Self { + Self::new() + } +} + impl StringViewHeap { // TODO: Tracker pub const fn new() -> Self { diff --git a/crates/rayexec_execution/src/arrays/compute/cast/array.rs b/crates/rayexec_execution/src/arrays/compute/cast/array.rs index 2156a127f..5896836ec 100644 --- a/crates/rayexec_execution/src/arrays/compute/cast/array.rs +++ b/crates/rayexec_execution/src/arrays/compute/cast/array.rs @@ -386,7 +386,6 @@ where None => { 
fail_state.set_error(|| RayexecError::new("Failed cast decimal")); buf.put_null(); - return; } } }, @@ -429,7 +428,6 @@ where None => { fail_state.set_error(|| RayexecError::new("Failed to cast float to decimal")); buf.put_null(); - return; } }, )?; @@ -795,7 +793,7 @@ where OutBuffer::from_array(out)?, |v, buf| { string_buf.clear(); - match formatter.write(&v, &mut string_buf) { + match formatter.write(v, &mut string_buf) { Ok(_) => buf.put(string_buf.as_str()), Err(_) => { fail_state.set_error(|| RayexecError::new("Failed to cast to utf8")); diff --git a/crates/rayexec_execution/src/arrays/compute/date.rs b/crates/rayexec_execution/src/arrays/compute/date.rs index a9c4213a1..7f1a2cc4c 100644 --- a/crates/rayexec_execution/src/arrays/compute/date.rs +++ b/crates/rayexec_execution/src/arrays/compute/date.rs @@ -171,7 +171,7 @@ where arr, sel, f, - |val| DateTime::from_timestamp_nanos(val), + DateTime::from_timestamp_nanos, out, ), } diff --git a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs index 463b568af..2c09667d4 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/aggregate/unary.rs @@ -52,8 +52,8 @@ impl UnaryNonNullUpdater { Ok(()) } - pub fn update_flat<'a, S, State, Output>( - array: FlatArrayView<'a>, + pub fn update_flat( + array: FlatArrayView<'_>, selection: impl IntoExactSizeIterator, mapping: impl IntoExactSizeIterator, states: &mut [State], diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs index 894a56487..3ddbdbc75 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/binary.rs @@ -92,8 +92,8 @@ impl BinaryExecutor { { // TODO: length validation - let input1 = 
S1::get_addressable(&array1.array_buffer)?; - let input2 = S2::get_addressable(&array2.array_buffer)?; + let input1 = S1::get_addressable(array1.array_buffer)?; + let input2 = S2::get_addressable(array2.array_buffer)?; let mut output = O::get_addressable_mut(out.buffer)?; diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs index 3e2f62a92..3cf7e2122 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -60,8 +60,8 @@ impl UnaryExecutor { Ok(()) } - pub fn execute_flat<'a, S, O, Op>( - array: FlatArrayView<'a>, + pub fn execute_flat( + array: FlatArrayView<'_>, selection: impl IntoExactSizeIterator, out: OutBuffer, mut op: Op, @@ -71,7 +71,7 @@ impl UnaryExecutor { O: MutablePhysicalStorage, for<'b> Op: FnMut(&S::StorageType, PutBuffer>), { - let input = S::get_addressable(&array.array_buffer)?; + let input = S::get_addressable(array.array_buffer)?; let mut output = O::get_addressable_mut(out.buffer)?; let validity = array.validity; @@ -141,8 +141,8 @@ impl UnaryExecutor { /// with None. /// /// Note this should really only be used for tests. 
- pub fn for_each_flat<'a, S, Op>( - array: FlatArrayView<'a>, + pub fn for_each_flat( + array: FlatArrayView<'_>, selection: impl IntoExactSizeIterator, mut op: Op, ) -> Result<()> @@ -150,7 +150,7 @@ impl UnaryExecutor { S: PhysicalStorage, Op: FnMut(usize, Option<&S::StorageType>), { - let input = S::get_addressable(&array.array_buffer)?; + let input = S::get_addressable(array.array_buffer)?; let validity = array.validity; if validity.all_valid() { @@ -242,7 +242,7 @@ mod tests { fn int32_inc_by_2_in_place() { let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); - UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v = *v + 2) + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v += 2) .unwrap(); let arr_slice = array.data().try_as_slice::().unwrap(); diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs index d45f71741..8770acda3 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs +++ b/crates/rayexec_execution/src/arrays/executor_exp/scalar/uniform.rs @@ -96,7 +96,7 @@ impl UniformExecutor { let inputs = arrays .iter() - .map(|arr| S::get_addressable(&arr.array_buffer)) + .map(|arr| S::get_addressable(arr.array_buffer)) .collect::>>()?; let all_valid = arrays.iter().all(|arr| arr.validity.all_valid()); diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index 988e760ec..4ca566d66 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -119,8 +119,8 @@ fn assert_arrays_eq_sel_list_inner( _ => panic!("Missing child for array 2"), }; - let metas1 = PhysicalList::get_addressable(&flat1.array_buffer).unwrap(); - let metas2 = PhysicalList::get_addressable(&flat2.array_buffer).unwrap(); + let metas1 = PhysicalList::get_addressable(flat1.array_buffer).unwrap(); + let metas2 = 
PhysicalList::get_addressable(flat2.array_buffer).unwrap(); let sel1 = sel1.into_iter(); let sel2 = sel2.into_iter(); diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs index 05d6d0c9a..c404b4179 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/chunk.rs @@ -6,7 +6,6 @@ use crate::arrays::array::Array2; use crate::arrays::executor::physical_type::PhysicalType2; use crate::arrays::executor::scalar::concat; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; -use crate::functions::aggregate::ChunkGroupAddressIter; /// Holds a chunk of value for the aggregate hash table. #[derive(Debug)] diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs index 146ea5fb7..787a3a3b3 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/distinct.rs @@ -1,16 +1,10 @@ -use std::sync::Arc; use rayexec_error::Result; use super::hash_table::HashTable; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; -use crate::arrays::array::Array2; -use crate::arrays::executor::scalar::HashExecutor; -use crate::arrays::selection::SelectionVector; -use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; use crate::functions::aggregate::states::{AggregateGroupStates, OpaqueStatesMut}; -use crate::functions::aggregate::ChunkGroupAddressIter; /// And implementation of GroupedStates that buffers inputs to an aggregate in a /// hash table to ensure the aggregate is computed with distinct values. 
diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 8b1bb3b38..961e7e5a2 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -23,7 +23,6 @@ use crate::execution::operators::InputOutputStates; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalAggregateExpression; use crate::functions::aggregate::states::AggregateGroupStates; -use crate::functions::aggregate::ChunkGroupAddressIter; use crate::proto::DatabaseProtoConv; #[derive(Debug)] @@ -149,7 +148,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { }; match state { - UngroupedAggregatePartitionState::Aggregating { agg_states, .. } => { + UngroupedAggregatePartitionState::Aggregating { .. } => { // All rows map to the same group (group 0) let addrs: Vec<_> = (0..batch.num_rows()) .map(|_| GroupAddress { @@ -217,7 +216,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { if shared.remaining == 0 { // This partition is the chosen one to produce the output. - let mut final_states = std::mem::take(&mut shared.agg_states); + let final_states = std::mem::take(&mut shared.agg_states); // Wake up other partitions to let them know they are not // the chosen ones. 
diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index 1cf350a07..b694ecfa6 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -5,7 +5,7 @@ use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::buffer_manager::NopBufferManager; -use crate::arrays::scalar::{OwnedScalarValue, ScalarValue}; +use crate::arrays::scalar::OwnedScalarValue; /// Evaluate expressions on batch inputs. #[derive(Debug)] diff --git a/crates/rayexec_execution/src/expr/physical/literal_expr.rs b/crates/rayexec_execution/src/expr/physical/literal_expr.rs index a35bced6e..96314c7d9 100644 --- a/crates/rayexec_execution/src/expr/physical/literal_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/literal_expr.rs @@ -1,4 +1,3 @@ -use std::borrow::Cow; use std::fmt; use rayexec_error::{OptionExt, Result}; diff --git a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs index d6f766750..60e761f50 100644 --- a/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs +++ b/crates/rayexec_execution/src/functions/aggregate/builtin/sum.rs @@ -230,13 +230,13 @@ mod tests { use super::*; use crate::arrays::array::exp::Array; use crate::arrays::array::selection::Selection; - use crate::arrays::array::Array2; + use crate::arrays::buffer::buffer_manager::NopBufferManager; - use crate::arrays::scalar::ScalarValue; + use crate::arrays::testutil::{assert_arrays_eq, assert_arrays_eq_sel}; - use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; + use crate::expr; - use crate::functions::aggregate::ChunkGroupAddressIter; + #[test] fn sum_i64_single_group_two_partitions() { diff --git a/crates/rayexec_execution/src/functions/aggregate/mod.rs 
b/crates/rayexec_execution/src/functions/aggregate/mod.rs index 7ccc951ad..a75647076 100644 --- a/crates/rayexec_execution/src/functions/aggregate/mod.rs +++ b/crates/rayexec_execution/src/functions/aggregate/mod.rs @@ -10,7 +10,6 @@ use states::AggregateGroupStates; use super::FunctionInfo; use crate::arrays::datatype::DataType; -use crate::arrays::executor::aggregate::RowToStateMapping; use crate::execution::operators::hash_aggregate::hash_table::GroupAddress; use crate::expr::Expression; use crate::logical::binder::table_list::TableList; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index 99be81f0c..df0425fe4 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -41,7 +41,6 @@ use crate::arrays::executor::physical_type::{ PhysicalI64_2, PhysicalI8_2, PhysicalInterval_2, - PhysicalStorage2, PhysicalType2, PhysicalU128_2, PhysicalU16_2, @@ -51,7 +50,7 @@ use crate::arrays::executor::physical_type::{ PhysicalUntypedNull_2, PhysicalUtf8_2, }; -use crate::arrays::executor::scalar::{BinaryExecutor2, BinaryListReducer2, FlexibleListExecutor}; +use crate::arrays::executor::scalar::{BinaryListReducer2, FlexibleListExecutor}; use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; use crate::arrays::executor_exp::OutBuffer; use crate::expr::cast_expr::CastExpr; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs index 89773eb85..757568c68 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_extract.rs @@ -166,7 +166,7 @@ where { let flat = array.flat_view()?; - let metas = PhysicalList::get_addressable(&flat.array_buffer)?; + let metas = 
PhysicalList::get_addressable(flat.array_buffer)?; let child = match flat.array_buffer.get_secondary() { SecondaryBuffer::List(l) => &l.child, _ => return Err(RayexecError::new("Missing secondary buffer for list")), diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs index be949b68f..7e3579827 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/abs.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Abs = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs index 46b26bdec..9fe35a5ec 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/acos.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use 
crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Acos = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs index 6c1ddc6d5..bec4271a3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/asin.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Asin = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs index 61a4ab7c1..4657884b2 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/atan.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use 
crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Atan = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs index ee6979ac0..51d97f78d 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cbrt.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Cbrt = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs index b439a0f02..cd40c8e2c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ceil.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use 
crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Ceil = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs index bc3c36af1..38e377c8a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/cos.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Cos = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs index b79f51c4a..a48f4e052 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/degrees.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use 
crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Degrees = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs index 649b8c3bf..0042edae9 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/exp.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Exp = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs index c47e8ba09..67e17252e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/floor.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use 
super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Floor = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs index 5650abb5e..51f013c8c 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/ln.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Ln = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs index cb9c9ccec..042faf9a5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/log.rs @@ 
-4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Log = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs index 86e94812c..b58d4fee5 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/radians.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Radians = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs index 592a1a090..29d055644 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs +++ 
b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sin.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Sin = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs index c0c3f58e2..7f0f6e64f 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/numeric/sqrt.rs @@ -4,15 +4,9 @@ use rayexec_error::Result; use super::{UnaryInputNumericOperation, UnaryInputNumericScalar}; use crate::arrays::array::exp::Array; -use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::buffer::physical_type::MutablePhysicalStorage; -use crate::arrays::datatype::DataType; -use crate::arrays::executor::builder::{ArrayBuilder, PrimitiveBuffer}; -use crate::arrays::executor::physical_type::PhysicalStorage2; -use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; -use crate::arrays::storage::PrimitiveStorage; pub type Sqrt = UnaryInputNumericScalar; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 857a093f7..919064350 100644 --- 
a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -5,7 +5,6 @@ use num_traits::{AsPrimitive, Float}; use rayexec_error::Result; use crate::arrays::array::exp::Array; -use crate::arrays::array::Array2; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::physical_type::{ MutablePhysicalStorage, diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs index 894abfdcf..82c5ed1db 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/substring.rs @@ -1,13 +1,9 @@ use rayexec_error::Result; use crate::arrays::array::exp::Array; -use crate::arrays::array::Array2; use crate::arrays::batch_exp::Batch; use crate::arrays::buffer::physical_type::{PhysicalI64, PhysicalUtf8}; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::{PhysicalI64_2, PhysicalUtf8_2}; -use crate::arrays::executor::scalar::{BinaryExecutor2, TernaryExecutor2}; use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; use crate::arrays::executor_exp::scalar::ternary::TernaryExecutor; use crate::arrays::executor_exp::OutBuffer; diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs index 02ba39e98..0cb13955e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/string/trim.rs @@ -4,13 +4,9 @@ use std::marker::PhantomData; use rayexec_error::Result; use crate::arrays::array::exp::Array; -use crate::arrays::array::Array2; use crate::arrays::batch_exp::Batch; use 
crate::arrays::buffer::physical_type::PhysicalUtf8; use crate::arrays::datatype::{DataType, DataTypeId}; -use crate::arrays::executor::builder::{ArrayBuilder, GermanVarlenBuffer}; -use crate::arrays::executor::physical_type::PhysicalUtf8_2; -use crate::arrays::executor::scalar::{BinaryExecutor2, UnaryExecutor2}; use crate::arrays::executor_exp::scalar::binary::BinaryExecutor; use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; use crate::arrays::executor_exp::OutBuffer; diff --git a/crates/rayexec_parquet/src/reader/primitive.rs b/crates/rayexec_parquet/src/reader/primitive.rs index 65f26d094..f7ccb09e1 100644 --- a/crates/rayexec_parquet/src/reader/primitive.rs +++ b/crates/rayexec_parquet/src/reader/primitive.rs @@ -6,8 +6,6 @@ use parquet::schema::types::ColumnDescPtr; use rayexec_error::{RayexecError, Result}; use rayexec_execution::arrays::array::{Array2, ArrayData2}; use rayexec_execution::arrays::bitmap::Bitmap; -use rayexec_execution::arrays::compute::cast::array::cast_array; -use rayexec_execution::arrays::compute::cast::behavior::CastFailBehavior; use rayexec_execution::arrays::datatype::DataType; use rayexec_execution::arrays::storage::{BooleanStorage, PrimitiveStorage}; @@ -86,7 +84,7 @@ where let needs_cast = build_type != self.datatype; - let mut array = match bitmap { + let array = match bitmap { Some(bitmap) => { Array2::new_with_validity_and_array_data(build_type, bitmap, array_data) } diff --git a/crates/stdutil/src/marker.rs b/crates/stdutil/src/marker.rs index 956ddfd7e..b835146fb 100644 --- a/crates/stdutil/src/marker.rs +++ b/crates/stdutil/src/marker.rs @@ -26,7 +26,7 @@ where T: ?Sized, { fn clone(&self) -> Self { - Self::new() + *self } } From bf2acd673cf6e602a0135c0cbc5102137c81e14f Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 15:30:21 -0600 Subject: [PATCH 55/59] temp rename --- .../src/execution/executable/pipeline.rs | 30 +- .../src/execution/executable/planner.rs | 28 +- 
.../src/execution/operators/analyze.rs | 24 +- .../src/execution/operators/batch_resizer.rs | 46 +-- .../src/execution/operators/create_schema.rs | 34 +-- .../src/execution/operators/create_view.rs | 34 +-- .../src/execution/operators/drop.rs | 34 +-- .../src/execution/operators/empty.rs | 34 +-- .../execution/operators/hash_aggregate/mod.rs | 38 +-- .../src/execution/operators/hash_join/mod.rs | 70 ++--- .../src/execution/operators/limit.rs | 68 ++--- .../src/execution/operators/mod.rs | 284 +++++++++--------- .../src/execution/operators/nl_join.rs | 50 +-- .../src/execution/operators/round_robin.rs | 38 +-- .../src/execution/operators/scan.rs | 42 +-- .../src/execution/operators/simple.rs | 42 +-- .../src/execution/operators/sink.rs | 56 ++-- .../execution/operators/sort/gather_sort.rs | 80 ++--- .../execution/operators/sort/scatter_sort.rs | 64 ++-- .../src/execution/operators/sort/top_k.rs | 24 +- .../src/execution/operators/source.rs | 42 +-- .../src/execution/operators/table_function.rs | 42 +-- .../src/execution/operators/table_inout.rs | 38 +-- .../src/execution/operators/test_util.rs | 16 +- .../operators/ungrouped_aggregate.rs | 44 +-- .../src/execution/operators/union.rs | 50 +-- .../src/execution/operators/unnest.rs | 42 +-- .../src/execution/operators/values.rs | 34 +-- .../src/execution/operators/window/mod.rs | 24 +- .../src/functions/table/builtin/series.rs | 12 +- .../src/functions/table/builtin/unnest.rs | 12 +- .../src/functions/table/inout.rs | 6 +- 32 files changed, 742 insertions(+), 740 deletions(-) diff --git a/crates/rayexec_execution/src/execution/executable/pipeline.rs b/crates/rayexec_execution/src/execution/executable/pipeline.rs index df452e00a..533233e48 100644 --- a/crates/rayexec_execution/src/execution/executable/pipeline.rs +++ b/crates/rayexec_execution/src/execution/executable/pipeline.rs @@ -13,9 +13,9 @@ use crate::execution::operators::{ OperatorState, PartitionState, PhysicalOperator, - PollFinalize, - PollPull, - PollPush, 
+ PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::runtime::time::{RuntimeInstant, Timer}; @@ -346,7 +346,7 @@ impl ExecutablePartitionPipeline { // Otherwise do a normal pull. let timer = Timer::::start(); - let poll_pull = operator.physical.poll_pull( + let poll_pull = operator.physical.poll_pull2( cx, &mut operator.partition_state, &operator.operator_state, @@ -355,7 +355,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.elapsed += elapsed; match poll_pull { - Ok(PollPull::Computed(mut computed)) => { + Ok(PollPull2::Computed(mut computed)) => { operator.profile_data.rows_emitted += computed.total_num_rows(); // TODO: We should have something to indicate materialized vs not. let batch = match computed.try_pop_front()? { @@ -385,10 +385,10 @@ impl ExecutablePartitionPipeline { }; continue; } - Ok(PollPull::Pending) => { + Ok(PollPull2::Pending) => { return Poll::Pending; } - Ok(PollPull::Exhausted) => { + Ok(PollPull2::Exhausted) => { // Finalize the next operator to indicate that it // will no longer be receiving batch inputs. *state = PipelinePartitionState::FinalizePush { @@ -416,7 +416,7 @@ impl ExecutablePartitionPipeline { .expect("next operator to exist"); let timer = Timer::::start(); - let poll_finalize = next_operator.physical.poll_finalize_push( + let poll_finalize = next_operator.physical.poll_finalize_push2( cx, &mut next_operator.partition_state, &next_operator.operator_state, @@ -425,7 +425,7 @@ impl ExecutablePartitionPipeline { next_operator.profile_data.elapsed += elapsed; match poll_finalize { - Ok(PollFinalize::Finalized) => { + Ok(PollFinalize2::Finalized) => { if self.pull_start.pull_start == self.operators.len() - 1 { // This partition pipeline has been completely exhausted, and // we've just finalized the "sink" operator. We're done. @@ -437,7 +437,7 @@ impl ExecutablePartitionPipeline { // next non-exhausted operator. 
*state = self.pull_start.next_start_state()?; } - Ok(PollFinalize::Pending) => return Poll::Pending, + Ok(PollFinalize2::Pending) => return Poll::Pending, Err(e) => { // Erroring on finalize is not recoverable. *state = PipelinePartitionState::Completed; @@ -460,7 +460,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.rows_read += batch.num_rows(); let timer = Timer::::start(); - let poll_push = operator.physical.poll_push( + let poll_push = operator.physical.poll_push2( cx, &mut operator.partition_state, &operator.operator_state, @@ -470,7 +470,7 @@ impl ExecutablePartitionPipeline { operator.profile_data.elapsed += elapsed; match poll_push { - Ok(PollPush::Pushed) => { + Ok(PollPush2::Pushed) => { // We successfully pushed to the operator. // // If we pushed to last operator (the 'sink'), we @@ -489,7 +489,7 @@ impl ExecutablePartitionPipeline { } continue; } - Ok(PollPush::Pending(batch)) => { + Ok(PollPush2::Pending(batch)) => { // Operator not ready to accept input. // // Waker has been registered, and this pipeline will @@ -502,7 +502,7 @@ impl ExecutablePartitionPipeline { }; return Poll::Pending; } - Ok(PollPush::Break) => { + Ok(PollPush2::Break) => { // Operator has received everything it needs. Set // the pipeline to start pulling from the operator, // even if the operator we're currently pull from @@ -520,7 +520,7 @@ impl ExecutablePartitionPipeline { }; continue; } - Ok(PollPush::NeedsMore) => { + Ok(PollPush2::NeedsMore) => { // Operator accepted input, but needs more input // before it will produce output. 
// diff --git a/crates/rayexec_execution/src/execution/executable/planner.rs b/crates/rayexec_execution/src/execution/executable/planner.rs index e26b4d20c..fbd177dcf 100644 --- a/crates/rayexec_execution/src/execution/executable/planner.rs +++ b/crates/rayexec_execution/src/execution/executable/planner.rs @@ -22,7 +22,7 @@ use crate::execution::operators::sink::{SinkOperation, SinkOperator}; use crate::execution::operators::source::{SourceOperation, SourceOperator}; use crate::execution::operators::{ ExecutableOperator, - InputOutputStates, + InputOutputStates2, OperatorState, PartitionState, PhysicalOperator, @@ -363,9 +363,9 @@ impl PendingQuery { } let operator = Arc::new(PhysicalOperator::ResultSink(SinkOperator::new(sink))); - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => return Err(RayexecError::new("invalid partition states for query sink")), }; @@ -429,9 +429,9 @@ impl PendingQuery { } }; - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => return Err(RayexecError::new("invalid partition states")), }; @@ -549,9 +549,9 @@ impl PendingQuery { } }; - let states = operator.create_states(context, vec![partitions])?; + let states = operator.create_states2(context, vec![partitions])?; let partition_states = match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, _ => { return 
Err(RayexecError::new( "Invalid partition states for query source", @@ -610,10 +610,10 @@ impl PendingQuery { ) -> Result { let rr_operator = Arc::new(PhysicalOperator::RoundRobin(PhysicalRoundRobinRepartition)); let states = rr_operator - .create_states(context, vec![pipeline.num_partitions(), output_partitions])?; + .create_states2(context, vec![pipeline.num_partitions(), output_partitions])?; let (push_states, pull_states) = match states.partition_states { - InputOutputStates::SeparateInputOutput { + InputOutputStates2::SeparateInputOutput { push_states, pull_states, } => (push_states, pull_states), @@ -696,17 +696,19 @@ impl PendingOperatorWithState { .unwrap_or(config.partitions); // TODO: How to get other input partitions. - let states = operator.operator.create_states(context, vec![partitions])?; + let states = operator + .operator + .create_states2(context, vec![partitions])?; Ok(match states.partition_states { - InputOutputStates::OneToOne { partition_states } => PendingOperatorWithState { + InputOutputStates2::OneToOne { partition_states } => PendingOperatorWithState { operator: operator.operator, operator_state: states.operator_state, input_states: vec![Some(partition_states)], pull_states: VecDeque::new(), trunk_idx: 0, }, - InputOutputStates::NaryInputSingleOutput { + InputOutputStates2::NaryInputSingleOutput { partition_states, pull_states, } => { @@ -719,7 +721,7 @@ impl PendingOperatorWithState { trunk_idx: pull_states, } } - InputOutputStates::SeparateInputOutput { + InputOutputStates2::SeparateInputOutput { push_states, pull_states, } => PendingOperatorWithState { diff --git a/crates/rayexec_execution/src/execution/operators/analyze.rs b/crates/rayexec_execution/src/execution/operators/analyze.rs index eb572feab..8fdbd2921 100644 --- a/crates/rayexec_execution/src/execution/operators/analyze.rs +++ b/crates/rayexec_execution/src/execution/operators/analyze.rs @@ -4,12 +4,12 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - 
ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -20,39 +20,39 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; pub struct PhysicalAnalyze {} impl ExecutableOperator for PhysicalAnalyze { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs index f43d94fc7..34c798d10 100644 --- a/crates/rayexec_execution/src/execution/operators/batch_resizer.rs +++ b/crates/rayexec_execution/src/execution/operators/batch_resizer.rs @@ -6,13 +6,13 @@ use rayexec_error::Result; use super::util::resizer::{BatchResizer, DEFAULT_TARGET_BATCH_SIZE}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -37,14 +37,14 @@ pub struct BatchResizerPartitionState { pub struct PhysicalBatchResizer; impl ExecutableOperator for 
PhysicalBatchResizer { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) .map(|_| { PartitionState::BatchResizer(BatchResizerPartitionState { @@ -60,13 +60,13 @@ impl ExecutableOperator for PhysicalBatchResizer { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -80,7 +80,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let computed = state.resizer.try_push(batch)?; @@ -92,19 +92,19 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } else { // Otherwise we need more batches. 
- Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -118,7 +118,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } state.exhausted = true; @@ -128,15 +128,15 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::BatchResizer(state) => state, other => panic!("invalid state: {other:?}"), @@ -144,7 +144,7 @@ impl ExecutableOperator for PhysicalBatchResizer { if state.buffered.is_empty() { if state.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // Register wakeup. 
@@ -153,7 +153,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake() } - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } let buffered = state.buffered.take(); @@ -162,7 +162,7 @@ impl ExecutableOperator for PhysicalBatchResizer { waker.wake() } - Ok(PollPull::Computed(buffered)) + Ok(PollPull2::Computed(buffered)) } } diff --git a/crates/rayexec_execution/src/execution/operators/create_schema.rs b/crates/rayexec_execution/src/execution/operators/create_schema.rs index 27305fe62..ea68d4f1a 100644 --- a/crates/rayexec_execution/src/execution/operators/create_schema.rs +++ b/crates/rayexec_execution/src/execution/operators/create_schema.rs @@ -9,13 +9,13 @@ use rayexec_proto::ProtoConv; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; @@ -50,11 +50,11 @@ impl PhysicalCreateSchema { } impl ExecutableOperator for PhysicalCreateSchema { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new( "Create schema operator can only handle 1 partition", @@ -73,9 +73,9 @@ impl ExecutableOperator for PhysicalCreateSchema { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::CreateSchema(CreateSchemaPartitionState { create, })], @@ -83,36 +83,36 @@ impl ExecutableOperator for PhysicalCreateSchema { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { 
Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::CreateSchema(state) => match state.create.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/create_view.rs b/crates/rayexec_execution/src/execution/operators/create_view.rs index dc2208304..7a5bda5ea 100644 --- a/crates/rayexec_execution/src/execution/operators/create_view.rs +++ b/crates/rayexec_execution/src/execution/operators/create_view.rs @@ -8,13 +8,13 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; @@ -41,11 +41,11 @@ pub struct PhysicalCreateView { } impl ExecutableOperator for PhysicalCreateView { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new( "Create schema operator can only handle 1 partition", @@ -71,9 +71,9 @@ impl ExecutableOperator for PhysicalCreateView { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { 
operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::CreateView(CreateViewPartitionState { create, })], @@ -81,36 +81,36 @@ impl ExecutableOperator for PhysicalCreateView { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create view")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create view")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::CreateView(state) => match state.create.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/drop.rs b/crates/rayexec_execution/src/execution/operators/drop.rs index 13420f9a6..c30a5a20a 100644 --- a/crates/rayexec_execution/src/execution/operators/drop.rs +++ b/crates/rayexec_execution/src/execution/operators/drop.rs @@ -9,13 +9,13 @@ use rayexec_proto::ProtoConv; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; @@ -47,11 +47,11 @@ 
impl PhysicalDrop { } impl ExecutableOperator for PhysicalDrop { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions[0] != 1 { return Err(RayexecError::new("Drop can only handle one partition")); } @@ -68,44 +68,44 @@ impl ExecutableOperator for PhysicalDrop { Ok(()) }); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: vec![PartitionState::Drop(DropPartitionState { drop })], }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical create table")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Drop(state) => match state.drop.poll_unpin(cx) { - Poll::Ready(Ok(_)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(_)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollPull::Pending), + Poll::Pending => Ok(PollPull2::Pending), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/empty.rs b/crates/rayexec_execution/src/execution/operators/empty.rs index ac812691c..e8adec2bc 100644 --- a/crates/rayexec_execution/src/execution/operators/empty.rs +++ b/crates/rayexec_execution/src/execution/operators/empty.rs @@ -5,16 +5,16 @@ use rayexec_error::{RayexecError, 
Result}; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; -use crate::execution::operators::InputOutputStates; +use crate::execution::operators::InputOutputStates2; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::proto::DatabaseProtoConv; @@ -29,14 +29,14 @@ pub struct EmptyPartitionState { pub struct PhysicalEmpty; impl ExecutableOperator for PhysicalEmpty { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) .map(|_| PartitionState::Empty(EmptyPartitionState { finished: false })) .collect(), @@ -44,38 +44,38 @@ impl ExecutableOperator for PhysicalEmpty { }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical empty")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical empty")) } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Empty(state) => { if state.finished { - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } else { state.finished = true; - Ok(PollPull::Computed(Batch2::empty_with_num_rows(1).into())) + 
Ok(PollPull2::Computed(Batch2::empty_with_num_rows(1).into())) } } other => panic!("inner join state is not building: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs index 8399bda8a..738b51ef9 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_aggregate/mod.rs @@ -15,7 +15,7 @@ use hash_table::HashTable; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use super::{ExecutionStates, InputOutputStates, PollFinalize}; +use super::{ExecutionStates2, InputOutputStates2, PollFinalize2}; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; use crate::arrays::bitmap::Bitmap; @@ -32,8 +32,8 @@ use crate::execution::operators::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalAggregateExpression; @@ -223,11 +223,11 @@ impl PhysicalHashAggregate { } impl ExecutableOperator for PhysicalHashAggregate { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; // Create column selection bitmaps for each aggregate expression. 
These @@ -287,19 +287,19 @@ impl ExecutableOperator for PhysicalHashAggregate { partition_states.push(partition_state); } - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(operator_state), - partition_states: InputOutputStates::OneToOne { partition_states }, + partition_states: InputOutputStates2::OneToOne { partition_states }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -310,7 +310,7 @@ impl ExecutableOperator for PhysicalHashAggregate { self.insert_batch_agg_hash_table(state, batch)?; // Aggregates don't produce anything until it's been finalized. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } HashAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to push to partition that should be producing batches", @@ -318,12 +318,12 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -368,7 +368,7 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } HashAggregatePartitionState::Producing { .. 
} => Err(RayexecError::new( "Attempted to finalize a partition that's producing output", @@ -376,12 +376,12 @@ impl ExecutableOperator for PhysicalHashAggregate { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -402,7 +402,7 @@ impl ExecutableOperator for PhysicalHashAggregate { // Still need to wait for some input partitions to complete. Store our // waker and come back later. shared_state.pull_waker = Some(cx.waker().clone()); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // Othewise let's build the final table. Note that @@ -422,7 +422,7 @@ impl ExecutableOperator for PhysicalHashAggregate { let batch = match state.hashtable_drain.as_mut().unwrap().next() { Some(Ok(batch)) => batch, Some(Err(e)) => return Err(e), - None => return Ok(PollPull::Exhausted), + None => return Ok(PollPull2::Exhausted), }; // Prune off GROUP ID column, generate appropriate GROUPING @@ -468,12 +468,12 @@ impl ExecutableOperator for PhysicalHashAggregate { let batch = Batch2::try_new(arrays)?; - Ok(PollPull::Computed(ComputedBatches::Single(batch))) + Ok(PollPull2::Computed(ComputedBatches::Single(batch))) } HashAggregatePartitionState::Aggregating(state) => { let mut shared = operator_state.output_states[state.partition_idx].lock(); shared.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs index 11e7a9241..4f0756c28 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs @@ -16,13 +16,13 @@ use 
super::util::outer_join_tracker::{LeftOuterJoinDrainState, LeftOuterJoinTrac use super::{ ComputedBatches, ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::arrays::datatype::DataType; @@ -188,11 +188,11 @@ impl PhysicalHashJoin { } impl ExecutableOperator for PhysicalHashJoin { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Determine if this is what we want. let build_partitions = partitions[0]; let probe_partitions = partitions[0]; @@ -237,33 +237,33 @@ impl ExecutableOperator for PhysicalHashJoin { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::HashJoin(operator_state)), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![build_states, probe_states], pull_states: Self::PROBE_SIDE_INPUT_INDEX, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { match partition_state { PartitionState::HashJoinBuild(state) => { self.insert_into_local_table(state, batch)?; - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } PartitionState::HashJoinProbe(state) => { // If we have pending output, we need to wait for that to get // pulled before trying to compute additional batches. if !state.buffered_output.is_empty() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let operator_state = match operator_state { @@ -280,7 +280,7 @@ impl ExecutableOperator for PhysicalHashJoin { // waker to come back later. 
if shared.build_inputs_remaining != 0 { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let global = match shared.global_hash_table.as_ref() { @@ -290,7 +290,7 @@ impl ExecutableOperator for PhysicalHashJoin { // thread. Come back when it's ready. shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } }; @@ -334,25 +334,25 @@ impl ExecutableOperator for PhysicalHashJoin { state.buffered_output = ComputedBatches::new(batches); if state.buffered_output.is_empty() { // No batches joined, keep pushing to this operator. - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::HashJoinBuild(state) => { let mut shared = match operator_state { @@ -413,7 +413,7 @@ impl ExecutableOperator for PhysicalHashJoin { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } PartitionState::HashJoinProbe(state) => { let mut shared = match operator_state { @@ -429,7 +429,7 @@ impl ExecutableOperator for PhysicalHashJoin { // left side. 
if shared.build_inputs_remaining != 0 { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } // It's possible for this partition not have this if we pushed @@ -443,7 +443,7 @@ impl ExecutableOperator for PhysicalHashJoin { } None => { shared.probe_push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollFinalize::Pending); + return Ok(PollFinalize2::Pending); } } @@ -485,18 +485,18 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::HashJoinProbe(state) => state, PartitionState::HashJoinBuild(_) => { @@ -515,7 +515,7 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollPull::Computed(computed)) + Ok(PollPull2::Computed(computed)) } else { // No batches computed, check if we're done. if state.input_finished { @@ -530,7 +530,7 @@ impl ExecutableOperator for PhysicalHashJoin { if shared.probe_inputs_remaining != 0 { // Global state does not yet have all inputs. Need to wait. shared.probe_drain_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } let start_idx = state.partition_idx; @@ -565,26 +565,26 @@ impl ExecutableOperator for PhysicalHashJoin { if matches!(self.join_type, JoinType::LeftMark { .. }) { // Mark drain match drain_state.drain_mark_next()? 
{ - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } else if matches!(self.join_type, JoinType::Semi) { // Semi drain match drain_state.drain_semi_next()? { - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } else { // Normal left drain match drain_state.drain_next()? { - Some(batch) => return Ok(PollPull::Computed(batch.into())), - None => return Ok(PollPull::Exhausted), + Some(batch) => return Ok(PollPull2::Computed(batch.into())), + None => return Ok(PollPull2::Exhausted), } } } // We're done. - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // No batch available, come back later. @@ -595,7 +595,7 @@ impl ExecutableOperator for PhysicalHashJoin { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/limit.rs b/crates/rayexec_execution/src/execution/operators/limit.rs index 0335bb6b2..b63529f2b 100644 --- a/crates/rayexec_execution/src/execution/operators/limit.rs +++ b/crates/rayexec_execution/src/execution/operators/limit.rs @@ -5,13 +5,13 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -62,16 +62,16 @@ impl PhysicalLimit { } impl ExecutableOperator for PhysicalLimit { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; - Ok(ExecutionStates { + 
Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions) .map(|_| { PartitionState::Limit(LimitPartitionState { @@ -88,13 +88,13 @@ impl ExecutableOperator for PhysicalLimit { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -102,7 +102,7 @@ impl ExecutableOperator for PhysicalLimit { if state.buffer.is_some() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let batch = if state.remaining_offset > 0 { @@ -110,7 +110,7 @@ impl ExecutableOperator for PhysicalLimit { // batch, and keep asking for more input. if state.remaining_offset >= batch.num_rows() { state.remaining_offset -= batch.num_rows(); - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } // Otherwise we have to slice the batch at the offset point. @@ -148,18 +148,18 @@ impl ExecutableOperator for PhysicalLimit { // instead the partition pipeline will immediately start to pull // from this operator. 
state.finished = true; - Ok(PollPush::Break) + Ok(PollPush2::Break) } else { - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -170,31 +170,31 @@ impl ExecutableOperator for PhysicalLimit { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Limit(state) => state, other => panic!("invalid partition state: {other:?}"), }; match state.buffer.take() { - Some(batch) => Ok(PollPull::Computed(batch.into())), + Some(batch) => Ok(PollPull2::Computed(batch.into())), None => { if state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } state.pull_waker = Some(cx.waker().clone()); if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } @@ -244,10 +244,10 @@ mod tests { fn create_states(operator: &PhysicalLimit, partitions: usize) -> Vec { let context = test_database_context(); - let states = operator.create_states(&context, vec![partitions]).unwrap(); + let states = operator.create_states2(&context, vec![partitions]).unwrap(); match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, other => panic!("invalid states: {other:?}"), } } @@ -268,7 +268,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + 
assert_eq!(PollPull2::Pending, poll_pull); // Push our first batch. let push_cx = TestWakerContext::new(); @@ -280,7 +280,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Pull side should have been woken. assert_eq!(1, pull_cx.wake_count()); @@ -300,7 +300,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); // We did _not_ store a new pull waker, the current count for the pull // waker should still be one. @@ -340,7 +340,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -360,7 +360,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); // Pull part of next batch. let poll_pull = pull_cx @@ -395,7 +395,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); // Keep pushing... 
let poll_push = push_cx @@ -406,7 +406,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -435,7 +435,7 @@ mod tests { inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Break, poll_push); + assert_eq!(PollPush2::Break, poll_push); let pull_cx = TestWakerContext::new(); let poll_pull = pull_cx @@ -446,6 +446,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 481822914..6b18c5c54 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -162,7 +162,7 @@ pub enum OperatorState { /// something else to complete (e.g. the right side of a join needs to the left /// side to complete first) or some internal buffer is full. #[derive(Debug, PartialEq)] -pub enum PollPush { +pub enum PollPush2 { /// Batch was successfully pushed. Pushed, @@ -184,7 +184,7 @@ pub enum PollPush { /// Result of a pull from a Source. #[derive(Debug, PartialEq)] -pub enum PollPull { +pub enum PollPull2 { /// Successfully received computed results. Computed(ComputedBatches), @@ -199,14 +199,14 @@ pub enum PollPull { } #[derive(Debug, PartialEq)] -pub enum PollFinalize { +pub enum PollFinalize2 { Finalized, Pending, } /// Describes the relationships of partition states for operators. #[derive(Debug)] -pub enum InputOutputStates { +pub enum InputOutputStates2 { /// Input and output partition states have a one-to-one mapping. 
/// /// The states used for pushing to an operator are the same states used to @@ -262,12 +262,12 @@ pub enum InputOutputStates { /// States generates from an operator to use during execution. #[derive(Debug)] -pub struct ExecutionStates { +pub struct ExecutionStates2 { /// Global operator state. pub operator_state: Arc, /// Partition states for the operator. - pub partition_states: InputOutputStates, + pub partition_states: InputOutputStates2, } pub trait ExecutableOperator: Sync + Send + Debug + Explainable { @@ -277,39 +277,39 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { /// pushing batches through this operator. /// /// Joins are assumed to have two inputs. - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result; + ) -> Result; /// Try to push a batch for this partition. - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result; + ) -> Result; /// Finalize pushing to partition. /// /// This indicates the operator will receive no more input for a given /// partition, allowing the operator to execution some finalization logic. - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result; /// Try to pull a batch for this partition. 
- fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result; } // 144 bytes @@ -348,173 +348,173 @@ pub enum PhysicalOperator { } impl ExecutableOperator for PhysicalOperator { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.create_states(context, partitions), - Self::UngroupedAggregate(op) => op.create_states(context, partitions), - Self::Window(op) => op.create_states(context, partitions), - Self::NestedLoopJoin(op) => op.create_states(context, partitions), - Self::HashJoin(op) => op.create_states(context, partitions), - Self::Values(op) => op.create_states(context, partitions), - Self::ResultSink(op) => op.create_states(context, partitions), - Self::DynSink(op) => op.create_states(context, partitions), - Self::DynSource(op) => op.create_states(context, partitions), - Self::MaterializedSink(op) => op.create_states(context, partitions), - Self::MaterializedSource(op) => op.create_states(context, partitions), - Self::RoundRobin(op) => op.create_states(context, partitions), - Self::MergeSorted(op) => op.create_states(context, partitions), - Self::LocalSort(op) => op.create_states(context, partitions), - Self::Limit(op) => op.create_states(context, partitions), - Self::Union(op) => op.create_states(context, partitions), - Self::Filter(op) => op.create_states(context, partitions), - Self::Project(op) => op.create_states(context, partitions), - Self::Unnest(op) => op.create_states(context, partitions), - Self::Scan(op) => op.create_states(context, partitions), - Self::TableFunction(op) => op.create_states(context, partitions), - Self::TableInOut(op) => op.create_states(context, partitions), - Self::Insert(op) => op.create_states(context, partitions), - Self::CopyTo(op) => op.create_states(context, partitions), - Self::CreateTable(op) => 
op.create_states(context, partitions), - Self::CreateSchema(op) => op.create_states(context, partitions), - Self::CreateView(op) => op.create_states(context, partitions), - Self::Drop(op) => op.create_states(context, partitions), - Self::Empty(op) => op.create_states(context, partitions), - Self::BatchResizer(op) => op.create_states(context, partitions), + Self::HashAggregate(op) => op.create_states2(context, partitions), + Self::UngroupedAggregate(op) => op.create_states2(context, partitions), + Self::Window(op) => op.create_states2(context, partitions), + Self::NestedLoopJoin(op) => op.create_states2(context, partitions), + Self::HashJoin(op) => op.create_states2(context, partitions), + Self::Values(op) => op.create_states2(context, partitions), + Self::ResultSink(op) => op.create_states2(context, partitions), + Self::DynSink(op) => op.create_states2(context, partitions), + Self::DynSource(op) => op.create_states2(context, partitions), + Self::MaterializedSink(op) => op.create_states2(context, partitions), + Self::MaterializedSource(op) => op.create_states2(context, partitions), + Self::RoundRobin(op) => op.create_states2(context, partitions), + Self::MergeSorted(op) => op.create_states2(context, partitions), + Self::LocalSort(op) => op.create_states2(context, partitions), + Self::Limit(op) => op.create_states2(context, partitions), + Self::Union(op) => op.create_states2(context, partitions), + Self::Filter(op) => op.create_states2(context, partitions), + Self::Project(op) => op.create_states2(context, partitions), + Self::Unnest(op) => op.create_states2(context, partitions), + Self::Scan(op) => op.create_states2(context, partitions), + Self::TableFunction(op) => op.create_states2(context, partitions), + Self::TableInOut(op) => op.create_states2(context, partitions), + Self::Insert(op) => op.create_states2(context, partitions), + Self::CopyTo(op) => op.create_states2(context, partitions), + Self::CreateTable(op) => op.create_states2(context, partitions), + 
Self::CreateSchema(op) => op.create_states2(context, partitions), + Self::CreateView(op) => op.create_states2(context, partitions), + Self::Drop(op) => op.create_states2(context, partitions), + Self::Empty(op) => op.create_states2(context, partitions), + Self::BatchResizer(op) => op.create_states2(context, partitions), } } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::HashAggregate(op) => op.poll_push2(cx, partition_state, operator_state, batch), Self::UngroupedAggregate(op) => { - op.poll_push(cx, partition_state, operator_state, batch) + op.poll_push2(cx, partition_state, operator_state, batch) } - Self::Window(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::NestedLoopJoin(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::HashJoin(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Values(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::ResultSink(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::DynSink(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::DynSource(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::MaterializedSink(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::Window(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::NestedLoopJoin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::HashJoin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Values(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::ResultSink(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::DynSink(op) => op.poll_push2(cx, partition_state, 
operator_state, batch), + Self::DynSource(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::MaterializedSink(op) => op.poll_push2(cx, partition_state, operator_state, batch), Self::MaterializedSource(op) => { - op.poll_push(cx, partition_state, operator_state, batch) + op.poll_push2(cx, partition_state, operator_state, batch) } - Self::RoundRobin(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::MergeSorted(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::LocalSort(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Limit(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Union(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Filter(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Project(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Unnest(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Scan(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::TableFunction(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::TableInOut(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Insert(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CopyTo(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateTable(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateSchema(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::CreateView(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Drop(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::Empty(op) => op.poll_push(cx, partition_state, operator_state, batch), - Self::BatchResizer(op) => op.poll_push(cx, partition_state, operator_state, batch), + Self::RoundRobin(op) => op.poll_push2(cx, partition_state, operator_state, batch), + 
Self::MergeSorted(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::LocalSort(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Limit(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Union(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Filter(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Project(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Unnest(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Scan(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::TableFunction(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::TableInOut(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Insert(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CopyTo(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateTable(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateSchema(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::CreateView(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Drop(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::Empty(op) => op.poll_push2(cx, partition_state, operator_state, batch), + Self::BatchResizer(op) => op.poll_push2(cx, partition_state, operator_state, batch), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::HashAggregate(op) => op.poll_finalize_push2(cx, partition_state, operator_state), Self::UngroupedAggregate(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, 
partition_state, operator_state) } - Self::Window(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::NestedLoopJoin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::HashJoin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Values(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::ResultSink(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::DynSink(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::DynSource(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::Window(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::NestedLoopJoin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::HashJoin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Values(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::ResultSink(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::DynSink(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::DynSource(op) => op.poll_finalize_push2(cx, partition_state, operator_state), Self::MaterializedSink(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, partition_state, operator_state) } Self::MaterializedSource(op) => { - op.poll_finalize_push(cx, partition_state, operator_state) + op.poll_finalize_push2(cx, partition_state, operator_state) } - Self::RoundRobin(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::MergeSorted(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::LocalSort(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Limit(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Union(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Filter(op) => 
op.poll_finalize_push(cx, partition_state, operator_state), - Self::Project(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Unnest(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Scan(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::TableFunction(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::TableInOut(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Insert(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CopyTo(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateTable(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateSchema(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::CreateView(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Drop(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::Empty(op) => op.poll_finalize_push(cx, partition_state, operator_state), - Self::BatchResizer(op) => op.poll_finalize_push(cx, partition_state, operator_state), + Self::RoundRobin(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::MergeSorted(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::LocalSort(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Limit(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Union(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Filter(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Project(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Unnest(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Scan(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::TableFunction(op) => op.poll_finalize_push2(cx, partition_state, operator_state), 
+ Self::TableInOut(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Insert(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CopyTo(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateTable(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateSchema(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::CreateView(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Drop(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::Empty(op) => op.poll_finalize_push2(cx, partition_state, operator_state), + Self::BatchResizer(op) => op.poll_finalize_push2(cx, partition_state, operator_state), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match self { - Self::HashAggregate(op) => op.poll_pull(cx, partition_state, operator_state), - Self::UngroupedAggregate(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Window(op) => op.poll_pull(cx, partition_state, operator_state), - Self::NestedLoopJoin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::HashJoin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Values(op) => op.poll_pull(cx, partition_state, operator_state), - Self::ResultSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::DynSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::DynSource(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MaterializedSink(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MaterializedSource(op) => op.poll_pull(cx, partition_state, operator_state), - Self::RoundRobin(op) => op.poll_pull(cx, partition_state, operator_state), - Self::MergeSorted(op) => op.poll_pull(cx, partition_state, operator_state), - Self::LocalSort(op) => 
op.poll_pull(cx, partition_state, operator_state), - Self::Limit(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Union(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Filter(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Project(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Unnest(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Scan(op) => op.poll_pull(cx, partition_state, operator_state), - Self::TableFunction(op) => op.poll_pull(cx, partition_state, operator_state), - Self::TableInOut(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Insert(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CopyTo(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateTable(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateSchema(op) => op.poll_pull(cx, partition_state, operator_state), - Self::CreateView(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Drop(op) => op.poll_pull(cx, partition_state, operator_state), - Self::Empty(op) => op.poll_pull(cx, partition_state, operator_state), - Self::BatchResizer(op) => op.poll_pull(cx, partition_state, operator_state), + Self::HashAggregate(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::UngroupedAggregate(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Window(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::NestedLoopJoin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::HashJoin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Values(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::ResultSink(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::DynSink(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::DynSource(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::MaterializedSink(op) => op.poll_pull2(cx, 
partition_state, operator_state), + Self::MaterializedSource(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::RoundRobin(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::MergeSorted(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::LocalSort(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Limit(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Union(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Filter(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Project(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Unnest(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Scan(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::TableFunction(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::TableInOut(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Insert(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CopyTo(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateTable(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateSchema(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::CreateView(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Drop(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::Empty(op) => op.poll_pull2(cx, partition_state, operator_state), + Self::BatchResizer(op) => op.poll_pull2(cx, partition_state, operator_state), } } } diff --git a/crates/rayexec_execution/src/execution/operators/nl_join.rs b/crates/rayexec_execution/src/execution/operators/nl_join.rs index 4a4457540..bdef053f8 100644 --- a/crates/rayexec_execution/src/execution/operators/nl_join.rs +++ b/crates/rayexec_execution/src/execution/operators/nl_join.rs @@ -11,13 +11,13 @@ use crate::arrays::selection::SelectionVector; use crate::database::DatabaseContext; use 
crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; @@ -209,11 +209,11 @@ impl PhysicalNestedLoopJoin { } impl ExecutableOperator for PhysicalNestedLoopJoin { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Allow different number of partitions on left & right? let num_partitions = partitions[0]; @@ -231,28 +231,28 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::NestedLoopJoin( NestedLoopJoinOperatorState::new(num_partitions, num_partitions), )), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![left_states, right_states], pull_states: 1, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { match partition_state { PartitionState::NestedLoopJoinBuild(state) => { state.batches.push(batch); - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } PartitionState::NestedLoopJoinProbe(state) => { // Check that the partition-local state has a reference to the @@ -273,7 +273,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { // ourselves for a later wakeup when the build is // complete. 
probe_side_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } SharedOperatorState::Probing { batches, @@ -298,7 +298,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { // it's empty. if !state.buffered.is_empty() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Do the join. @@ -318,7 +318,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { state.buffered = ComputedBatches::new(batches); if state.buffered.is_empty() { // Nothing produces, signal to push more. - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } // We have stuff in the buffer, wake up the puller. @@ -326,18 +326,18 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::NestedLoopJoinBuild(state) => { let operator_state = match operator_state { @@ -365,7 +365,7 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { } // And we're done. 
- Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("inner join state is not building: {other:?}"), } @@ -375,32 +375,32 @@ impl ExecutableOperator for PhysicalNestedLoopJoin { if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::NestedLoopJoinProbe(state) => { let computed = state.buffered.take(); if computed.has_batches() { - Ok(PollPull::Computed(computed)) + Ok(PollPull2::Computed(computed)) } else if state.input_finished { - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } else { // We just gotta wait for more input. if let Some(waker) = state.push_waker.take() { waker.wake(); } state.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } PartitionState::NestedLoopJoinBuild(_) => { diff --git a/crates/rayexec_execution/src/execution/operators/round_robin.rs b/crates/rayexec_execution/src/execution/operators/round_robin.rs index 6852fb988..4e09bda61 100644 --- a/crates/rayexec_execution/src/execution/operators/round_robin.rs +++ b/crates/rayexec_execution/src/execution/operators/round_robin.rs @@ -7,15 +7,15 @@ use std::task::{Context, Waker}; use parking_lot::Mutex; use rayexec_error::{RayexecError, Result}; -use super::{ExecutionStates, InputOutputStates, PollFinalize}; +use super::{ExecutionStates2, InputOutputStates2, PollFinalize2}; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -58,11 +58,11 @@ pub struct 
RoundRobinOperatorState { pub struct PhysicalRoundRobinRepartition; impl ExecutableOperator for PhysicalRoundRobinRepartition { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { if partitions.len() != 2 { return Err(RayexecError::new( "Round robin expects to values (input, output) in partition vec", @@ -114,22 +114,22 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { .map(|buffer| PartitionState::RoundRobinPull(RoundRobinPullPartitionState { buffer })) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::RoundRobin(operator_state)), - partition_states: InputOutputStates::SeparateInputOutput { + partition_states: InputOutputStates2::SeparateInputOutput { push_states, pull_states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::RoundRobinPush(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -146,7 +146,7 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // wakeup when there's room. if output.batches.len() >= state.max_buffer_capacity { output.send_wakers[state.own_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Otherwise push our batch. @@ -160,15 +160,15 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // call to `poll_push`. 
state.push_to = (state.push_to + 1) % state.output_buffers.len(); - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let operator_state = match operator_state { OperatorState::RoundRobin(state) => state, other => panic!("invalid operator state: {other:?}"), @@ -199,15 +199,15 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::RoundRobinPull(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -218,11 +218,11 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { match inner.batches.pop_front() { Some(batch) => { inner.wake_n_senders(1); - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } None => { if inner.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // Register ourselves for wakeup. inner.recv_waker = Some(cx.waker().clone()); @@ -230,7 +230,7 @@ impl ExecutableOperator for PhysicalRoundRobinRepartition { // Try to wake up any pushers to fill up the buffer. 
inner.wake_all_senders(); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/scan.rs b/crates/rayexec_execution/src/execution/operators/scan.rs index 500827472..c963f565f 100644 --- a/crates/rayexec_execution/src/execution/operators/scan.rs +++ b/crates/rayexec_execution/src/execution/operators/scan.rs @@ -9,13 +9,13 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::catalog::CatalogTx; @@ -62,11 +62,11 @@ impl PhysicalScan { } impl ExecutableOperator for PhysicalScan { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { // TODO: Placeholder for now. Transaction info should probably go on the // operator. 
let _tx = CatalogTx::new(); @@ -86,63 +86,63 @@ impl ExecutableOperator for PhysicalScan { .map(|scan| PartitionState::Scan(ScanPartitionState { scan, future: None })) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Scan(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. 
- return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.scan.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Scan lives on the partition state and // outlives this future. state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/simple.rs b/crates/rayexec_execution/src/execution/operators/simple.rs index 75a145f76..5bb08dc36 100644 --- a/crates/rayexec_execution/src/execution/operators/simple.rs +++ b/crates/rayexec_execution/src/execution/operators/simple.rs @@ -6,13 +6,13 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -77,14 +77,14 @@ impl SimpleOperator { } impl ExecutableOperator for SimpleOperator { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { - Ok(ExecutionStates { + ) -> Result { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: (0..partitions[0]) 
.map(|_| PartitionState::Simple(SimplePartitionState::new())) .collect(), @@ -92,13 +92,13 @@ impl ExecutableOperator for SimpleOperator { }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -110,7 +110,7 @@ impl ExecutableOperator for SimpleOperator { if let Some(waker) = state.pull_waker.take() { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Otherwise we're good to go. @@ -121,15 +121,15 @@ impl ExecutableOperator for SimpleOperator { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -141,15 +141,15 @@ impl ExecutableOperator for SimpleOperator { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Simple(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -160,18 +160,18 @@ impl ExecutableOperator for SimpleOperator { if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Computed(out.into())) + Ok(PollPull2::Computed(out.into())) } None => { if state.exhausted { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } state.pull_waker = Some(cx.waker().clone()); if let Some(waker) = state.push_waker.take() { waker.wake(); } - 
Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/sink.rs b/crates/rayexec_execution/src/execution/operators/sink.rs index 41ad3d3aa..be36d824b 100644 --- a/crates/rayexec_execution/src/execution/operators/sink.rs +++ b/crates/rayexec_execution/src/execution/operators/sink.rs @@ -10,13 +10,13 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; @@ -141,11 +141,11 @@ impl SinkOperator { } impl ExecutableOperator for SinkOperator { - fn create_states( + fn create_states2( &self, context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states: Vec<_> = self @@ -164,7 +164,7 @@ impl ExecutableOperator for SinkOperator { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::Sink(SinkOperatorState { inner: Mutex::new(SinkOperatorStateInner { global_row_count: 0, @@ -172,19 +172,19 @@ impl ExecutableOperator for SinkOperator { partitions_remaining: partitions, }), })), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, future } => { @@ -199,7 +199,7 @@ impl ExecutableOperator for SinkOperator { *future = None; } Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPush::Pending(batch)), + Poll::Pending => 
return Ok(PollPush2::Pending(batch)), } } @@ -207,7 +207,7 @@ impl ExecutableOperator for SinkOperator { // necessary, but it makes me a feel a bit better than the // hacky stuff is localized to just here. if batch.num_rows() == 0 { - return Ok(PollPush::NeedsMore); + return Ok(PollPush2::NeedsMore); } let inner = inner.as_mut().unwrap(); @@ -217,7 +217,7 @@ impl ExecutableOperator for SinkOperator { match push_future.poll_unpin(cx) { Poll::Ready(Ok(_)) => { // Future completed, need more batches. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } Poll::Ready(Err(e)) => Err(e), Poll::Pending => { @@ -233,7 +233,7 @@ impl ExecutableOperator for SinkOperator { // // I think we'll want to do a similar thing for inserts so that // we can implement them as "just" async functions. - Ok(PollPush::Pending(Batch2::empty())) + Ok(PollPush2::Pending(Batch2::empty())) } } } @@ -245,12 +245,12 @@ impl ExecutableOperator for SinkOperator { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, future } => { @@ -262,7 +262,7 @@ impl ExecutableOperator for SinkOperator { *future = None; } Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollFinalize::Pending), + Poll::Pending => return Ok(PollFinalize2::Pending), } } @@ -288,7 +288,7 @@ impl ExecutableOperator for SinkOperator { *state = SinkPartitionState::Finished; - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } Poll::Ready(Err(e)) => Err(e), Poll::Pending => { @@ -302,7 +302,7 @@ impl ExecutableOperator for SinkOperator { future, }; - Ok(PollFinalize::Pending) + Ok(PollFinalize2::Pending) } } } @@ -326,10 +326,10 @@ impl ExecutableOperator for SinkOperator { *state = SinkPartitionState::Finished; - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } 
Poll::Ready(Err(e)) => Err(e), - Poll::Pending => Ok(PollFinalize::Pending), + Poll::Pending => Ok(PollFinalize2::Pending), } } other => Err(RayexecError::new(format!( @@ -340,12 +340,12 @@ impl ExecutableOperator for SinkOperator { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Sink(state) => match state { SinkPartitionState::Writing { inner, .. } @@ -353,7 +353,7 @@ impl ExecutableOperator for SinkOperator { if let Some(inner) = inner.as_mut() { inner.pull_waker = Some(cx.waker().clone()); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } SinkPartitionState::Finished => { let mut shared = match operator_state { @@ -362,7 +362,7 @@ impl ExecutableOperator for SinkOperator { }; if shared.global_row_count_returned { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } if shared.partitions_remaining == 0 { @@ -372,10 +372,10 @@ impl ExecutableOperator for SinkOperator { let row_count_batch = Batch2::try_new([Array2::from_iter([row_count])])?; - return Ok(PollPull::Computed(row_count_batch.into())); + return Ok(PollPull2::Computed(row_count_batch.into())); } - Ok(PollPull::Exhausted) + Ok(PollPull2::Exhausted) } }, other => panic!("invalid partition state: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs index 031571c8d..a41019a36 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs @@ -12,13 +12,13 @@ use crate::database::DatabaseContext; use crate::execution::operators::sort::util::merger::IterState; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - 
PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalSortExpression; @@ -192,11 +192,11 @@ impl PhysicalGatherSort { } impl ExecutableOperator for PhysicalGatherSort { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let input_partitions = partitions[0]; let operator_state = OperatorState::GatherSort(GatherSortOperatorState { @@ -230,22 +230,22 @@ impl ExecutableOperator for PhysicalGatherSort { }, )]; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(operator_state), - partition_states: InputOutputStates::SeparateInputOutput { + partition_states: InputOutputStates2::SeparateInputOutput { push_states, pull_states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::GatherSortPush(state) => state, PartitionState::GatherSortPull(_) => { @@ -262,7 +262,7 @@ impl ExecutableOperator for PhysicalGatherSort { if shared.batches[state.partition_idx].is_some() { // Can't push, global state already has a batch for this partition. shared.push_wakers[state.partition_idx] = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } let keys = state.extractor.sort_keys(&batch)?; @@ -283,15 +283,15 @@ impl ExecutableOperator for PhysicalGatherSort { // matter where the operator is in the pipeline. // // Changing this to NeedsMore wouldn't change behavior. 
- Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::GatherSortPush(state) => state, PartitionState::GatherSortPull(_) => { @@ -314,15 +314,15 @@ impl ExecutableOperator for PhysicalGatherSort { } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::GatherSortPull(state) => state, PartitionState::GatherSortPush(_) => { @@ -350,7 +350,7 @@ impl ExecutableOperator for PhysicalGatherSort { // Not finished initializing, still waiting on some input. // // `try_finish_initialize` registers a waker for us. - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } } } @@ -370,7 +370,7 @@ impl ExecutableOperator for PhysicalGatherSort { )?; if !input_pushed { // `try_push_input_batch_to_merger` registers a waker for us. - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // Input no longer required, we've either pushed the batch @@ -386,7 +386,7 @@ impl ExecutableOperator for PhysicalGatherSort { loop { // TODO: Configurable batch size. match merger.try_merge(1024)? { - MergeResult::Batch(batch) => return Ok(PollPull::Computed(batch.into())), + MergeResult::Batch(batch) => return Ok(PollPull2::Computed(batch.into())), MergeResult::NeedsInput(input_idx) => { let pushed = Self::try_push_input_batch_to_merger( cx, @@ -409,10 +409,10 @@ impl ExecutableOperator for PhysicalGatherSort { // call to `poll_pull` ensures that we // get that input. 
*input_required = Some(input_idx); - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } } - MergeResult::Exhausted => return Ok(PollPull::Exhausted), + MergeResult::Exhausted => return Ok(PollPull2::Exhausted), } } } @@ -653,7 +653,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push our first batch. let push_cx = TestWakerContext::new(); @@ -665,7 +665,7 @@ mod tests { p0_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Kind of an implementation detail, but the puller is waiting on // partition 0 to push. Multiple partitions would trigger this wakeup @@ -684,17 +684,17 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); let poll_push = push_cx .poll_push(&operator, &mut push_states[0], &operator_state, p1_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); } // Partition input is finished. operator - .poll_finalize_push(&mut push_cx.context(), &mut push_states[0], &operator_state) + .poll_finalize_push2(&mut push_cx.context(), &mut push_states[0], &operator_state) .unwrap(); // Now we can pull the sorted result. @@ -708,7 +708,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } #[test] @@ -748,7 +748,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push batch for partition 0. 
let p0_push_cx = TestWakerContext::new(); @@ -760,7 +760,7 @@ mod tests { p0_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Triggers pull wake up. assert_eq!(1, pull_cx.wake_count()); @@ -769,7 +769,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push batch for partition 1. let p1_push_cx = TestWakerContext::new(); @@ -781,7 +781,7 @@ mod tests { p1_inputs.remove(0), ) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); // Also triggers wake up. assert_eq!(1, pull_cx.wake_count()); @@ -790,7 +790,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); // Push the rest of the batches. // @@ -800,29 +800,29 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Pending, poll_pull); + assert_eq!(PollPull2::Pending, poll_pull); let poll_push = p0_push_cx .poll_push(&operator, &mut push_states[0], &operator_state, p0_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); let poll_push = p1_push_cx .poll_push(&operator, &mut push_states[1], &operator_state, p1_input) .unwrap(); - assert_eq!(PollPush::Pushed, poll_push); + assert_eq!(PollPush2::Pushed, poll_push); } // Partition inputs is finished. 
operator - .poll_finalize_push( + .poll_finalize_push2( &mut p0_push_cx.context(), &mut push_states[0], &operator_state, ) .unwrap(); operator - .poll_finalize_push( + .poll_finalize_push2( &mut p1_push_cx.context(), &mut push_states[1], &operator_state, @@ -840,6 +840,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut pull_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs index b70897911..6d215b0dd 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs @@ -11,13 +11,13 @@ use crate::database::DatabaseContext; use crate::execution::operators::util::resizer::DEFAULT_TARGET_BATCH_SIZE; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalSortExpression; @@ -63,11 +63,11 @@ impl PhysicalScatterSort { } impl ExecutableOperator for PhysicalScatterSort { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let extractor = SortKeysExtractor::new(&self.exprs); @@ -83,21 +83,21 @@ impl ExecutableOperator for PhysicalScatterSort { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, 
_cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -107,7 +107,7 @@ impl ExecutableOperator for PhysicalScatterSort { ScatterSortPartitionState::Consuming(state) => { self.insert_batch_for_comparison(state, batch)?; - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } ScatterSortPartitionState::Producing { .. } => { panic!("attempted to push to partition that's already produding data") @@ -115,12 +115,12 @@ impl ExecutableOperator for PhysicalScatterSort { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -154,7 +154,7 @@ impl ExecutableOperator for PhysicalScatterSort { // Update partition state to "producing" using the merger. *state = ScatterSortPartitionState::Producing(ProducingPartitionState { merger }); - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } ScatterSortPartitionState::Producing { .. } => { panic!("attempted to finalize partition that's already producing data") @@ -162,12 +162,12 @@ impl ExecutableOperator for PhysicalScatterSort { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let mut state = match partition_state { PartitionState::ScatterSort(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -177,17 +177,17 @@ impl ExecutableOperator for PhysicalScatterSort { ScatterSortPartitionState::Consuming(state) => { // Partition still collecting data to sort. 
state.pull_waker = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } ScatterSortPartitionState::Producing(state) => { loop { // TODO: Configurable batch size. match state.merger.try_merge(DEFAULT_TARGET_BATCH_SIZE)? { MergeResult::Batch(batch) => { - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } MergeResult::Exhausted => { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } MergeResult::NeedsInput(idx) => { // We're merging all batch in this partition, and @@ -274,10 +274,10 @@ mod tests { fn create_states(operator: &PhysicalScatterSort, partitions: usize) -> Vec { let context = test_database_context(); - let states = operator.create_states(&context, vec![partitions]).unwrap(); + let states = operator.create_states2(&context, vec![partitions]).unwrap(); match states.partition_states { - InputOutputStates::OneToOne { partition_states } => partition_states, + InputOutputStates2::OneToOne { partition_states } => partition_states, other => panic!("unexpected states: {other:?}"), } } @@ -307,10 +307,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -352,10 +352,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -401,10 +401,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + 
assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -441,7 +441,7 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } #[test] @@ -469,10 +469,10 @@ mod tests { let poll_push = push_cx .poll_push(&operator, &mut partition_states[0], &operator_state, input) .unwrap(); - assert_eq!(PollPush::NeedsMore, poll_push); + assert_eq!(PollPush2::NeedsMore, poll_push); } operator - .poll_finalize_push( + .poll_finalize_push2( &mut push_cx.context(), &mut partition_states[0], &operator_state, @@ -504,6 +504,6 @@ mod tests { let poll_pull = pull_cx .poll_pull(&operator, &mut partition_states[0], &operator_state) .unwrap(); - assert_eq!(PollPull::Exhausted, poll_pull); + assert_eq!(PollPull2::Exhausted, poll_pull); } } diff --git a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs index b1d853710..21b7f18bd 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/top_k.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/top_k.rs @@ -6,12 +6,12 @@ use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; use crate::execution::operators::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -25,39 +25,39 @@ pub struct TopKOperatorState {} pub struct PhysicalTopK {} impl ExecutableOperator for PhysicalTopK { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: 
&mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/execution/operators/source.rs b/crates/rayexec_execution/src/execution/operators/source.rs index cf899a0ba..d40dce9a7 100644 --- a/crates/rayexec_execution/src/execution/operators/source.rs +++ b/crates/rayexec_execution/src/execution/operators/source.rs @@ -10,13 +10,13 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -85,11 +85,11 @@ impl SourceOperator { } impl ExecutableOperator for SourceOperator { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let states = self .source .create_partition_sources(partitions[0]) @@ -102,63 +102,63 @@ impl ExecutableOperator for SourceOperator { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) 
-> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical scan")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Source(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.source.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Source lives on the partition state and // outlives this future. 
state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/table_function.rs b/crates/rayexec_execution/src/execution/operators/table_function.rs index e0880fcbd..93668870c 100644 --- a/crates/rayexec_execution/src/execution/operators/table_function.rs +++ b/crates/rayexec_execution/src/execution/operators/table_function.rs @@ -9,13 +9,13 @@ use rayexec_error::{RayexecError, Result}; use super::util::futures::make_static; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -53,11 +53,11 @@ impl PhysicalTableFunction { } impl ExecutableOperator for PhysicalTableFunction { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let scan_func = match &self.function.function_impl { TableFunctionImpl::Scan(scan) => scan, _ => { @@ -81,64 +81,64 @@ impl ExecutableOperator for PhysicalTableFunction { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { // Could UNNEST be implemented as a table function? 
Err(RayexecError::new("Cannot push to physical table function")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to physical table function")) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::TableFunction(state) => { if let Some(future) = &mut state.future { match future.poll_unpin(cx) { Poll::Ready(Ok(Some(batch))) => { state.future = None; // Future complete, next pull with create a new one. - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } - Poll::Ready(Ok(None)) => return Ok(PollPull::Exhausted), + Poll::Ready(Ok(None)) => return Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => return Err(e), - Poll::Pending => return Ok(PollPull::Pending), + Poll::Pending => return Ok(PollPull2::Pending), } } let mut future = state.scan_state.pull(); match future.poll_unpin(cx) { - Poll::Ready(Ok(Some(batch))) => Ok(PollPull::Computed(batch.into())), - Poll::Ready(Ok(None)) => Ok(PollPull::Exhausted), + Poll::Ready(Ok(Some(batch))) => Ok(PollPull2::Computed(batch.into())), + Poll::Ready(Ok(None)) => Ok(PollPull2::Exhausted), Poll::Ready(Err(e)) => Err(e), Poll::Pending => { // SAFETY: Scan lives on the partition state and // outlives this future. 
state.future = Some(unsafe { make_static(future) }); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/table_inout.rs b/crates/rayexec_execution/src/execution/operators/table_inout.rs index 73a47e3fe..fb513988a 100644 --- a/crates/rayexec_execution/src/execution/operators/table_inout.rs +++ b/crates/rayexec_execution/src/execution/operators/table_inout.rs @@ -5,13 +5,13 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; @@ -39,11 +39,11 @@ pub struct PhysicalTableInOut { } impl ExecutableOperator for PhysicalTableInOut { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states = match &self.function.function_impl { @@ -66,21 +66,21 @@ impl ExecutableOperator for PhysicalTableInOut { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -108,7 +108,7 @@ impl ExecutableOperator for PhysicalTableInOut { // TODO: Remove needing to do this, the clones should be cheap, but the // expression execution is wasteful. match state.function_state.poll_push(cx, inputs)? 
{ - PollPush::Pending(_) => Ok(PollPush::Pending(orig)), + PollPush2::Pending(_) => Ok(PollPush2::Pending(orig)), other => { // Batch was pushed to the function state, compute additional // outputs. @@ -128,12 +128,12 @@ impl ExecutableOperator for PhysicalTableInOut { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid state: {other:?}"), @@ -142,12 +142,12 @@ impl ExecutableOperator for PhysicalTableInOut { state.function_state.poll_finalize_push(cx) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::TableInOut(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -177,10 +177,10 @@ impl ExecutableOperator for PhysicalTableInOut { let new_batch = Batch2::try_new(arrays)?; - Ok(PollPull::Computed(new_batch.into())) + Ok(PollPull2::Computed(new_batch.into())) } - inout::InOutPollPull::Pending => Ok(PollPull::Pending), - inout::InOutPollPull::Exhausted => Ok(PollPull::Exhausted), + inout::InOutPollPull::Pending => Ok(PollPull2::Pending), + inout::InOutPollPull::Exhausted => Ok(PollPull2::Exhausted), } } } diff --git a/crates/rayexec_execution/src/execution/operators/test_util.rs b/crates/rayexec_execution/src/execution/operators/test_util.rs index 32f034030..e3aa864e8 100644 --- a/crates/rayexec_execution/src/execution/operators/test_util.rs +++ b/crates/rayexec_execution/src/execution/operators/test_util.rs @@ -10,8 +10,8 @@ use super::{ ExecutableOperator, OperatorState, PartitionState, - PollPull, - PollPush, + PollPull2, + PollPush2, }; use crate::arrays::array::Array2; use crate::arrays::batch::Batch2; @@ -73,8 +73,8 @@ impl TestWakerContext { 
partition_state: &mut PartitionState, operator_state: &OperatorState, batch: impl Into, - ) -> Result { - operator.as_ref().poll_push( + ) -> Result { + operator.as_ref().poll_push2( &mut self.context(), partition_state, operator_state, @@ -87,10 +87,10 @@ impl TestWakerContext { operator: impl AsRef, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { operator .as_ref() - .poll_pull(&mut self.context(), partition_state, operator_state) + .poll_pull2(&mut self.context(), partition_state, operator_state) } } @@ -101,9 +101,9 @@ impl Wake for TestWakerInner { } /// Unwraps a batch from the PollPull::Batch variant. -pub fn unwrap_poll_pull_batch(poll: PollPull) -> Batch2 { +pub fn unwrap_poll_pull_batch(poll: PollPull2) -> Batch2 { match poll { - PollPull::Computed(ComputedBatches::Single(batch)) => batch, + PollPull2::Computed(ComputedBatches::Single(batch)) => batch, other => panic!("unexpected poll pull: {other:?}"), } } diff --git a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs index 961e7e5a2..a286a39bc 100644 --- a/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs +++ b/crates/rayexec_execution/src/execution/operators/ungrouped_aggregate.rs @@ -9,17 +9,17 @@ use super::hash_aggregate::distinct::DistinctGroupedStates; use super::hash_aggregate::hash_table::GroupAddress; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::array::selection::Selection; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; -use crate::execution::operators::InputOutputStates; +use crate::execution::operators::InputOutputStates2; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use 
crate::expr::physical::PhysicalAggregateExpression; use crate::functions::aggregate::states::AggregateGroupStates; @@ -102,11 +102,11 @@ impl PhysicalUngroupedAggregate { } impl ExecutableOperator for PhysicalUngroupedAggregate { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let inner = OperatorStateInner { @@ -129,26 +129,26 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { }) .collect::>>()?; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::UngroupedAggregate(operator_state)), - partition_states: InputOutputStates::OneToOne { partition_states }, + partition_states: InputOutputStates2::OneToOne { partition_states }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), }; match state { - UngroupedAggregatePartitionState::Aggregating { .. } => { + UngroupedAggregatePartitionState::Aggregating { .. } => { // All rows map to the same group (group 0) let addrs: Vec<_> = (0..batch.num_rows()) .map(|_| GroupAddress { @@ -170,7 +170,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { } // Keep pushing. - Ok(PollPush::NeedsMore) + Ok(PollPush2::NeedsMore) } UngroupedAggregatePartitionState::Producing { .. 
} => Err(RayexecError::new( "Attempted to push to partition that should be producing batches", @@ -178,12 +178,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -243,7 +243,7 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { // } } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } UngroupedAggregatePartitionState::Producing { .. } => Err(RayexecError::new( "Attempted to finalize push partition that's producing", @@ -251,12 +251,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::UngroupedAggregate(state) => state, other => panic!("invalid partition state: {other:?}"), @@ -264,8 +264,8 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { match state { UngroupedAggregatePartitionState::Producing { batches, .. } => match batches.pop() { - Some(batch) => Ok(PollPull::Computed(batch.into())), - None => Ok(PollPull::Exhausted), + Some(batch) => Ok(PollPull2::Computed(batch.into())), + None => Ok(PollPull2::Exhausted), }, UngroupedAggregatePartitionState::Aggregating { partition_idx, .. } => { let mut shared = match operator_state { @@ -275,12 +275,12 @@ impl ExecutableOperator for PhysicalUngroupedAggregate { if shared.remaining == 0 { // We weren't the chosen partition to produce output. Immediately exhausted. 
- return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } shared.pull_wakers[*partition_idx] = Some(cx.waker().clone()); - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } } } diff --git a/crates/rayexec_execution/src/execution/operators/union.rs b/crates/rayexec_execution/src/execution/operators/union.rs index ad7732e37..d3eba86d0 100644 --- a/crates/rayexec_execution/src/execution/operators/union.rs +++ b/crates/rayexec_execution/src/execution/operators/union.rs @@ -6,13 +6,13 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -68,11 +68,11 @@ impl PhysicalUnion { } impl ExecutableOperator for PhysicalUnion { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let top_states = (0..num_partitions) @@ -106,27 +106,27 @@ impl ExecutableOperator for PhysicalUnion { .collect(), }; - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::Union(operator_state)), - partition_states: InputOutputStates::NaryInputSingleOutput { + partition_states: InputOutputStates2::NaryInputSingleOutput { partition_states: vec![top_states, bottom_states], pull_states: Self::UNION_TOP_INPUT_INDEX, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { match partition_state { PartitionState::UnionTop(state) => { if state.batch.is_some() { state.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } state.batch = Some(batch); @@ -134,7 +134,7 @@ impl ExecutableOperator for 
PhysicalUnion { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } PartitionState::UnionBottom(state) => { @@ -147,7 +147,7 @@ impl ExecutableOperator for PhysicalUnion { if shared.batch.is_some() { shared.push_waker = Some(cx.waker().clone()); - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } shared.batch = Some(batch); @@ -156,26 +156,26 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::UnionTop(state) => { state.finished = true; if let Some(waker) = state.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } PartitionState::UnionBottom(state) => { @@ -191,26 +191,26 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } other => panic!("invalid partition state: {other:?}"), } } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::UnionTop(state) => match state.batch.take() { Some(batch) => { if let Some(waker) = state.push_waker.take() { waker.wake(); } - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } None => { let mut shared = match operator_state { @@ -225,12 +225,12 @@ impl ExecutableOperator for PhysicalUnion { if let Some(waker) = shared.push_waker.take() { waker.wake(); } - return Ok(PollPull::Computed(batch.into())); + return Ok(PollPull2::Computed(batch.into())); } // If not, check if we're finished. 
if shared.finished && state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // No batches, and we're not finished. Need to wait. @@ -239,7 +239,7 @@ impl ExecutableOperator for PhysicalUnion { waker.wake(); } - Ok(PollPull::Pending) + Ok(PollPull2::Pending) } }, other => panic!("invalid partition state: {other:?}"), diff --git a/crates/rayexec_execution/src/execution/operators/unnest.rs b/crates/rayexec_execution/src/execution/operators/unnest.rs index bc84bb236..af68b9b8e 100644 --- a/crates/rayexec_execution/src/execution/operators/unnest.rs +++ b/crates/rayexec_execution/src/execution/operators/unnest.rs @@ -7,13 +7,13 @@ use rayexec_error::{not_implemented, RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::array::{Array2, ArrayData2}; use crate::arrays::batch::Batch2; @@ -82,11 +82,11 @@ pub struct PhysicalUnnest { } impl ExecutableOperator for PhysicalUnnest { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let partitions = partitions[0]; let states: Vec<_> = (0..partitions) @@ -109,21 +109,21 @@ impl ExecutableOperator for PhysicalUnnest { }) .collect(); - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states, }, }) } - fn poll_push( + fn poll_push2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, batch: Batch2, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -136,7 +136,7 @@ impl ExecutableOperator for 
PhysicalUnnest { waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } // Compute inputs. These will be stored until we've processed all rows. @@ -155,15 +155,15 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -175,15 +175,15 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } - fn poll_pull( + fn poll_pull2( &self, cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { let state = match partition_state { PartitionState::Unnest(state) => state, other => panic!("invalid state: {other:?}"), @@ -191,7 +191,7 @@ impl ExecutableOperator for PhysicalUnnest { if state.current_row >= state.input_num_rows { if state.finished { - return Ok(PollPull::Exhausted); + return Ok(PollPull2::Exhausted); } // We're done with these inputs. Come back later. @@ -200,7 +200,7 @@ impl ExecutableOperator for PhysicalUnnest { waker.wake(); } - return Ok(PollPull::Pending); + return Ok(PollPull2::Pending); } // We have input ready, get the longest list for the current row. 
@@ -292,7 +292,7 @@ impl ExecutableOperator for PhysicalUnnest { let batch = Batch2::try_new(outputs)?; - Ok(PollPull::Computed(batch.into())) + Ok(PollPull2::Computed(batch.into())) } } diff --git a/crates/rayexec_execution/src/execution/operators/values.rs b/crates/rayexec_execution/src/execution/operators/values.rs index c96f7b3d4..f08e1cc94 100644 --- a/crates/rayexec_execution/src/execution/operators/values.rs +++ b/crates/rayexec_execution/src/execution/operators/values.rs @@ -5,13 +5,13 @@ use rayexec_error::{RayexecError, Result}; use super::{ ExecutableOperator, - ExecutionStates, - InputOutputStates, + ExecutionStates2, + InputOutputStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -35,11 +35,11 @@ impl PhysicalValues { } impl ExecutableOperator for PhysicalValues { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, partitions: Vec, - ) -> Result { + ) -> Result { let num_partitions = partitions[0]; let mut states: Vec<_> = (0..num_partitions) @@ -52,43 +52,43 @@ impl ExecutableOperator for PhysicalValues { states[idx % num_partitions].batches.push(batch.clone()); } - Ok(ExecutionStates { + Ok(ExecutionStates2 { operator_state: Arc::new(OperatorState::None), - partition_states: InputOutputStates::OneToOne { + partition_states: InputOutputStates2::OneToOne { partition_states: states.into_iter().map(PartitionState::Values).collect(), }, }) } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot push to Values operator")) } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { Err(RayexecError::new("Cannot 
push to Values operator")) } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { match partition_state { PartitionState::Values(state) => match state.batches.pop() { - Some(batch) => Ok(PollPull::Computed(batch.into())), - None => Ok(PollPull::Exhausted), + Some(batch) => Ok(PollPull2::Computed(batch.into())), + None => Ok(PollPull2::Exhausted), }, other => panic!("invalid partition state: {other:?}"), } diff --git a/crates/rayexec_execution/src/execution/operators/window/mod.rs b/crates/rayexec_execution/src/execution/operators/window/mod.rs index b8da47f84..af935b541 100644 --- a/crates/rayexec_execution/src/execution/operators/window/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/window/mod.rs @@ -4,12 +4,12 @@ use rayexec_error::Result; use super::{ ExecutableOperator, - ExecutionStates, + ExecutionStates2, OperatorState, PartitionState, - PollFinalize, - PollPull, - PollPush, + PollFinalize2, + PollPull2, + PollPush2, }; use crate::arrays::batch::Batch2; use crate::database::DatabaseContext; @@ -19,39 +19,39 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; pub struct PhysicalWindow {} impl ExecutableOperator for PhysicalWindow { - fn create_states( + fn create_states2( &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_push( + fn poll_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, _batch: Batch2, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_finalize_push( + fn poll_finalize_push2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + ) -> Result { unimplemented!() } - fn poll_pull( + fn poll_pull2( &self, _cx: &mut Context, _partition_state: &mut PartitionState, _operator_state: &OperatorState, - ) -> Result { + 
) -> Result { unimplemented!() } } diff --git a/crates/rayexec_execution/src/functions/table/builtin/series.rs b/crates/rayexec_execution/src/functions/table/builtin/series.rs index df6c5404a..f0cff25ea 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/series.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/series.rs @@ -11,7 +11,7 @@ use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::arrays::storage::PrimitiveStorage; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::execution::operators::{PollFinalize2, PollPush2}; use crate::expr::{self, Expression}; use crate::functions::documentation::{Category, Documentation}; use crate::functions::table::inout::{InOutPollPull, TableInOutFunction, TableInOutPartitionState}; @@ -215,29 +215,29 @@ pub struct GenerateSeriesInOutPartitionState { } impl TableInOutPartitionState for GenerateSeriesInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, batch: Batch2) -> Result { + fn poll_push(&mut self, cx: &mut Context, batch: Batch2) -> Result { if self.batch.is_some() { // Still processing current batch, come back later. 
self.push_waker = Some(cx.waker().clone()); if let Some(pull_waker) = self.pull_waker.take() { pull_waker.wake(); } - return Ok(PollPush::Pending(batch)); + return Ok(PollPush2::Pending(batch)); } self.batch = Some(batch); self.next_row_idx = 0; - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { + fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { self.finished = true; if let Some(waker) = self.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } fn poll_pull(&mut self, cx: &mut Context) -> Result { diff --git a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs index 1b46461ca..50baf2f58 100644 --- a/crates/rayexec_execution/src/functions/table/builtin/unnest.rs +++ b/crates/rayexec_execution/src/functions/table/builtin/unnest.rs @@ -11,7 +11,7 @@ use crate::arrays::executor::scalar::UnaryExecutor2; use crate::arrays::field::{Field, Schema}; use crate::arrays::scalar::OwnedScalarValue; use crate::execution::operators::unnest::unnest; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::execution::operators::{PollFinalize2, PollPush2}; use crate::expr::Expression; use crate::functions::documentation::{Category, Documentation}; use crate::functions::table::inout::{InOutPollPull, TableInOutFunction, TableInOutPartitionState}; @@ -152,7 +152,7 @@ pub struct UnnestInOutPartitionState { } impl TableInOutPartitionState for UnnestInOutPartitionState { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result { + fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result { if self.current_row < self.input_num_rows { // Still processing inputs, come back later. 
self.push_waker = Some(cx.waker().clone()); @@ -160,7 +160,7 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { waker.wake(); } - return Ok(PollPush::Pending(inputs)); + return Ok(PollPush2::Pending(inputs)); } self.input_num_rows = inputs.num_rows(); @@ -177,17 +177,17 @@ impl TableInOutPartitionState for UnnestInOutPartitionState { waker.wake(); } - Ok(PollPush::Pushed) + Ok(PollPush2::Pushed) } - fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { + fn poll_finalize_push(&mut self, _cx: &mut Context) -> Result { self.finished = true; if let Some(waker) = self.pull_waker.take() { waker.wake(); } - Ok(PollFinalize::Finalized) + Ok(PollFinalize2::Finalized) } fn poll_pull(&mut self, cx: &mut Context) -> Result { diff --git a/crates/rayexec_execution/src/functions/table/inout.rs b/crates/rayexec_execution/src/functions/table/inout.rs index 5108c6577..94d3901ef 100644 --- a/crates/rayexec_execution/src/functions/table/inout.rs +++ b/crates/rayexec_execution/src/functions/table/inout.rs @@ -5,7 +5,7 @@ use dyn_clone::DynClone; use rayexec_error::Result; use crate::arrays::batch::Batch2; -use crate::execution::operators::{PollFinalize, PollPush}; +use crate::execution::operators::{PollFinalize2, PollPush2}; pub trait TableInOutFunction: Debug + Sync + Send + DynClone { fn create_states( @@ -22,8 +22,8 @@ pub enum InOutPollPull { } pub trait TableInOutPartitionState: Debug + Sync + Send { - fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result; - fn poll_finalize_push(&mut self, cx: &mut Context) -> Result; + fn poll_push(&mut self, cx: &mut Context, inputs: Batch2) -> Result; + fn poll_finalize_push(&mut self, cx: &mut Context) -> Result; fn poll_pull(&mut self, cx: &mut Context) -> Result; } From eeff12f3f37184a234acb48c8a060633364e4ae2 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Sun, 5 Jan 2025 16:09:39 -0600 Subject: [PATCH 56/59] project --- .../rayexec_execution/src/arrays/batch_exp.rs | 4 +- 
.../rayexec_execution/src/arrays/testutil.rs | 8 +- .../intermediate/planner/plan_aggregate.rs | 4 +- .../intermediate/planner/plan_distinct.rs | 4 +- .../src/execution/operators/limit.rs | 4 +- .../src/execution/operators/mod.rs | 158 ++++++++++++++- .../execution/operators/physical_project.rs | 191 ++++++++++++++++++ .../src/execution/operators/project.rs | 4 +- .../execution/operators/sort/gather_sort.rs | 2 +- .../execution/operators/sort/scatter_sort.rs | 4 +- .../operators/testutil/db_context.rs | 12 ++ .../{test_util.rs => testutil/mod.rs} | 10 +- .../execution/operators/testutil/wrapper.rs | 74 +++++++ .../src/expr/physical/case_expr.rs | 6 +- .../src/expr/physical/cast_expr.rs | 2 +- .../src/expr/physical/column_expr.rs | 4 +- .../src/expr/physical/evaluator.rs | 5 + .../src/expr/physical/mod.rs | 64 ------ .../src/expr/physical/scalar_function_expr.rs | 2 +- .../src/functions/scalar/builtin/arith/add.rs | 2 +- .../src/functions/scalar/builtin/arith/div.rs | 2 +- .../src/functions/scalar/builtin/arith/mul.rs | 2 +- .../src/functions/scalar/builtin/arith/rem.rs | 2 +- .../src/functions/scalar/builtin/arith/sub.rs | 2 +- .../src/functions/scalar/builtin/boolean.rs | 6 +- .../functions/scalar/builtin/comparison.rs | 12 +- .../src/functions/scalar/builtin/is.rs | 8 +- .../scalar/builtin/list/list_values.rs | 4 +- .../scalar/builtin/similarity/l2_distance.rs | 2 +- 29 files changed, 480 insertions(+), 124 deletions(-) create mode 100644 crates/rayexec_execution/src/execution/operators/physical_project.rs create mode 100644 crates/rayexec_execution/src/execution/operators/testutil/db_context.rs rename crates/rayexec_execution/src/execution/operators/{test_util.rs => testutil/mod.rs} (94%) create mode 100644 crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 0888785ff..12ffbce36 100644 --- 
a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -75,7 +75,7 @@ where /// `row_eq_cap` indicates if the logical cardinality of the batch should /// equal the capacity of the arrays. If false, the logical cardinality will /// be set to zero. - pub(crate) fn from_arrays( + pub(crate) fn try_from_arrays( arrays: impl IntoIterator>, rows_eq_cap: bool, ) -> Result { @@ -148,7 +148,7 @@ mod tests { let a = Array::try_from_iter([3, 4, 5]).unwrap(); let b = Array::try_from_iter(["a", "b", "c"]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); assert_eq!(3, batch.selection().len()); } diff --git a/crates/rayexec_execution/src/arrays/testutil.rs b/crates/rayexec_execution/src/arrays/testutil.rs index 4ca566d66..7b6b8fb64 100644 --- a/crates/rayexec_execution/src/arrays/testutil.rs +++ b/crates/rayexec_execution/src/arrays/testutil.rs @@ -236,7 +236,7 @@ mod tests { #[test] fn assert_batches_eq_simple() { - let batch1 = Batch::from_arrays( + let batch1 = Batch::try_from_arrays( [ Array::try_from_iter([4, 5, 6]).unwrap(), Array::try_from_iter(["a", "b", "c"]).unwrap(), @@ -244,7 +244,7 @@ mod tests { true, ) .unwrap(); - let batch2 = Batch::from_arrays( + let batch2 = Batch::try_from_arrays( [ Array::try_from_iter([4, 5, 6]).unwrap(), Array::try_from_iter(["a", "b", "c"]).unwrap(), @@ -258,7 +258,7 @@ mod tests { #[test] fn assert_batches_eq_logical_row_count() { - let mut batch1 = Batch::from_arrays( + let mut batch1 = Batch::try_from_arrays( [ Array::try_from_iter([4, 5, 6, 7, 8]).unwrap(), Array::try_from_iter(["a", "b", "c", "d", "e"]).unwrap(), @@ -268,7 +268,7 @@ mod tests { .unwrap(); batch1.set_num_rows(3).unwrap(); - let batch2 = Batch::from_arrays( + let batch2 = Batch::try_from_arrays( [ Array::try_from_iter([4, 5, 6]).unwrap(), Array::try_from_iter(["a", "b", "c"]).unwrap(), diff --git 
a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs index a2ce125a3..ecc45944c 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_aggregate.rs @@ -5,7 +5,7 @@ use rayexec_error::{RayexecError, Result, ResultExt}; use super::{IntermediatePipelineBuildState, Materializations, PipelineIdGen}; use crate::execution::intermediate::pipeline::IntermediateOperator; use crate::execution::operators::hash_aggregate::PhysicalHashAggregate; -use crate::execution::operators::project::{PhysicalProject, ProjectOperation}; +use crate::execution::operators::project::{PhysicalProject2, ProjectOperation}; use crate::execution::operators::ungrouped_aggregate::PhysicalUngroupedAggregate; use crate::execution::operators::PhysicalOperator; use crate::expr::physical::column_expr::PhysicalColumnExpr; @@ -82,7 +82,7 @@ impl IntermediatePipelineBuildState<'_> { self.push_intermediate_operator( IntermediateOperator { - operator: Arc::new(PhysicalOperator::Project(PhysicalProject { + operator: Arc::new(PhysicalOperator::Project(PhysicalProject2 { operation: ProjectOperation::new(preproject_exprs), })), partitioning_requirement: None, diff --git a/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs b/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs index 3845bb7a5..9678e120f 100644 --- a/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs +++ b/crates/rayexec_execution/src/execution/intermediate/planner/plan_distinct.rs @@ -6,7 +6,7 @@ use rayexec_error::Result; use super::{IntermediatePipelineBuildState, Materializations, PipelineIdGen}; use crate::execution::intermediate::pipeline::IntermediateOperator; use crate::execution::operators::hash_aggregate::PhysicalHashAggregate; -use 
crate::execution::operators::project::{PhysicalProject, ProjectOperation}; +use crate::execution::operators::project::{PhysicalProject2, ProjectOperation}; use crate::execution::operators::PhysicalOperator; use crate::logical::logical_distinct::LogicalDistinct; use crate::logical::operator::{LogicalNode, Node}; @@ -35,7 +35,7 @@ impl IntermediatePipelineBuildState<'_> { self.push_intermediate_operator( IntermediateOperator { - operator: Arc::new(PhysicalOperator::Project(PhysicalProject { + operator: Arc::new(PhysicalOperator::Project(PhysicalProject2 { operation: ProjectOperation::new(group_exprs), })), partitioning_requirement: None, diff --git a/crates/rayexec_execution/src/execution/operators/limit.rs b/crates/rayexec_execution/src/execution/operators/limit.rs index b63529f2b..3f6fe68df 100644 --- a/crates/rayexec_execution/src/execution/operators/limit.rs +++ b/crates/rayexec_execution/src/execution/operators/limit.rs @@ -234,10 +234,10 @@ mod tests { use super::*; use crate::arrays::scalar::ScalarValue; - use crate::execution::operators::test_util::{ + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::{ logical_value, make_i32_batch, - test_database_context, unwrap_poll_pull_batch, TestWakerContext, }; diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 6b18c5c54..19744954b 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -30,10 +30,12 @@ pub mod unnest; pub mod values; pub mod window; +pub mod physical_project; + pub(crate) mod util; #[cfg(test)] -mod test_util; +mod testutil; use std::fmt::Debug; use std::sync::Arc; @@ -58,8 +60,9 @@ use insert::PhysicalInsert; use limit::PhysicalLimit; use materialize::{MaterializeSourceOperation, MaterializedSinkOperation}; use nl_join::PhysicalNestedLoopJoin; -use 
project::{PhysicalProject, ProjectOperation}; -use rayexec_error::{not_implemented, OptionExt, Result}; +use physical_project::ProjectPartitionState; +use project::{PhysicalProject2, ProjectOperation}; +use rayexec_error::{not_implemented, OptionExt, RayexecError, Result}; use round_robin::PhysicalRoundRobinRepartition; use scan::{PhysicalScan, ScanPartitionState}; use simple::SimpleOperator; @@ -102,6 +105,7 @@ use self::sort::scatter_sort::ScatterSortPartitionState; use self::values::ValuesPartitionState; use super::computed_batch::ComputedBatches; use crate::arrays::batch::Batch2; +use crate::arrays::batch_exp::Batch; use crate::database::DatabaseContext; use crate::engine::result::ResultSink; use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; @@ -111,6 +115,8 @@ use crate::proto::DatabaseProtoConv; // Current size: 264 bytes #[derive(Debug)] pub enum PartitionState { + Project(ProjectPartitionState), + HashAggregate(HashAggregatePartitionState), UngroupedAggregate(UngroupedAggregatePartitionState), NestedLoopJoinBuild(NestedLoopJoinBuildPartitionState), @@ -156,6 +162,106 @@ pub enum OperatorState { None, } +/// Poll result for operator execution. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PollExecute { + /// Operator accepted input and wrote its output to the output batch. + /// + /// The next poll should be with a new input batch. + Ready, + /// Push pending. Waker stored, re-execute with the exact same state. + Pending, + /// Operator accepted as much input as it can handle. Don't provide any + /// additional input. + Break, + /// Operator needs more input before it'll produce any meaningful output. + NeedsMore, + /// Operator has more output. Call again with the same input batch. + HasMore, + /// No more output. + Exhausted, +} + +/// Poll result for operator finalization. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PollFinalize { + /// Operator finalized, execution of this operator finished.
+ /// + /// `poll_execute` will not be called after this is returned. + Finalized, + /// This operator needs to be drained. + /// + /// `poll_execute` will be called with empty input batches until the + /// operator indicates it's been exhausted. + NeedsDrain, + /// Finalize pending, re-execute with the same state. + Pending, +} + +#[derive(Debug)] +pub enum PartitionAndOperatorStates { + /// Operators that have a single input/output. + Branchless { + /// Global operator state. + operator_state: OperatorState, + /// State per-partition. + partition_states: Vec, + }, + /// Operators that produce 1 or more output branches. + /// + /// Mostly for materializations. + BranchingOutput { + /// Global operator state. + operator_state: OperatorState, + /// Single set of input states. + inputs_states: Vec, + /// Multiple sets of output states. + output_states: Vec>, + }, + /// Operators that have two children, with this operator acting as the + /// "sink" for one child. + /// + /// For joins, the build side is the terminating input, while the probe side + /// is non-terminating. + TerminatingInput { + /// Global operator state. + operator_state: OperatorState, + /// States for the input that is non-terminating. + nonterminating_states: Vec, + /// States for the input that is terminated by this operator. + terminating_states: Vec, + }, +} + +impl PartitionAndOperatorStates { + pub fn branchless_into_states(self) -> Result<(OperatorState, Vec)> { + match self { + Self::Branchless { + operator_state, + partition_states, + } => Ok((operator_state, partition_states)), + Self::BranchingOutput { .. } => Err(RayexecError::new( + "Expected branchless states, got branching output", + )), + Self::TerminatingInput { .. } => Err(RayexecError::new( + "Expected branchless states, got terminating input", + )), + } + } +} + +#[derive(Debug)] +pub struct ExecuteInOutState<'a> { + /// Input batch being pushed to the operator.
+ /// + /// May be None for operators that are only producing output. + input: Option<&'a mut Batch>, + /// Output batch the operator should write to. + /// + /// May be None for operators that only consume batches. + output: Option<&'a mut Batch>, +} + /// Result of a push to an operator. /// /// An operator may not be ready to accept input either because it's waiting on @@ -271,6 +377,34 @@ pub struct ExecutionStates2 { } pub trait ExecutableOperator: Sync + Send + Debug + Explainable { + fn create_states( + &self, + context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + unimplemented!() + } + + fn poll_execute( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + unimplemented!() + } + + fn poll_finalize( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + unimplemented!() + } + /// Create execution states for this operator. /// /// `input_partitions` is the partitioning for each input that will be @@ -281,7 +415,9 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { &self, _context: &DatabaseContext, _partitions: Vec, - ) -> Result; + ) -> Result { + unimplemented!() + } /// Try to push a batch for this partition. fn poll_push2( @@ -290,7 +426,9 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { partition_state: &mut PartitionState, operator_state: &OperatorState, batch: Batch2, - ) -> Result; + ) -> Result { + unimplemented!() + } /// Finalize pushing to partition. /// @@ -301,7 +439,9 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result { + unimplemented!() + } /// Try to pull a batch for this partition. 
fn poll_pull2( @@ -309,7 +449,9 @@ pub trait ExecutableOperator: Sync + Send + Debug + Explainable { cx: &mut Context, partition_state: &mut PartitionState, operator_state: &OperatorState, - ) -> Result; + ) -> Result { + unimplemented!() + } } // 144 bytes @@ -604,7 +746,7 @@ impl DatabaseProtoConv for PhysicalOperator { PhysicalOperator::Filter(PhysicalFilter::from_proto_ctx(op, context)?) } Value::Project(op) => { - PhysicalOperator::Project(PhysicalProject::from_proto_ctx(op, context)?) + PhysicalOperator::Project(PhysicalProject2::from_proto_ctx(op, context)?) } Value::Insert(op) => { PhysicalOperator::Insert(PhysicalInsert::from_proto_ctx(op, context)?) diff --git a/crates/rayexec_execution/src/execution/operators/physical_project.rs b/crates/rayexec_execution/src/execution/operators/physical_project.rs new file mode 100644 index 000000000..48caf8ed1 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_project.rs @@ -0,0 +1,191 @@ +use std::task::Context; + +use rayexec_error::{OptionExt, Result}; + +use super::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionAndOperatorStates, + PartitionState, + PollExecute, + PollFinalize, +}; +use crate::database::DatabaseContext; +use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; +use crate::expr::physical::evaluator::ExpressionEvaluator; +use crate::expr::physical::PhysicalScalarExpression; + +#[derive(Debug)] +pub struct PhysicalProject { + pub(crate) projections: Vec, +} + +#[derive(Debug)] +pub struct ProjectPartitionState { + evaluator: ExpressionEvaluator, +} + +impl ExecutableOperator for PhysicalProject { + fn create_states( + &self, + _context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + let partition_states = (0..partitions) + .map(|_| { + Ok(PartitionState::Project(ProjectPartitionState { + evaluator: ExpressionEvaluator::try_new(self.projections.clone(), batch_size)?, + })) + }) + .collect::>>()?; + + 
Ok(PartitionAndOperatorStates::Branchless { + operator_state: OperatorState::None, + partition_states, + }) + } + + fn poll_execute( + &self, + _cx: &mut Context, + partition_state: &mut PartitionState, + _operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let state = match partition_state { + PartitionState::Project(state) => state, + other => panic!("invalid state: {other:?}"), + }; + + let input = inout.input.required("batch input")?; + let output = inout.output.required("batch output")?; + + let sel = input.selection(); + state.evaluator.eval_batch(input, sel, output)?; + + Ok(PollExecute::Ready) + } + + fn poll_finalize( + &self, + _cx: &mut Context, + _partition_state: &mut PartitionState, + _operator_state: &OperatorState, + ) -> Result { + Ok(PollFinalize::Finalized) + } +} + +impl Explainable for PhysicalProject { + fn explain_entry(&self, conf: ExplainConfig) -> ExplainEntry { + unimplemented!() + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::exp::Array; + use crate::arrays::batch_exp::Batch; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_batches_eq; + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::wrapper::OperatorWrapper; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + use crate::expr::physical::literal_expr::PhysicalLiteralExpr; + + #[test] + fn project_simple() { + let projections = vec![ + PhysicalScalarExpression::Column(PhysicalColumnExpr { + datatype: DataType::Int32, + idx: 1, + }), + PhysicalScalarExpression::Literal(PhysicalLiteralExpr { + literal: "lit".into(), + }), + ]; + + let operator = PhysicalProject { projections }; + let states = operator + .create_states(&test_database_context(), 4, 1) + .unwrap(); + let (operator_state, mut partition_states) = 
states.branchless_into_states().unwrap(); + + let wrapper = OperatorWrapper::new(operator); + + let mut out = Batch::try_from_arrays( + [ + Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(), + Array::new(&NopBufferManager, DataType::Utf8, 4).unwrap(), + ], + false, + ) + .unwrap(); + + let mut in1 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in1), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected1 = Batch::try_from_arrays( + [ + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + Array::try_from_iter(["lit", "lit", "lit", "lit"]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected1, &out); + + let mut in2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([Some(4), Some(5), None, Some(7)]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in2), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected2 = Batch::try_from_arrays( + [ + Array::try_from_iter([Some(4), Some(5), None, Some(7)]).unwrap(), + Array::try_from_iter(["lit", "lit", "lit", "lit"]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected2, &out); + } +} diff --git a/crates/rayexec_execution/src/execution/operators/project.rs b/crates/rayexec_execution/src/execution/operators/project.rs index 40f23557f..49b4abbd8 100644 --- a/crates/rayexec_execution/src/execution/operators/project.rs +++ b/crates/rayexec_execution/src/execution/operators/project.rs @@ -7,7 +7,7 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::expr::physical::PhysicalScalarExpression; use crate::proto::DatabaseProtoConv; -pub type 
PhysicalProject = SimpleOperator; +pub type PhysicalProject2 = SimpleOperator; #[derive(Debug)] pub struct ProjectOperation { @@ -41,7 +41,7 @@ impl Explainable for ProjectOperation { } } -impl DatabaseProtoConv for PhysicalProject { +impl DatabaseProtoConv for PhysicalProject2 { type ProtoType = rayexec_proto::generated::execution::PhysicalProject; fn to_proto_ctx(&self, context: &DatabaseContext) -> Result { diff --git a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs index a41019a36..cf624a08a 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/gather_sort.rs @@ -614,7 +614,7 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; - use crate::execution::operators::test_util::{ + use crate::execution::operators::testutil::{ make_i32_batch, unwrap_poll_pull_batch, TestWakerContext, diff --git a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs index 6d215b0dd..76ae8d739 100644 --- a/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs +++ b/crates/rayexec_execution/src/execution/operators/sort/scatter_sort.rs @@ -264,9 +264,9 @@ mod tests { use super::*; use crate::arrays::datatype::DataType; - use crate::execution::operators::test_util::{ + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::{ make_i32_batch, - test_database_context, unwrap_poll_pull_batch, TestWakerContext, }; diff --git a/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs b/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs new file mode 100644 index 000000000..0f6cf425c --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/testutil/db_context.rs @@ -0,0 +1,12 @@ +use 
std::sync::Arc; + +use crate::database::system::new_system_catalog; +use crate::database::DatabaseContext; +use crate::datasource::DataSourceRegistry; + +pub fn test_database_context() -> DatabaseContext { + DatabaseContext::new(Arc::new( + new_system_catalog(&DataSourceRegistry::default()).unwrap(), + )) + .unwrap() +} diff --git a/crates/rayexec_execution/src/execution/operators/test_util.rs b/crates/rayexec_execution/src/execution/operators/testutil/mod.rs similarity index 94% rename from crates/rayexec_execution/src/execution/operators/test_util.rs rename to crates/rayexec_execution/src/execution/operators/testutil/mod.rs index e3aa864e8..29c6757bc 100644 --- a/crates/rayexec_execution/src/execution/operators/test_util.rs +++ b/crates/rayexec_execution/src/execution/operators/testutil/mod.rs @@ -1,4 +1,7 @@ //! Utilities for testing operator implementations. +pub mod db_context; +pub mod wrapper; + use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use std::task::{Context, Wake, Waker}; @@ -20,13 +23,6 @@ use crate::database::system::new_system_catalog; use crate::database::DatabaseContext; use crate::datasource::DataSourceRegistry; -pub fn test_database_context() -> DatabaseContext { - DatabaseContext::new(Arc::new( - new_system_catalog(&DataSourceRegistry::default()).unwrap(), - )) - .unwrap() -} - /// Test context containg a waker implementation that counts the number of times /// it's woken. 
/// diff --git a/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs b/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs new file mode 100644 index 000000000..e83dff465 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/testutil/wrapper.rs @@ -0,0 +1,74 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::Arc; +use std::task::{Context, Wake, Waker}; + +use rayexec_error::Result; + +use crate::execution::operators::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionState, + PollExecute, + PollFinalize, +}; + +#[derive(Debug, Default)] +pub struct CountingWaker { + count: AtomicUsize, +} + +impl CountingWaker { + pub fn wake_count(&self) -> usize { + self.count.load(Ordering::SeqCst) + } +} + +impl Wake for CountingWaker { + fn wake(self: Arc) { + self.count.fetch_add(1, Ordering::SeqCst); + } +} + +/// Wrapper around an operator that uses a stub waker that tracks how many times +/// it's woken. +#[derive(Debug)] +pub struct OperatorWrapper { + pub waker: Arc, + pub operator: O, +} + +impl OperatorWrapper +where + O: ExecutableOperator, +{ + pub fn new(operator: O) -> Self { + OperatorWrapper { + waker: Arc::new(CountingWaker::default()), + operator, + } + } + + pub fn poll_execute( + &self, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let waker = Waker::from(self.waker.clone()); + let mut cx = Context::from_waker(&waker); + self.operator + .poll_execute(&mut cx, partition_state, operator_state, inout) + } + + pub fn poll_finalize( + &self, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + let waker = Waker::from(self.waker.clone()); + let mut cx = Context::from_waker(&waker); + self.operator + .poll_finalize(&mut cx, partition_state, operator_state) + } +} diff --git a/crates/rayexec_execution/src/expr/physical/case_expr.rs 
b/crates/rayexec_execution/src/expr/physical/case_expr.rs index 48a4c78b4..21f077540 100644 --- a/crates/rayexec_execution/src/expr/physical/case_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/case_expr.rs @@ -50,7 +50,7 @@ impl PhysicalCaseExpr { // 2 arrays in the buffer, one 'boolean' for conditional evaluation, one // for the result if condition is true. 'then' and 'else' expressions // should evaluate to the same type. - let buffer = Batch::from_arrays( + let buffer = Batch::try_from_arrays( [ Array::new(&NopBufferManager, DataType::Boolean, batch_size)?, Array::new(&NopBufferManager, self.else_expr.datatype(), batch_size)?, @@ -216,7 +216,7 @@ mod tests { datatype: DataType::Int32, }; - let mut input = Batch::from_arrays( + let mut input = Batch::try_from_arrays( [ Array::try_from_iter([true, true, false]).unwrap(), Array::try_from_iter([1, 2, 3]).unwrap(), @@ -258,7 +258,7 @@ mod tests { datatype: DataType::Int32, }; - let mut input = Batch::from_arrays( + let mut input = Batch::try_from_arrays( [ Array::try_from_iter([Some(true), None, Some(false)]).unwrap(), Array::try_from_iter([1, 2, 3]).unwrap(), diff --git a/crates/rayexec_execution/src/expr/physical/cast_expr.rs b/crates/rayexec_execution/src/expr/physical/cast_expr.rs index 729e3f7e8..8895c95d6 100644 --- a/crates/rayexec_execution/src/expr/physical/cast_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/cast_expr.rs @@ -24,7 +24,7 @@ pub struct PhysicalCastExpr { impl PhysicalCastExpr { pub(crate) fn create_state(&self, batch_size: usize) -> Result { let inputs = vec![self.expr.create_state(batch_size)?]; - let buffer = Batch::from_arrays( + let buffer = Batch::try_from_arrays( [Array::new( &NopBufferManager, self.expr.datatype(), diff --git a/crates/rayexec_execution/src/expr/physical/column_expr.rs b/crates/rayexec_execution/src/expr/physical/column_expr.rs index 40537bc5f..0a5e37e5c 100644 --- a/crates/rayexec_execution/src/expr/physical/column_expr.rs +++ 
b/crates/rayexec_execution/src/expr/physical/column_expr.rs @@ -78,7 +78,7 @@ mod tests { #[test] fn column_expr_eval() { - let mut input = Batch::from_arrays( + let mut input = Batch::try_from_arrays( [ Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), Array::try_from_iter([1, 2, 3, 4]).unwrap(), @@ -103,7 +103,7 @@ mod tests { #[test] fn column_expr_eval_with_selection() { - let mut input = Batch::from_arrays( + let mut input = Batch::try_from_arrays( [ Array::try_from_iter(["a", "b", "c", "d"]).unwrap(), Array::try_from_iter([1, 2, 3, 4]).unwrap(), diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index b694ecfa6..c79fe9434 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -77,6 +77,9 @@ impl ExpressionEvaluator { /// /// `input` is mutable only to allow converting arrays from owned to /// managed. + /// + /// `output` will have num rows set to the number of logical rows in the + /// selection. 
pub fn eval_batch( &mut self, input: &mut Batch, @@ -92,6 +95,8 @@ impl ExpressionEvaluator { Self::eval_expression(expr, input, state, sel, output)?; } + output.set_num_rows(sel.len())?; + Ok(()) } diff --git a/crates/rayexec_execution/src/expr/physical/mod.rs b/crates/rayexec_execution/src/expr/physical/mod.rs index 0f1cbdc6b..3bd1ff7b8 100644 --- a/crates/rayexec_execution/src/expr/physical/mod.rs +++ b/crates/rayexec_execution/src/expr/physical/mod.rs @@ -210,67 +210,3 @@ impl DatabaseProtoConv for PhysicalSortExpression { }) } } - -#[cfg(test)] -mod tests { - use planner::PhysicalExpressionPlanner; - - use super::*; - use crate::arrays::datatype::DataType; - use crate::expr; - use crate::logical::binder::table_list::TableList; - - #[test] - fn select_some() { - let batch = Batch2::try_new([ - Array2::from_iter([1, 4, 6, 9, 12]), - Array2::from_iter([2, 3, 8, 9, 10]), - ]) - .unwrap(); - - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) - .unwrap(); - - let expr = expr::gt(expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)); - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical = planner.plan_scalar(&[table_ref], &expr).unwrap(); - - let selection = physical.select(&batch).unwrap(); - let expected = SelectionVector::from_iter([1, 4]); - - assert_eq!(expected, selection) - } - - #[test] - fn select_none() { - let batch = Batch2::try_new([ - Array2::from_iter([1, 2, 6, 9, 9]), - Array2::from_iter([2, 3, 8, 9, 10]), - ]) - .unwrap(); - - let mut table_list = TableList::empty(); - let table_ref = table_list - .push_table( - None, - vec![DataType::Int32, DataType::Int32], - vec!["a".to_string(), "b".to_string()], - ) - .unwrap(); - - let expr = expr::gt(expr::col_ref(table_ref, 0), expr::col_ref(table_ref, 1)); - let planner = PhysicalExpressionPlanner::new(&table_list); - let physical = 
planner.plan_scalar(&[table_ref], &expr).unwrap(); - - let selection = physical.select(&batch).unwrap(); - let expected = SelectionVector::empty(); - - assert_eq!(expected, selection) - } -} diff --git a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs index b6011fb91..3d9208666 100644 --- a/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs +++ b/crates/rayexec_execution/src/expr/physical/scalar_function_expr.rs @@ -34,7 +34,7 @@ impl PhysicalScalarFunctionExpr { .map(|input| Array::new(&NopBufferManager, input.datatype(), batch_size)) .collect::>>()?; - let buffer = Batch::from_arrays(arrays, false)?; + let buffer = Batch::try_from_arrays(arrays, false)?; Ok(ExpressionState { buffer, inputs }) } diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs index fa67ea729..36b584f55 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/add.rs @@ -217,7 +217,7 @@ mod tests { fn add_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([4, 5, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs index 76a56409c..91b82308a 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/div.rs @@ -264,7 +264,7 @@ mod tests { fn div_i32() { let a = Array::try_from_iter([4, 5, 6]).unwrap(); let b = Array::try_from_iter([1, 2, 3]).unwrap(); - let batch = Batch::from_arrays([a, b], 
true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs index 4edbfbe1d..2fc7ad1e7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/mul.rs @@ -330,7 +330,7 @@ mod tests { fn mul_i32() { let a = Array::try_from_iter([4, 5, 6]).unwrap(); let b = Array::try_from_iter([1, 2, 3]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs index f7c590704..59b48e546 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/rem.rs @@ -213,7 +213,7 @@ mod tests { fn rem_i32() { let a = Array::try_from_iter([4, 5, 6]).unwrap(); let b = Array::try_from_iter([1, 2, 3]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs index fc00d326c..455411538 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/arith/sub.rs @@ -225,7 +225,7 @@ mod tests { fn sub_i32() { let a = Array::try_from_iter([4, 5, 6]).unwrap(); let b = Array::try_from_iter([1, 2, 3]).unwrap(); - let batch = Batch::from_arrays([a, b], 
true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs index c9d3cc7e8..d594b922e 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/boolean.rs @@ -236,7 +236,7 @@ mod tests { fn and_bool_2() { let a = Array::try_from_iter([true, false, false]).unwrap(); let b = Array::try_from_iter([true, true, false]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -267,7 +267,7 @@ mod tests { let a = Array::try_from_iter([true, true, true]).unwrap(); let b = Array::try_from_iter([false, true, true]).unwrap(); let c = Array::try_from_iter([true, true, false]).unwrap(); - let batch = Batch::from_arrays([a, b, c], true).unwrap(); + let batch = Batch::try_from_arrays([a, b, c], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -301,7 +301,7 @@ mod tests { fn or_bool_2() { let a = Array::try_from_iter([true, false, false]).unwrap(); let b = Array::try_from_iter([true, true, false]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs index df0425fe4..43b1575d7 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/comparison.rs @@ -864,7 +864,7 @@ mod tests { fn eq_i32() { let a = Array::try_from_iter([1, 2, 
3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -893,7 +893,7 @@ mod tests { fn neq_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -922,7 +922,7 @@ mod tests { fn lt_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -951,7 +951,7 @@ mod tests { fn lt_eq_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -980,7 +980,7 @@ mod tests { fn gt_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -1009,7 +1009,7 @@ mod tests { fn gt_eq_i32() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([2, 2, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs 
b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs index 888bd0b2b..79fe65077 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/is.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/is.rs @@ -347,7 +347,7 @@ mod tests { #[test] fn is_null_all_valid() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); - let batch = Batch::from_arrays([a], true).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -369,7 +369,7 @@ mod tests { #[test] fn is_null_some_invalid() { let a = Array::try_from_iter([Some(1), None, None]).unwrap(); - let batch = Batch::from_arrays([a], true).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -391,7 +391,7 @@ mod tests { #[test] fn is_true() { let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); - let batch = Batch::from_arrays([a], true).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list @@ -413,7 +413,7 @@ mod tests { #[test] fn is_not_true() { let a = Array::try_from_iter([Some(true), Some(false), None]).unwrap(); - let batch = Batch::from_arrays([a], true).unwrap(); + let batch = Batch::try_from_arrays([a], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs index 63ea2dbdd..3c7f001a1 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/list/list_values.rs @@ -1,5 +1,5 @@ -use stdutil::iter::IntoExactSizeIterator; use rayexec_error::{not_implemented, RayexecError, Result}; +use stdutil::iter::IntoExactSizeIterator; use 
crate::arrays::array::exp::Array; use crate::arrays::array::validity::Validity; @@ -244,7 +244,7 @@ mod tests { fn list_values_primitive() { let a = Array::try_from_iter([1, 2, 3]).unwrap(); let b = Array::try_from_iter([4, 5, 6]).unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list diff --git a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs index 919064350..6aa951fa3 100644 --- a/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs +++ b/crates/rayexec_execution/src/functions/scalar/builtin/similarity/l2_distance.rs @@ -193,7 +193,7 @@ mod tests { ) .unwrap(); - let batch = Batch::from_arrays([a, b], true).unwrap(); + let batch = Batch::try_from_arrays([a, b], true).unwrap(); let mut table_list = TableList::empty(); let table_ref = table_list From 95feac492dacc2aa1eb0a1f3e9707c1b0d8b1987 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 6 Jan 2025 10:16:09 -0600 Subject: [PATCH 57/59] filter --- .../rayexec_execution/src/arrays/array/exp.rs | 16 +- .../rayexec_execution/src/arrays/batch_exp.rs | 26 +++ .../src/arrays/executor_exp/scalar/unary.rs | 40 +++- .../src/execution/operators/mod.rs | 3 + .../execution/operators/physical_filter.rs | 215 ++++++++++++++++++ .../src/expr/physical/evaluator.rs | 16 ++ 6 files changed, 307 insertions(+), 9 deletions(-) create mode 100644 crates/rayexec_execution/src/execution/operators/physical_filter.rs diff --git a/crates/rayexec_execution/src/arrays/array/exp.rs b/crates/rayexec_execution/src/arrays/array/exp.rs index adee68af3..a726359f0 100644 --- a/crates/rayexec_execution/src/arrays/array/exp.rs +++ b/crates/rayexec_execution/src/arrays/array/exp.rs @@ -244,13 +244,15 @@ where .with_field("other_datatype", other.datatype.clone())); } - if 
self.capacity() != other.capacity() { - return Err(RayexecError::new( - "Attempted to clone into array from other array with different capacity", - ) - .with_field("own_capacity", self.capacity()) - .with_field("other_capacity", other.capacity())); - } + // TODO: Do we want this check? Dictionaries right now can have differing capacities based + // on selection inputs. + // if self.capacity() != other.capacity() { + // return Err(RayexecError::new( + // "Attempted to clone into array from other array with different capacity", + // ) + // .with_field("own_capacity", self.capacity()) + // .with_field("other_capacity", other.capacity())); + // } let managed = other.data.make_managed(manager)?; self.data.set_managed(managed)?; diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs b/crates/rayexec_execution/src/arrays/batch_exp.rs index 12ffbce36..90659fcb2 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -108,6 +108,32 @@ where }) } + pub fn clone_from(&mut self, manager: &B, other: &mut Self) -> Result<()> { + if self.arrays.len() != other.arrays.len() { + return Err(RayexecError::new( + "Attempted to clone from other batch with different number of arrays", + )); + } + + for (a, b) in self.arrays.iter_mut().zip(other.arrays.iter_mut()) { + a.clone_from(manager, b)?; + } + + self.set_num_rows(other.num_rows())?; + + Ok(()) + } + + pub fn select(&mut self, manager: &B, selection: &[usize]) -> Result<()> { + for arr in &mut self.arrays { + arr.select(manager, selection.iter().copied())?; + } + + self.set_num_rows(selection.len())?; + + Ok(()) + } + pub fn num_rows(&self) -> usize { self.num_rows } diff --git a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs index 3cf7e2122..0f55bd6c6 100644 --- a/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs +++ 
b/crates/rayexec_execution/src/arrays/executor_exp/scalar/unary.rs @@ -7,6 +7,7 @@ use crate::arrays::buffer::physical_type::{ Addressable, AddressableMut, MutablePhysicalStorage, + PhysicalBool, PhysicalStorage, }; use crate::arrays::executor_exp::{OutBuffer, PutBuffer}; @@ -153,6 +154,7 @@ impl UnaryExecutor { let input = S::get_addressable(array.array_buffer)?; let validity = array.validity; + // TODO: `op` should be called with input_idx? if validity.all_valid() { for (output_idx, input_idx) in selection.into_iter().enumerate() { let selected_idx = array.selection.get(input_idx).unwrap(); @@ -175,6 +177,41 @@ impl UnaryExecutor { Ok(()) } + + pub fn select( + array: &Array, + selection: impl IntoExactSizeIterator, + true_indices: &mut Vec, + ) -> Result<()> { + let flat = array.flat_view()?; + + let bools = PhysicalBool::get_addressable(&flat.array_buffer)?; + let validity = flat.validity; + + if validity.all_valid() { + for input_idx in selection.into_iter() { + let selected_idx = flat.selection.get(input_idx).unwrap(); + let v = *bools.get(selected_idx).unwrap(); + + if v { + true_indices.push(input_idx); + } + } + } else { + for input_idx in selection.into_iter() { + let selected_idx = flat.selection.get(input_idx).unwrap(); + + if validity.is_valid(selected_idx) { + let v = *bools.get(selected_idx).unwrap(); + if v { + true_indices.push(input_idx); + } + } + } + } + + Ok(()) + } } #[cfg(test)] @@ -242,8 +279,7 @@ mod tests { fn int32_inc_by_2_in_place() { let mut array = Array::try_from_iter([1, 2, 3]).unwrap(); - UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v += 2) - .unwrap(); + UnaryExecutor::execute_in_place::(&mut array, 0..3, |v| *v += 2).unwrap(); let arr_slice = array.data().try_as_slice::().unwrap(); assert_eq!(&[3, 4, 5], arr_slice); diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 19744954b..2fec8a440 100644 --- 
a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -30,6 +30,7 @@ pub mod unnest; pub mod values; pub mod window; +pub mod physical_filter; pub mod physical_project; pub(crate) mod util; @@ -60,6 +61,7 @@ use insert::PhysicalInsert; use limit::PhysicalLimit; use materialize::{MaterializeSourceOperation, MaterializedSinkOperation}; use nl_join::PhysicalNestedLoopJoin; +use physical_filter::FilterPartitionState; use physical_project::ProjectPartitionState; use project::{PhysicalProject2, ProjectOperation}; use rayexec_error::{not_implemented, OptionExt, RayexecError, Result}; @@ -116,6 +118,7 @@ use crate::proto::DatabaseProtoConv; #[derive(Debug)] pub enum PartitionState { Project(ProjectPartitionState), + Filter(FilterPartitionState), HashAggregate(HashAggregatePartitionState), UngroupedAggregate(UngroupedAggregatePartitionState), diff --git a/crates/rayexec_execution/src/execution/operators/physical_filter.rs b/crates/rayexec_execution/src/execution/operators/physical_filter.rs new file mode 100644 index 000000000..b94f0b969 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_filter.rs @@ -0,0 +1,215 @@ +use std::task::Context; + +use rayexec_error::{OptionExt, Result}; + +use super::{ + ExecutableOperator, + ExecuteInOutState, + OperatorState, + PartitionAndOperatorStates, + PartitionState, + PollExecute, + PollFinalize, +}; +use crate::arrays::array::exp::Array; +use crate::arrays::array::selection::Selection; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; +use crate::arrays::executor_exp::scalar::unary::UnaryExecutor; +use crate::database::DatabaseContext; +use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; +use crate::expr::physical::evaluator::ExpressionEvaluator; +use crate::expr::physical::PhysicalScalarExpression; + +#[derive(Debug)] +pub struct PhysicalFilter { + pub(crate) 
predicate: PhysicalScalarExpression, +} + +#[derive(Debug)] +pub struct FilterPartitionState { + evaluator: ExpressionEvaluator, + /// Boolean array for holding the output of the filter expression. + output: Array, + /// Selected indices buffer. + selection: Vec, +} + +impl ExecutableOperator for PhysicalFilter { + fn create_states( + &self, + _context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + let partition_states = (0..partitions) + .map(|_| { + Ok(PartitionState::Filter(FilterPartitionState { + evaluator: ExpressionEvaluator::try_new( + vec![self.predicate.clone()], + batch_size, + )?, + output: Array::new(&NopBufferManager, DataType::Boolean, batch_size)?, + selection: Vec::with_capacity(batch_size), + })) + }) + .collect::>>()?; + + Ok(PartitionAndOperatorStates::Branchless { + operator_state: OperatorState::None, + partition_states, + }) + } + + fn poll_execute( + &self, + _cx: &mut Context, + partition_state: &mut PartitionState, + _operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + let state = match partition_state { + PartitionState::Filter(state) => state, + other => panic!("invalid state: {other:?}"), + }; + + let input = inout.input.required("batch input")?; + let output = inout.output.required("batch output")?; + + state.output.reset_for_write(&NopBufferManager)?; + state + .evaluator + .eval_single_expression(input, input.selection(), &mut state.output)?; + + state.selection.clear(); + UnaryExecutor::select( + &state.output, + Selection::linear(input.num_rows()), + &mut state.selection, + )?; + + output.clone_from(&NopBufferManager, input)?; + + if state.selection.len() != output.num_rows() { + // Only add selection if we're actually omitting rows. 
+ output.select(&NopBufferManager, &state.selection)?; + } + + Ok(PollExecute::Ready) + } + + fn poll_finalize( + &self, + _cx: &mut Context, + _partition_state: &mut PartitionState, + _operator_state: &OperatorState, + ) -> Result { + Ok(PollFinalize::Finalized) + } +} + +impl Explainable for PhysicalFilter { + fn explain_entry(&self, _conf: ExplainConfig) -> ExplainEntry { + ExplainEntry::new("Filter").with_value("predicate", &self.predicate) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::array::exp::Array; + use crate::arrays::batch_exp::Batch; + use crate::arrays::buffer::buffer_manager::NopBufferManager; + use crate::arrays::datatype::DataType; + use crate::arrays::testutil::assert_batches_eq; + use crate::execution::operators::testutil::db_context::test_database_context; + use crate::execution::operators::testutil::wrapper::OperatorWrapper; + use crate::expr::physical::column_expr::PhysicalColumnExpr; + + #[test] + fn filter_simple() { + let operator = PhysicalFilter { + predicate: PhysicalScalarExpression::Column(PhysicalColumnExpr { + datatype: DataType::Boolean, + idx: 0, + }), + }; + + let states = operator + .create_states(&test_database_context(), 4, 1) + .unwrap(); + let (operator_state, mut partition_states) = states.branchless_into_states().unwrap(); + let wrapper = OperatorWrapper::new(operator); + + let mut out = Batch::try_from_arrays( + [ + Array::new(&NopBufferManager, DataType::Boolean, 4).unwrap(), + Array::new(&NopBufferManager, DataType::Int32, 4).unwrap(), + ], + false, + ) + .unwrap(); + + let mut in1 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, true, true]).unwrap(), + Array::try_from_iter([8, 9, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in1), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected1 = 
Batch::try_from_arrays( + [ + Array::try_from_iter([true, true, true]).unwrap(), + Array::try_from_iter([8, 7, 6]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected1, &out); + + let mut in2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true, false, false, false]).unwrap(), + Array::try_from_iter([4, 3, 2, 1]).unwrap(), + ], + true, + ) + .unwrap(); + + wrapper + .poll_execute( + &mut partition_states[0], + &operator_state, + ExecuteInOutState { + input: Some(&mut in2), + output: Some(&mut out), + }, + ) + .unwrap(); + + let expected2 = Batch::try_from_arrays( + [ + Array::try_from_iter([true]).unwrap(), + Array::try_from_iter([4]).unwrap(), + ], + true, + ) + .unwrap(); + assert_batches_eq(&expected2, &out); + } +} diff --git a/crates/rayexec_execution/src/expr/physical/evaluator.rs b/crates/rayexec_execution/src/expr/physical/evaluator.rs index c79fe9434..4eb896769 100644 --- a/crates/rayexec_execution/src/expr/physical/evaluator.rs +++ b/crates/rayexec_execution/src/expr/physical/evaluator.rs @@ -100,6 +100,22 @@ impl ExpressionEvaluator { Ok(()) } + pub fn eval_single_expression( + &mut self, + input: &mut Batch, + sel: Selection, + output: &mut Array, + ) -> Result<()> { + debug_assert_eq!(1, self.expressions.len()); + Self::eval_expression( + &self.expressions[0], + input, + &mut self.states[0], + sel, + output, + ) + } + pub(crate) fn eval_expression( expr: &PhysicalScalarExpression, input: &mut Batch, From 5f878fa7c4fe630dedd7669ed311efd9cca6dcf8 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 6 Jan 2025 10:52:10 -0600 Subject: [PATCH 58/59] block --- .../rayexec_execution/src/arrays/batch_exp.rs | 2 + .../execution/operators/batch_collection.rs | 225 ++++++++++++++++++ .../src/execution/operators/mod.rs | 1 + 3 files changed, 228 insertions(+) create mode 100644 crates/rayexec_execution/src/execution/operators/batch_collection.rs diff --git a/crates/rayexec_execution/src/arrays/batch_exp.rs 
b/crates/rayexec_execution/src/arrays/batch_exp.rs index 90659fcb2..39a5becad 100644 --- a/crates/rayexec_execution/src/arrays/batch_exp.rs +++ b/crates/rayexec_execution/src/arrays/batch_exp.rs @@ -108,6 +108,7 @@ where }) } + /// Clones `other` into self. pub fn clone_from(&mut self, manager: &B, other: &mut Self) -> Result<()> { if self.arrays.len() != other.arrays.len() { return Err(RayexecError::new( @@ -124,6 +125,7 @@ where Ok(()) } + /// Selects rows from the batch based on `selection`. pub fn select(&mut self, manager: &B, selection: &[usize]) -> Result<()> { for arr in &mut self.arrays { arr.select(manager, selection.iter().copied())?; diff --git a/crates/rayexec_execution/src/execution/operators/batch_collection.rs b/crates/rayexec_execution/src/execution/operators/batch_collection.rs new file mode 100644 index 000000000..449aeaacb --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/batch_collection.rs @@ -0,0 +1,225 @@ +use rayexec_error::{RayexecError, Result}; + +use crate::arrays::array::exp::Array; +use crate::arrays::batch_exp::Batch; +use crate::arrays::buffer::buffer_manager::NopBufferManager; +use crate::arrays::datatype::DataType; + +#[derive(Debug)] +pub struct BatchCollection { + /// Datatypes of the arrays we're storing. + datatypes: Vec, + /// All blocks making up this collection. + blocks: Vec, +} + +impl BatchCollection {} + +#[derive(Debug)] +pub struct BatchCollectionBlock { + /// Number of rows we're currently storing in this block. + row_count: usize, + /// Max number of rows this block store. + capacity: usize, + /// Arrays making up this block. 
+ arrays: Vec, +} + +impl BatchCollectionBlock { + pub fn new(datatypes: &[DataType], capacity: usize) -> Result { + let arrays = datatypes + .iter() + .map(|datatype| Array::new(&NopBufferManager, datatype.clone(), capacity)) + .collect::>>()?; + + Ok(BatchCollectionBlock { + row_count: 0, + capacity, + arrays, + }) + } + + pub fn capacity(&self) -> usize { + self.capacity + } + + pub fn set_row_count(&mut self, count: usize) -> Result<()> { + if count > self.capacity { + return Err(RayexecError::new("Row count would exceed capacity")); + } + self.row_count = count; + Ok(()) + } + + pub fn row_count(&self) -> usize { + self.row_count + } + + pub fn arrays(&self) -> &[Array] { + &self.arrays + } + + pub fn has_capacity_for_rows(&self, additional: usize) -> bool { + self.row_count + additional < self.capacity + } + + /// Appends a batch to this block. + pub fn append_batch_data(&mut self, batch: &Batch) -> Result<()> { + let total_num_rows = self.row_count + batch.num_rows(); + if total_num_rows > self.capacity { + return Err( + RayexecError::new("New row count for batch block would exceed capacity") + .with_field("new_row_count", total_num_rows) + .with_field("capacity", self.capacity), + ); + } + + if self.arrays.len() != batch.arrays().len() { + return Err(RayexecError::new("Array length mismatch")); + } + + for (from, to) in batch.arrays.iter().zip(self.arrays.iter_mut()) { + // [0..batch_num_rows) => [self_row_count..) + let mapping = + (0..batch.num_rows()).zip(self.row_count..(self.row_count + batch.num_rows())); + from.copy_rows(mapping, to)?; + } + + self.row_count += batch.num_rows(); + + Ok(()) + } + + /// Copies a single row from another block. 
+ pub fn copy_row_from_other( + &mut self, + dest_row: usize, + source: &BatchCollectionBlock, + source_row: usize, + ) -> Result<()> { + if self.arrays.len() != source.arrays.len() { + return Err(RayexecError::new( + "Number of arrays in self and other differ", + )); + } + + for (from, to) in source.arrays().iter().zip(self.arrays.iter_mut()) { + let mapping = [(source_row, dest_row)]; + from.copy_rows(mapping, to)?; + } + + Ok(()) + } + + pub fn into_batch(self) -> Result { + let mut batch = Batch::try_from_arrays(self.arrays, false)?; + batch.set_num_rows(self.row_count)?; + + Ok(batch) + } +} + +#[cfg(test)] +mod tests { + use stdutil::iter::TryFromExactSizeIterator; + + use super::*; + use crate::arrays::testutil::assert_batches_eq; + + #[test] + fn block_append_i32() { + let mut block = BatchCollectionBlock::new(&[DataType::Int32], 4096).unwrap(); + + let array1 = Array::try_from_iter([4, 5, 6]).unwrap(); + let array2 = Array::try_from_iter([7, 8]).unwrap(); + let array3 = Array::try_from_iter([9, 10, 11]).unwrap(); + + let batch1 = Batch::try_from_arrays([array1], true).unwrap(); + let batch2 = Batch::try_from_arrays([array2], true).unwrap(); + let batch3 = Batch::try_from_arrays([array3], true).unwrap(); + + block.append_batch_data(&batch1).unwrap(); + block.append_batch_data(&batch2).unwrap(); + block.append_batch_data(&batch3).unwrap(); + + let out = block.into_batch().unwrap(); + + let expected = Batch::try_from_arrays( + [Array::try_from_iter([4, 5, 6, 7, 8, 9, 10, 11]).unwrap()], + true, + ) + .unwrap(); + + assert_batches_eq(&expected, &out); + } + + #[test] + fn block_append_i32_dictionary() { + let mut block = BatchCollectionBlock::new(&[DataType::Int32], 4096).unwrap(); + + let mut array = Array::try_from_iter([4, 5, 6]).unwrap(); + // '[4, 4, 6, 6, 5, 5]' + array.select(&NopBufferManager, [0, 0, 2, 2, 1, 1]).unwrap(); + + let batch = Batch::try_from_arrays([array], true).unwrap(); + block.append_batch_data(&batch).unwrap(); + + assert_eq!(6, 
block.row_count()); + + let out = block.into_batch().unwrap(); + + let expected = + Batch::try_from_arrays([Array::try_from_iter([4, 4, 6, 6, 5, 5]).unwrap()], true) + .unwrap(); + + assert_batches_eq(&expected, &out); + } + + #[test] + fn block_copy_row_i32_string() { + let mut block1 = + BatchCollectionBlock::new(&[DataType::Int32, DataType::Utf8], 4096).unwrap(); + let mut block2 = + BatchCollectionBlock::new(&[DataType::Int32, DataType::Utf8], 4096).unwrap(); + + block1 + .append_batch_data( + &Batch::try_from_arrays( + [ + Array::try_from_iter([4, 5, 6]).unwrap(), + Array::try_from_iter(["a", "b", "c"]).unwrap(), + ], + true, + ) + .unwrap(), + ) + .unwrap(); + + block2 + .append_batch_data( + &Batch::try_from_arrays( + [ + Array::try_from_iter([7, 8]).unwrap(), + Array::try_from_iter(["dog", "cat"]).unwrap(), + ], + true, + ) + .unwrap(), + ) + .unwrap(); + + block1.copy_row_from_other(1, &block2, 0).unwrap(); + + let out = block1.into_batch().unwrap(); + let expected = Batch::try_from_arrays( + [ + Array::try_from_iter([4, 7, 6]).unwrap(), + Array::try_from_iter(["a", "dog", "c"]).unwrap(), + ], + true, + ) + .unwrap(); + + assert_batches_eq(&expected, &out); + } +} diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index 2fec8a440..a04d981aa 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -30,6 +30,7 @@ pub mod unnest; pub mod values; pub mod window; +pub mod batch_collection; pub mod physical_filter; pub mod physical_project; From f34f71aec51a0179900a05cf8880cd878dbce653 Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Mon, 6 Jan 2025 12:19:40 -0600 Subject: [PATCH 59/59] stub --- .../src/execution/operators/hash_join/mod.rs | 22 +-- .../src/execution/operators/mod.rs | 9 +- .../operators/physical_hash_join/mod.rs | 129 ++++++++++++++++++ 3 files changed, 145 insertions(+), 15 deletions(-) create 
mode 100644 crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs diff --git a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs index 4f0756c28..5b385d0c2 100644 --- a/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/hash_join/mod.rs @@ -32,7 +32,7 @@ use crate::explain::explainable::{ExplainConfig, ExplainEntry, Explainable}; use crate::logical::logical_join::JoinType; #[derive(Debug)] -pub struct HashJoinBuildPartitionState { +pub struct HashJoinBuildPartitionState2 { /// Hash table this partition will be writing to. /// /// Optional to enable moving from the local to global state once this @@ -43,7 +43,7 @@ pub struct HashJoinBuildPartitionState { } #[derive(Debug)] -pub struct HashJoinProbePartitionState { +pub struct HashJoinProbePartitionState2 { /// Index of this partition. partition_idx: usize, /// The final output table. 
If None, the global state should be checked to @@ -214,7 +214,7 @@ impl ExecutableOperator for PhysicalHashJoin { let build_states: Vec<_> = (0..build_partitions) .map(|_| { - PartitionState::HashJoinBuild(HashJoinBuildPartitionState { + PartitionState::HashJoinBuild2(HashJoinBuildPartitionState2 { local_hashtable: Some(PartitionHashTable::new(&self.conditions)), hash_buf: Vec::new(), }) @@ -223,7 +223,7 @@ impl ExecutableOperator for PhysicalHashJoin { let probe_states: Vec<_> = (0..probe_partitions) .map(|idx| { - PartitionState::HashJoinProbe(HashJoinProbePartitionState { + PartitionState::HashJoinProbe2(HashJoinProbePartitionState2 { partition_idx: idx, global: None, hash_buf: Vec::new(), @@ -254,11 +254,11 @@ impl ExecutableOperator for PhysicalHashJoin { batch: Batch2, ) -> Result { match partition_state { - PartitionState::HashJoinBuild(state) => { + PartitionState::HashJoinBuild2(state) => { self.insert_into_local_table(state, batch)?; Ok(PollPush2::NeedsMore) } - PartitionState::HashJoinProbe(state) => { + PartitionState::HashJoinProbe2(state) => { // If we have pending output, we need to wait for that to get // pulled before trying to compute additional batches. 
if !state.buffered_output.is_empty() { @@ -354,7 +354,7 @@ impl ExecutableOperator for PhysicalHashJoin { operator_state: &OperatorState, ) -> Result { match partition_state { - PartitionState::HashJoinBuild(state) => { + PartitionState::HashJoinBuild2(state) => { let mut shared = match operator_state { OperatorState::HashJoin(state) => state.inner.lock(), other => panic!("invalid operator state: {other:?}"), @@ -415,7 +415,7 @@ impl ExecutableOperator for PhysicalHashJoin { Ok(PollFinalize2::Finalized) } - PartitionState::HashJoinProbe(state) => { + PartitionState::HashJoinProbe2(state) => { let mut shared = match operator_state { OperatorState::HashJoin(state) => state.inner.lock(), other => panic!("invalid operator state: {other:?}"), @@ -498,8 +498,8 @@ impl ExecutableOperator for PhysicalHashJoin { operator_state: &OperatorState, ) -> Result { let state = match partition_state { - PartitionState::HashJoinProbe(state) => state, - PartitionState::HashJoinBuild(_) => { + PartitionState::HashJoinProbe2(state) => state, + PartitionState::HashJoinBuild2(_) => { // We should only be pulling with the "probe" state. The "build" // state acts as a sink into the operator. panic!("should not pull with a build state") @@ -604,7 +604,7 @@ impl PhysicalHashJoin { /// Inserts a batch into a partition-local hash table. fn insert_into_local_table( &self, - state: &mut HashJoinBuildPartitionState, + state: &mut HashJoinBuildPartitionState2, batch: Batch2, ) -> Result<()> { // Compute left hashes on equality conditions. 
diff --git a/crates/rayexec_execution/src/execution/operators/mod.rs b/crates/rayexec_execution/src/execution/operators/mod.rs index a04d981aa..7ec92eac5 100644 --- a/crates/rayexec_execution/src/execution/operators/mod.rs +++ b/crates/rayexec_execution/src/execution/operators/mod.rs @@ -32,6 +32,7 @@ pub mod window; pub mod batch_collection; pub mod physical_filter; +pub mod physical_hash_join; pub mod physical_project; pub(crate) mod util; @@ -53,9 +54,9 @@ use empty::PhysicalEmpty; use filter::{FilterOperation, PhysicalFilter}; use hash_aggregate::PhysicalHashAggregate; use hash_join::{ - HashJoinBuildPartitionState, + HashJoinBuildPartitionState2, HashJoinOperatorState, - HashJoinProbePartitionState, + HashJoinProbePartitionState2, PhysicalHashJoin, }; use insert::PhysicalInsert; @@ -125,8 +126,8 @@ pub enum PartitionState { UngroupedAggregate(UngroupedAggregatePartitionState), NestedLoopJoinBuild(NestedLoopJoinBuildPartitionState), NestedLoopJoinProbe(NestedLoopJoinProbePartitionState), - HashJoinBuild(HashJoinBuildPartitionState), - HashJoinProbe(HashJoinProbePartitionState), + HashJoinBuild2(HashJoinBuildPartitionState2), + HashJoinProbe2(HashJoinProbePartitionState2), Values(ValuesPartitionState), Sink(SinkPartitionState), Source(SourcePartitionState), diff --git a/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs b/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs new file mode 100644 index 000000000..8f6f029d9 --- /dev/null +++ b/crates/rayexec_execution/src/execution/operators/physical_hash_join/mod.rs @@ -0,0 +1,129 @@ +use std::task::{Context, Waker}; + +use parking_lot::Mutex; + +use super::{ExecutableOperator, PartitionAndOperatorStates, PartitionState}; +use crate::arrays::datatype::DataType; +use crate::database::DatabaseContext; + +#[derive(Debug)] +pub enum HashJoinBuildPartitionState { + /// Partition is building. + Building(InProgressBuildState), + /// Partition finished building. 
+ Finished, +} + +#[derive(Debug)] +pub struct InProgressBuildState { + // build_data: HashedBlockCollection, +} + +#[derive(Debug)] +pub enum HashJoinProbePartitionState { + /// Partition waiting for build side to complete. + Waiting(usize), + /// Partition is probing. + Probing(ProbeState), + /// Left-join drain state. + Draining(DrainState), + /// Probing finished. + Finished, +} + +#[derive(Debug)] +pub struct ProbeState {} + +#[derive(Debug)] +pub struct DrainState {} + +#[derive(Debug)] +pub struct HashJoinOperatorState { + inner: Mutex, +} + +#[derive(Debug)] +struct HashJoinOperatorStateInner { + /// Wakers from the probe side that are waiting for the build side to + /// complete. + /// + /// Keyed by probe-side partition index. + build_waiting_probers: Vec>, +} + +#[derive(Debug)] +pub struct PhysicalHashJoin { + /// Data types from the left (build) side of the join. + left_types: Vec, + /// Data types from the right (probe) side of the join. + right_types: Vec, +} + +impl ExecutableOperator for PhysicalHashJoin { + fn create_states( + &self, + context: &DatabaseContext, + batch_size: usize, + partitions: usize, + ) -> Result { + unimplemented!() + } + + fn poll_execute( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + inout: ExecuteInOutState, + ) -> Result { + match partition_state { + PartitionState::HashJoinBuild2(state) => { + let state = match state { + HashJoinBuildPartitionState::Building(state) => state, + HashJoinBuildPartitionState::Finished => return Ok(PollExecute::Exhausted), // TODO: Probably should error instead. 
+ }; + + let batch = inout.input.required("input batch required")?; + state + .build_data + .push_batch(&NopBufferManager, &self.left_types, batch)?; + + Ok(PollExecute::NeedsMore) + } + PartitionState::HashJoinProbe2(state) => { + match state { + HashJoinProbePartitionState::Waiting(probe_idx) => { + // Still waiting for build side to complete, just need + // to register a waker. + + let mut operator_state = match operator_state { + OperatorState::HashJoin(state) => state.inner.lock(), + other => panic!("invalid operator state: {other:?}"), + }; + + operator_state.build_waiting_probers[*probe_idx] = Some(cx.waker().clone()); + + Ok(PollExecute::Pending) + } + _ => unimplemented!(), + } + } + other => panic!("invalid partition state: {other:?}"), + } + } + + fn poll_finalize( + &self, + cx: &mut Context, + partition_state: &mut PartitionState, + operator_state: &OperatorState, + ) -> Result { + unimplemented!() + } +} + +impl Explainable for PhysicalHashJoin { + fn explain_entry(&self, conf: ExplainConfig) -> ExplainEntry { + unimplemented!() + } +}