Skip to content

Commit

Permalink
bug: Fix edge cases in array_slice (#14489)
Browse files Browse the repository at this point in the history
This commit fixes the following edge cases in the array_slice function
so that it's semantics match DuckDB:

  - When begin < 0 and -begin > length, begin is clamped to the
    beginning of the list.
  - When step < 0 and begin = end, then the result should be a list with
    the single element found at index begin/end.

Fixes #10548
  • Loading branch information
jkosh44 authored Feb 6, 2025
1 parent 5239d1a commit 62e23a2
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 5 deletions.
16 changes: 13 additions & 3 deletions datafusion/functions-nested/src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,17 @@ where
// 0 ~ len - 1
let adjusted_zero_index = if index < 0 {
if let Ok(index) = index.try_into() {
index + len
// When index < 0 and -index > length, index is clamped to the beginning of the list.
// Otherwise, when index < 0, the index is counted from the end of the list.
//
// Note, we actually test the contrapositive, index < -length, because negating a
// negative will panic if the negative is equal to the smallest representable value
// while negating a positive is always safe.
if index < (O::zero() - O::one()) * len {
O::zero()
} else {
index + len
}
} else {
return exec_err!("array_slice got invalid index: {}", index);
}
Expand Down Expand Up @@ -570,7 +580,7 @@ where
"array_slice got invalid stride: {:?}, it cannot be 0",
stride
);
} else if (from <= to && stride.is_negative())
} else if (from < to && stride.is_negative())
|| (from > to && stride.is_positive())
{
// return empty array
Expand All @@ -582,7 +592,7 @@ where
internal_datafusion_err!("array_slice got invalid stride: {}", stride)
})?;

if from <= to {
if from <= to && stride > O::zero() {
assert!(start + to <= end);
if stride.eq(&O::one()) {
// stride is default to 1
Expand Down
26 changes: 24 additions & 2 deletions datafusion/sqllogictest/test_files/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -1941,12 +1941,12 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -4
query ??
select array_slice(make_array(1, 2, 3, 4, 5), -7, -2), array_slice(make_array('h', 'e', 'l', 'l', 'o'), -7, -3);
----
[] []
[1, 2, 3, 4] [h, e, l]

query ??
select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -7, -2), array_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -7, -3);
----
[] []
[1, 2, 3, 4] [h, e, l]

# array_slice scalar function #20 (with negative indexes; nested array)
query ??
Expand Down Expand Up @@ -1993,6 +1993,28 @@ select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2,
----
[2, 3, 4] [h, e]

# array_slice scalar function #24 (with first negative index larger than len)
query ??
select array_slice(make_array(1, 2, 3, 4, 5), -2147483648, 1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), -2147483648, 1);
----
[1] [h]

query ??
select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), -9223372036854775808, 1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), -9223372036854775808, 1);
----
[1] [h]

# array_slice scalar function #25 (with negative step and equal indexes)
query ??
select array_slice(make_array(1, 2, 3, 4, 5), 2, 2, -1), list_slice(make_array('h', 'e', 'l', 'l', 'o'), 2, 2, -1);
----
[2] [e]

query ??
select array_slice(arrow_cast(make_array(1, 2, 3, 4, 5), 'LargeList(Int64)'), 2, 2, -1), list_slice(arrow_cast(make_array('h', 'e', 'l', 'l', 'o'), 'LargeList(Utf8)'), 2, 2, -1);
----
[2] [e]

# array_slice with columns
query ?
select array_slice(column1, column2, column3) from slices;
Expand Down

0 comments on commit 62e23a2

Please sign in to comment.