Skip to content

Commit

Permalink
Fix uneven chunks case
Browse files: browse the repository at this point in the history
  • Loading branch information
hombit committed Dec 19, 2024
1 parent df8e0f9 commit 5d39c52
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/nested_pandas/series/packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,11 @@ def pack_lists(df: pd.DataFrame, name: str | None = None, *, validate: bool = Tr

# If all chunk arrays have the same chunk lengths, we can build a chunked struct array with no
# data copying.
chunk_lengths = np.array([[len(chunk) for chunk in arr.chunks] for arr in pa_chunked_arrays.values()])
if np.all(chunk_lengths == chunk_lengths[0]):
n_chunks = chunk_lengths.shape[1]
chunk_lengths = pa.array([[len(chunk) for chunk in arr.chunks] for arr in pa_chunked_arrays.values()])
if all(chunk_length == chunk_lengths[0] for chunk_length in chunk_lengths):
chunks = []
for i in range(n_chunks):
numpy_chunks = next(iter(pa_chunked_arrays.values())).num_chunks
for i in range(numpy_chunks):
chunks.append(
pa.StructArray.from_arrays(
[arr.chunk(i) for arr in pa_chunked_arrays.values()],
Expand Down
18 changes: 18 additions & 0 deletions tests/nested_pandas/series/test_packer.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,24 @@ def test_pack_lists_with_chunked_arrays():
assert_series_equal(series.nest.get_list_series("b"), chunked_b)


def test_pack_lists_with_uneven_chunked_arrays():
    """Check pack_lists() on list columns built from differently chunked Arrow data.

    Regression test for https://github.com/lincc-frameworks/nested-pandas/issues/180
    """
    # Column "a" is built from a chunked array: the same two-list chunk repeated three times.
    storage_a = pa.chunked_array([pa.array([[1, 2, 3], [4, 5]])] * 3)
    expected_a = pd.Series(storage_a, dtype=pd.ArrowDtype(pa.list_(pa.int64())), name="a")

    # Column "b" is built from one plain (non-chunked) array holding the same six rows.
    storage_b = pa.array([[0.0, 1.0, 2.0], [3.0, 4.0]] * 3)
    expected_b = pd.Series(storage_b, dtype=pd.ArrowDtype(pa.list_(pa.float64())), name="b")

    frame = pd.DataFrame({"a": expected_a, "b": expected_b}, index=list(range(6)))
    packed = packer.pack_lists(frame)

    # Each field must round-trip unchanged through the packed nested series.
    for field, expected in (("a", expected_a), ("b", expected_b)):
        assert_series_equal(packed.nest.get_list_series(field), expected)


def test_pack_seq_with_dfs_and_index():
"""Test pack_seq()."""
dfs = [
Expand Down

0 comments on commit 5d39c52

Please sign in to comment.