Skip to content
This repository has been archived by the owner on Oct 1, 2022. It is now read-only.

Make the conversion of awkward to arrow more robust #10

Merged
merged 1 commit on Feb 17, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions transformer_uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
import uproot
import os
import pyarrow.parquet as pq
import pandas as pd
import pyarrow as pa



# How many bytes does an average awkward array cell take up. This is just
Expand Down Expand Up @@ -120,13 +123,8 @@ def transform_single_file(file_path, output_path, servicex=None, tree_name='Even
table = generated_transformer.run_query(file_path, tree_name)

# Deal with messy, nested lazy arrays which cannot be converted to arrow
concatenated = {}
for column in table.columns:
concatenated[column] = awkward.concatenate(
[x.array.chunks[0].array for x in table[column].chunks])
new_table = awkward.Table(concatenated)

arrow = awkward.toarrow(new_table)
new_table = pd.DataFrame(table)
arrow = pa.Table.from_pandas(new_table)

if output_path:
writer = pq.ParquetWriter(output_path, arrow.schema)
Expand Down