Skip to content
This repository has been archived by the owner on Oct 1, 2022. It is now read-only.

Commit

Permalink
Merge pull request #10 from ssl-hep/pandas_to_arrow
Browse files Browse the repository at this point in the history
Make the conversion of awkward to arrow more robust
  • Loading branch information
BenGalewsky authored Feb 17, 2020
2 parents d35cc7a + 6bdb06b commit 153ed58
Showing 1 changed file with 5 additions and 7 deletions.
12 changes: 5 additions & 7 deletions transformer_uproot.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@
import uproot
import os
import pyarrow.parquet as pq
import pandas as pd
import pyarrow as pa



# How many bytes does an average awkward array cell take up. This is just
Expand Down Expand Up @@ -120,13 +123,8 @@ def transform_single_file(file_path, output_path, servicex=None, tree_name='Even
table = generated_transformer.run_query(file_path, tree_name)

# Deal with messy, nested lazy arrays that cannot be converted to Arrow directly
concatenated = {}
for column in table.columns:
concatenated[column] = awkward.concatenate(
[x.array.chunks[0].array for x in table[column].chunks])
new_table = awkward.Table(concatenated)

arrow = awkward.toarrow(new_table)
new_table = pd.DataFrame(table)
arrow = pa.Table.from_pandas(new_table)

if output_path:
writer = pq.ParquetWriter(output_path, arrow.schema)
Expand Down

0 comments on commit 153ed58

Please sign in to comment.