Skip to content

Commit

Permalink
Merge pull request #382 from bioinfo-chru-strasbourg/fix-annotation-parquet-regions-aggregation-of-value-and-type-list-in-header-num
Browse files: browse the repository at this point in the history

Fix Parquet annotation for regions so that annotated fields are typed as lists
  • Loading branch information
antonylebechec authored Jan 31, 2025
2 parents 338a02d + a69005e commit 3deadb1
Showing 1 changed file with 14 additions and 9 deletions.
23 changes: 14 additions & 9 deletions howard/objects/variants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6442,10 +6442,10 @@ def annotation_parquet(self, threads: int = None) -> None:
# Load header as VCF object
parquet_hdr_vcf_header_infos = database.get_header().infos
# Log
log.debug(
"Annotation database header: "
+ str(parquet_hdr_vcf_header_infos)
)
# log.debug(
# "Annotation database header: "
# + str(parquet_hdr_vcf_header_infos)
# )

# Get extra infos
parquet_columns = database.get_extra_columns()
Expand Down Expand Up @@ -6576,10 +6576,15 @@ def annotation_parquet(self, threads: int = None) -> None:
)

# Add INFO field to header
parquet_hdr_vcf_header_infos_number = (
parquet_hdr_vcf_header_infos[annotation_field].num
or "."
)

# If regions, force values as list, due to overlap/aggregation
if parquet_type in ["regions"]:
parquet_hdr_vcf_header_infos_number = "."
else:
parquet_hdr_vcf_header_infos_number = (
parquet_hdr_vcf_header_infos[annotation_field].num
or "."
)
parquet_hdr_vcf_header_infos_type = (
parquet_hdr_vcf_header_infos[annotation_field].type
or "String"
Expand Down Expand Up @@ -6828,7 +6833,7 @@ def annotation_parquet(self, threads: int = None) -> None:
f"Annotation '{annotation_name}' - No Annotations available"
)

log.debug("Final header: " + str(vcf_reader.infos))
# log.debug("Final header: " + str(vcf_reader.infos))

# Remove added columns
for added_column in added_columns:
Expand Down

0 comments on commit 3deadb1

Please sign in to comment.