Skip to content

Commit

Permalink
Merge branch 'dev' into ds_vep_conservation
Browse files Browse the repository at this point in the history
  • Loading branch information
project-defiant authored Nov 27, 2024
2 parents 7f1cccb + 8595e5b commit 33be491
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 6 deletions.
9 changes: 8 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
# Overridable defaults: `?=` assigns only when the variable is not already
# set (e.g. from the environment or the make command line).
PROJECT_ID ?= open-targets-genetics-dev
REGION ?= europe-west1
# Application name: first line of pyproject.toml matching "name", third
# space-separated field, with double quotes removed. `$$` yields a literal
# `$`, so the command substitution is evaluated by the shell that eventually
# receives the expanded value, not by make itself.
APP_NAME ?= $$(cat pyproject.toml | grep -m 1 "name" | cut -d" " -f3 | sed 's/"//g')
# Default git reference: the abbreviated name of the current HEAD.
REF ?= $$(git rev-parse --abbrev-ref HEAD)
# Package version as printed by `poetry version --short`.
PACKAGE_VERSION ?= $$(poetry version --short)
# NOTE: `git rev-parse --abbrev-ref HEAD` prints the literal string "HEAD"
# when the checkout is detached (e.g. sitting on a tag) and the branch name
# otherwise; this is how the tag and branch cases are told apart.
# The first ifeq argument must be closed by exactly one parenthesis: a stray
# extra `)` becomes part of the compared text and the test can never match.
ifeq ($(shell git rev-parse --abbrev-ref HEAD),HEAD)
# Detached HEAD: use the tag that points exactly at the current commit.
REF := $(shell git describe --exact-match --tags)
else
# Regular branch checkout: use the branch name.
REF := $(shell git rev-parse --abbrev-ref HEAD)
endif

# PACKAGE_VERSION with every non-alphanumeric character deleted
# (`tr -cd '[:alnum:]'` keeps only letters and digits); evaluated once, at
# parse time, because of `:=` and `$(shell ...)`.
CLEAN_PACKAGE_VERSION := $(shell echo "$(PACKAGE_VERSION)" | tr -cd '[:alnum:]')
# Bucket path built from the application name and the git reference; `=` makes
# it recursively expanded, so APP_NAME and REF are substituted on each use.
BUCKET_NAME=gs://genetics_etl_python_playground/initialisation/${APP_NAME}/${REF}

Expand Down
11 changes: 7 additions & 4 deletions src/gentropy/l2g.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def __init__(
fm = credible_set.filter(f.col("studyType") == "gwas").build_feature_matrix(
features_list, features_input_loader
)
fm._df.write.mode(session.write_mode).parquet(feature_matrix_path)
fm._df.coalesce(session.output_partitions).write.mode(
session.write_mode
).parquet(feature_matrix_path)


class LocusToGeneStep:
Expand Down Expand Up @@ -283,9 +285,9 @@ def run_predict(self) -> None:
)
predictions.filter(
f.col("score") >= self.l2g_threshold
).add_locus_to_gene_features(self.feature_matrix).df.write.mode(
self.session.write_mode
).parquet(self.predictions_path)
).add_locus_to_gene_features(self.feature_matrix).df.coalesce(
self.session.output_partitions
).write.mode(self.session.write_mode).parquet(self.predictions_path)
self.session.logger.info("L2G predictions saved successfully.")

def run_train(self) -> None:
Expand Down Expand Up @@ -378,6 +380,7 @@ def __init__(
locus_to_gene_prediction.to_disease_target_evidence(
credible_sets, study_index, locus_to_gene_threshold
)
.coalesce(session.output_partitions)
.write.mode(session.write_mode)
.option("compression", "gzip")
.json(evidence_output_path)
Expand Down
2 changes: 1 addition & 1 deletion src/vep/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ensemblorg/ensembl-vep:release_111.0
FROM ensemblorg/ensembl-vep:release_113.3

USER root

Expand Down

0 comments on commit 33be491

Please sign in to comment.