From 34b874af31977ecf87fc46bf139b6f35d8885c98 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 14:20:15 +0000 Subject: [PATCH 01/12] build(deps-dev): bump pytest from 7.4.4 to 8.0.1 (#493) Bumps [pytest](https://github.com/pytest-dev/pytest) from 7.4.4 to 8.0.1. - [Release notes](https://github.com/pytest-dev/pytest/releases) - [Changelog](https://github.com/pytest-dev/pytest/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pytest-dev/pytest/compare/7.4.4...8.0.1) --- updated-dependencies: - dependency-name: pytest dependency-type: direct:development update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 12 ++++++------ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 697dfd78f..b11b03140 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6456,13 +6456,13 @@ sql = ["pandas (>=1.0.5)", "pyarrow (>=1.0.0)"] [[package]] name = "pytest" -version = "7.4.4" +version = "8.0.1" description = "pytest: simple powerful testing with Python" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, - {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, + {file = "pytest-8.0.1-py3-none-any.whl", hash = "sha256:3e4f16fe1c0a9dc9d9389161c127c3edc5d810c38d6793042fb81d9f48a59fca"}, + {file = "pytest-8.0.1.tar.gz", hash = "sha256:267f6563751877d772019b13aacbe4e860d73fe8f651f28112e9ac37de7513ae"}, ] [package.dependencies] @@ -6470,7 +6470,7 @@ colorama = {version = "*", markers = "sys_platform == \"win32\""} exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" -pluggy = ">=0.12,<2.0" +pluggy = ">=1.3.0,<2.0" tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] @@ -8534,4 +8534,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "f7b9565b1c7780240cbf010bf76c5cbf3bcbe9d448d951fab192d93d720301f3" +content-hash = "c199c08b168ddf0d27580ecb32767d7293754983aecea1e1e5c4f59ae088d156" diff --git a/pyproject.toml b/pyproject.toml index 10c8ffe6a..1c6ef7ac1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ pytest-cov = "^4.1.0" pytest-sugar = ">=0.9.5,<1.1.0" dbldatagen = "^0.3.1" pyparsing = "^3.1.1" -pytest = "^7.4.4" +pytest = ">=7.4.4,<9.0.0" pytest-xdist = "^3.5.0" From 5685270a8c1d0424ab4207aefd12ed2e8542c6b0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 09:57:26 +0000 Subject: [PATCH 02/12] build(deps-dev): bump pydoclint from 0.3.8 to 0.4.1 (#497) Bumps [pydoclint](https://github.com/jsh9/pydoclint) from 0.3.8 to 0.4.1. - [Release notes](https://github.com/jsh9/pydoclint/releases) - [Changelog](https://github.com/jsh9/pydoclint/blob/main/CHANGELOG.md) - [Commits](https://github.com/jsh9/pydoclint/compare/0.3.8...0.4.1) --- updated-dependencies: - dependency-name: pydoclint dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 10 +++++----- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/poetry.lock b/poetry.lock index b11b03140..0ab9f51c7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6313,17 +6313,17 @@ setuptools = "*" [[package]] name = "pydoclint" -version = "0.3.8" +version = "0.4.1" description = "A Python docstring linter that checks arguments, returns, yields, and raises sections" optional = false python-versions = ">=3.8" files = [ - {file = "pydoclint-0.3.8-py2.py3-none-any.whl", hash = "sha256:8e5e020071bb64056fd3f1d68f3b1162ffeb8a3fd6424f73fef7272dac62c166"}, - {file = "pydoclint-0.3.8.tar.gz", hash = "sha256:5a9686a5fb410343e998402686b87cc07df647ea3ab92528c0b0cf8505584e44"}, + {file = "pydoclint-0.4.1-py2.py3-none-any.whl", hash = "sha256:4e32fdf0a47a2199377617f09af0a82a2157f80543026f919a17112a396e752f"}, + {file = "pydoclint-0.4.1.tar.gz", hash = "sha256:d39ed26a793203afadb1917011710fbf258ac3dddcd79b53212e0a2107221643"}, ] [package.dependencies] -click = ">=8.0.0" +click = ">=8.1.0" docstring-parser-fork = ">=0.0.5" tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} @@ -8534,4 +8534,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.10, <3.11" -content-hash = "c199c08b168ddf0d27580ecb32767d7293754983aecea1e1e5c4f59ae088d156" +content-hash = "8d85933db4bf8f4ca92ef227e9a767d26f4f7d2b360083449cfafc54878885fc" diff --git a/pyproject.toml b/pyproject.toml index 1c6ef7ac1..1f43336f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ ipykernel = "^6.28.0" google-cloud-dataproc = "^5.8.0" apache-airflow = "^2.8.0" apache-airflow-providers-google = "^10.13.1" -pydoclint = "^0.3.8" +pydoclint = ">=0.3.8,<0.5.0" prettier = "^0.0.7" deptry = "^0.12.0" python-semantic-release = ">=8.7,<10.0" From 83a6e0fdb020c987d6c94011a84190fa3588cdc7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 11:03:19 +0000 Subject: [PATCH 03/12] build(deps-dev): bump yamllint from 1.33.0 to 1.35.1 (#494) Bumps [yamllint](https://github.com/adrienverge/yamllint) from 1.33.0 to 1.35.1. - [Changelog](https://github.com/adrienverge/yamllint/blob/master/CHANGELOG.rst) - [Commits](https://github.com/adrienverge/yamllint/compare/v1.33.0...v1.35.1) --- updated-dependencies: - dependency-name: yamllint dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0ab9f51c7..b73137d30 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8413,13 +8413,13 @@ files = [ [[package]] name = "yamllint" -version = "1.33.0" +version = "1.35.1" description = "A linter for YAML files." optional = false python-versions = ">=3.8" files = [ - {file = "yamllint-1.33.0-py3-none-any.whl", hash = "sha256:28a19f5d68d28d8fec538a1db21bb2d84c7dc2e2ea36266da8d4d1c5a683814d"}, - {file = "yamllint-1.33.0.tar.gz", hash = "sha256:2dceab9ef2d99518a2fcf4ffc964d44250ac4459be1ba3ca315118e4a1a81f7d"}, + {file = "yamllint-1.35.1-py3-none-any.whl", hash = "sha256:2e16e504bb129ff515b37823b472750b36b6de07963bd74b307341ef5ad8bdc3"}, + {file = "yamllint-1.35.1.tar.gz", hash = "sha256:7a003809f88324fd2c877734f2d575ee7881dd9043360657cc8049c809eba6cd"}, ] [package.dependencies] From 8fc7b38fb1dd8cfc32b6491ae2bae321c2c3f6c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 11:23:52 +0000 Subject: [PATCH 04/12] build(deps-dev): bump python-semantic-release from 9.0.3 to 9.1.0 (#495) Bumps [python-semantic-release](https://github.com/python-semantic-release/python-semantic-release) from 9.0.3 to 9.1.0. - [Release notes](https://github.com/python-semantic-release/python-semantic-release/releases) - [Changelog](https://github.com/python-semantic-release/python-semantic-release/blob/master/CHANGELOG.md) - [Commits](https://github.com/python-semantic-release/python-semantic-release/compare/v9.0.3...v9.1.0) --- updated-dependencies: - dependency-name: python-semantic-release dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index b73137d30..432a9cb41 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6613,13 +6613,13 @@ python-slugify = ">=1.2.5" [[package]] name = "python-semantic-release" -version = "9.0.3" +version = "9.1.0" description = "Automatic Semantic Versioning for Python projects" optional = false python-versions = ">=3.8" files = [ - {file = "python-semantic-release-9.0.3.tar.gz", hash = "sha256:c11ce2d805347ba29175a2c35072afa013cd2bf502e57193ac1ed79e14c057cc"}, - {file = "python_semantic_release-9.0.3-py3-none-any.whl", hash = "sha256:13385d60546eaac80b3bfea18503d551936a3c5bd73f960d59d94d021a9e274c"}, + {file = "python-semantic-release-9.1.0.tar.gz", hash = "sha256:0317e72a940c5080c8aa6ff56cce6230f045b1a8f91f0a58fdc8f80745912369"}, + {file = "python_semantic_release-9.1.0-py3-none-any.whl", hash = "sha256:7ca5fb1ea4d5215182db477fbc53d9f4c907b7030520c4ac6640b9aaa71dbce7"}, ] [package.dependencies] @@ -6633,7 +6633,7 @@ python-gitlab = ">=2,<5" requests = ">=2.25,<3" rich = ">=12.5.1" shellingham = ">=1.5.0.post1" -tomlkit = ">=0.10,<1.0" +tomlkit = ">=0.11,<1.0" [package.extras] dev = ["pre-commit", "ruff (==0.1.11)", "tox"] From f8f3a0852933cc9e2f60d18c3d494ed583a37bc0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 11:36:05 +0000 Subject: [PATCH 05/12] build(deps-dev): bump apache-airflow-providers-google (#496) Bumps [apache-airflow-providers-google](https://github.com/apache/airflow) from 10.13.1 to 10.15.0. - [Release notes](https://github.com/apache/airflow/releases) - [Changelog](https://github.com/apache/airflow/blob/main/RELEASE_NOTES.rst) - [Commits](https://github.com/apache/airflow/compare/providers-google/10.13.1...providers-google/10.15.0) --- updated-dependencies: - dependency-name: apache-airflow-providers-google dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 432a9cb41..d70afbb00 100644 --- a/poetry.lock +++ b/poetry.lock @@ -446,13 +446,13 @@ openlineage = ["apache-airflow-providers-openlineage"] [[package]] name = "apache-airflow-providers-google" -version = "10.13.1" +version = "10.15.0" description = "Provider package apache-airflow-providers-google for Apache Airflow" optional = false python-versions = "~=3.8" files = [ - {file = "apache_airflow_providers_google-10.13.1-py3-none-any.whl", hash = "sha256:16c8d5a58346740fb1c9a0feddefd3849a20e70f0f28d6a1f862d5fd7618b894"}, - {file = "apache_airflow_providers_google-10.13.1.tar.gz", hash = "sha256:880388d26c9c01ecc3526dc15d21a79388a01093435ac3a3aa811a990d4ffe84"}, + {file = "apache_airflow_providers_google-10.15.0-py3-none-any.whl", hash = "sha256:70d2d4feb66f06cd750ea673344f3be20d2d575c9645f7e2b030c73551a297fc"}, + {file = "apache_airflow_providers_google-10.15.0.tar.gz", hash = "sha256:ff48fa0a29abec2645a8008c47d0a84a759b7203412707a5e2fc01558c4052a3"}, ] [package.dependencies] @@ -464,7 +464,8 @@ gcloud-aio-bigquery = ">=6.1.2" gcloud-aio-storage = ">=9.0.0" gcsfs = ">=2023.10.0" google-ads = ">=22.1.0" -google-api-core = ">=2.11.0" +google-analytics-admin = "*" +google-api-core = ">=2.11.0,<2.16.0 || >2.16.0" google-api-python-client = ">=1.6.0" google-auth = ">=1.0.0" google-auth-httplib2 = ">=0.0.1" @@ -508,7 +509,7 @@ grpcio-gcp = ">=0.2.2" httpx = "*" json-merge-patch = ">=0.2" looker-sdk = ">=22.2.0" -pandas = ">=0.17.1" +pandas = ">=1.2.5" pandas-gbq = "*" proto-plus = ">=1.19.6" PyOpenSSL = "*" @@ -2301,6 +2302,23 @@ setuptools = ">=40.3.0" [package.extras] tests = ["nox (>=2020.12.31,<2022.6)"] +[[package]] +name = "google-analytics-admin" +version = "0.22.5" +description = "Google Analytics Admin API client library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "google-analytics-admin-0.22.5.tar.gz", hash = "sha256:2de1671ea945c23b3587323ed1dc6e6c78d02a7507b562d425de1dc7fb27b08f"}, + {file = "google_analytics_admin-0.22.5-py2.py3-none-any.whl", hash = "sha256:54c6e00baeb9be61492b4efcd4859ca1b94b181ce454ba70ff5d73a630dd5661"}, +] + +[package.dependencies] +google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} +google-auth = ">=2.14.1,<3.0.0dev" +proto-plus = ">=1.22.3,<2.0.0dev" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" + [[package]] name = "google-api-core" version = "2.12.0" From faa99be3c1ab37c3840b58fbbcf0c600f0f7dc71 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Tue, 20 Feb 2024 12:32:35 +0000 Subject: [PATCH 06/12] ci: pre-commit updates (#499) --- .pre-commit-config.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 29e6a2ec7..3d713a432 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.13 + rev: v0.2.2 hooks: - id: ruff args: @@ -29,7 +29,7 @@ repos: - id: debug-statements - id: check-docstring-first - repo: https://github.com/adrienverge/yamllint.git - rev: v1.33.0 + rev: v1.35.1 hooks: - id: yamllint @@ -93,6 +93,6 @@ repos: - id: beautysh - repo: https://github.com/jsh9/pydoclint - rev: 0.3.8 + rev: 0.4.1 hooks: - id: pydoclint From 08e6ff6c1a029e724fdfc7668e01f0b9afac519a Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 21 Feb 2024 09:50:37 +0000 Subject: [PATCH 07/12] test: increase modularity of test directories (#502) --- docs/development/contributing.md | 2 +- .../howto/python_api/b_create_dataset.py | 6 +- pyproject.toml | 1 + tests/__init__.py | 4 +- tests/gentropy/__init__.py | 3 + tests/{ => gentropy}/common/test_session.py | 0 tests/{ => gentropy}/conftest.py | 52 ++++++++++-------- .../data_samples/01_test_PIPs.txt | 0 .../data_samples/01_test_lbf_mle.csv | 0 .../data_samples/01_test_lbf_moments.csv | 0 .../data_samples/01_test_ld.csv | 0 .../{ => gentropy}/data_samples/01_test_z.csv | 0 .../data_samples/andersson_sample.bed | 0 .../eqtl_catalogue_studies_sample.tsv | 0 .../data_samples/example_test-pop.bm/_SUCCESS | 0 .../example_test-pop.bm/metadata.json | 0 ...7-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 | Bin ...6-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 | Bin ...6-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b | Bin ...6-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd | Bin ...6-3-0-596fbcf8-3832-3676-1b73-987b185558f9 | Bin ...6-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 | Bin ...6-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e | Bin ...6-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 | Bin ...6-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 | Bin ...6-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 | Bin ...6-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa | Bin ...7-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 | Bin ...7-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 | Bin ...7-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 | Bin ...7-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 | Bin ...7-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a | Bin .../example_test-pop.ht/README.txt | 0 .../data_samples/example_test-pop.ht/_SUCCESS | 0 .../globals/metadata.json.gz | Bin .../example_test-pop.ht/globals/parts/part-0 | Bin .../index | Bin .../metadata.json.gz | Bin .../example_test-pop.ht/metadata.json.gz | Bin .../example_test-pop.ht/rows/metadata.json.gz | Bin ...art-0-020df040-1af5-433e-944d-df08b484cf34 | Bin .../finngen_R9_AB1_ACTINOMYCOSIS.gz | Bin .../finngen_R9_AB1_EBV.SUSIE.snp.gz | Bin .../finngen_credset_summary_sample.tsv | 0 .../data_samples/finngen_studies_sample.json | 0 .../data_samples/finucane_PIPs.npy | Bin .../data_samples/grch37_to_grch38.over.chain | 0 ...g_ancestries_sample_v1.0.3-r2022-11-29.tsv | 0 ...g_associations_sample_e107_r2022-11-29.tsv | 0 .../gwas_catalog_harmonised_list.txt | 0 ...was_catalog_studies_sample-r2022-11-29.tsv | 0 .../GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | Bin .../data_samples/javierre_sample.parquet | Bin .../data_samples/jung_sample.bed | 0 .../l2g_gold_standard_curation_sample.json.gz | Bin .../neale2_saige_study_manifest.samples.tsv | 0 .../new_format_GCST90293086.h.tsv.gz | Bin .../old_format_GCST006090.h.tsv.gz | Bin .../otp_interactions_sample.parquet | Bin .../sumstats_sample/GCST005523_chr18.parquet | Bin ...4753-b840-4a0b6289c221-c000.snappy.parquet | Bin .../data_samples/thurman_sample.bed8 | 0 .../data_samples/vep_consequences_sample.tsv | 0 .../dataset/test_colocalisation.py | 0 tests/{ => gentropy}/dataset/test_dataset.py | 0 .../{ => gentropy}/dataset/test_gene_index.py | 0 .../{ => gentropy}/dataset/test_intervals.py | 0 tests/{ => gentropy}/dataset/test_l2g.py | 0 tests/{ => gentropy}/dataset/test_ld_index.py | 0 .../dataset/test_study_index.py | 0 .../dataset/test_study_locus.py | 0 .../dataset/test_study_locus_overlap.py | 0 .../dataset/test_study_locus_overlaps.py | 0 .../dataset/test_summary_statistics.py | 0 tests/{ => gentropy}/dataset/test_v2g.py | 0 .../dataset/test_variant_annotation.py | 0 .../dataset/test_variant_index.py | 0 .../test_eqtl_catalogue_study_index.py | 0 .../test_eqtl_catalogue_summary_stats.py | 0 .../finngen/test_finngen_finemapping.py | 4 +- .../finngen/test_finngen_study_index.py | 0 .../finngen/test_finngen_summary_stats.py | 2 +- .../datasource/gnomad/test_gnomad_ld.py | 8 +-- .../test_gwas_catalog_associations.py | 0 .../test_gwas_catalog_curation.py | 0 .../test_gwas_catalog_study_index.py | 0 .../test_gwas_catalog_study_splitter.py | 0 .../test_gwas_catalog_summary_statistics.py | 4 +- .../datasource/intervals/test_andersson.py | 4 +- .../datasource/intervals/test_javierre.py | 4 +- .../datasource/intervals/test_jung.py | 2 +- .../datasource/intervals/test_thurman.py | 4 +- .../open_targets/test_l2g_gold_standard.py | 0 .../datasource/open_targets/test_target.py | 0 .../ukbiobank/test_ukbiobank_study_index.py | 0 .../docs/test_applying_methods.py | 0 .../docs/test_build.py} | 0 .../docs/test_create_dataset.py | 0 .../docs/test_creating_spark_session.py | 0 .../docs/test_inspect_dataset.py | 0 tests/{ => gentropy}/method/test_carma.py | 0 tests/{ => gentropy}/method/test_clump.py | 0 .../method/test_colocalisation_method.py | 0 tests/{ => gentropy}/method/test_ld.py | 0 .../method/test_locus_to_gene.py | 0 tests/{ => gentropy}/method/test_pics.py | 0 tests/{ => gentropy}/method/test_susie_inf.py | 0 .../method/test_window_based_clumping.py | 0 tests/{ => gentropy}/step/test_clump_step.py | 2 +- tests/{ => gentropy}/test_cli.py | 0 tests/{ => gentropy}/test_schemas.py | 0 tests/{ => gentropy}/test_spark_helpers.py | 0 112 files changed, 57 insertions(+), 45 deletions(-) create mode 100644 tests/gentropy/__init__.py rename tests/{ => gentropy}/common/test_session.py (100%) rename tests/{ => gentropy}/conftest.py (91%) rename tests/{ => gentropy}/data_samples/01_test_PIPs.txt (100%) rename tests/{ => gentropy}/data_samples/01_test_lbf_mle.csv (100%) rename tests/{ => gentropy}/data_samples/01_test_lbf_moments.csv (100%) rename tests/{ => gentropy}/data_samples/01_test_ld.csv (100%) rename tests/{ => gentropy}/data_samples/01_test_z.csv (100%) rename tests/{ => gentropy}/data_samples/andersson_sample.bed (100%) rename tests/{ => gentropy}/data_samples/eqtl_catalogue_studies_sample.tsv (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/_SUCCESS (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/metadata.json (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-0-17-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-00-36-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-01-36-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-02-36-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-03-36-3-0-596fbcf8-3832-3676-1b73-987b185558f9 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-04-36-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-05-36-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-06-36-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-07-36-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-08-36-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-09-36-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-1-17-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-2-17-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-3-17-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-4-17-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.bm/parts/part-5-17-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/README.txt (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/_SUCCESS (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/globals/metadata.json.gz (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/globals/parts/part-0 (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/index (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/metadata.json.gz (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/metadata.json.gz (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/rows/metadata.json.gz (100%) rename tests/{ => gentropy}/data_samples/example_test-pop.ht/rows/parts/part-0-020df040-1af5-433e-944d-df08b484cf34 (100%) rename tests/{ => gentropy}/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz (100%) rename tests/{ => gentropy}/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz (100%) rename tests/{ => gentropy}/data_samples/finngen_credset_summary_sample.tsv (100%) rename tests/{ => gentropy}/data_samples/finngen_studies_sample.json (100%) rename tests/{ => gentropy}/data_samples/finucane_PIPs.npy (100%) rename tests/{ => gentropy}/data_samples/grch37_to_grch38.over.chain (100%) rename tests/{ => gentropy}/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv (100%) rename tests/{ => gentropy}/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv (100%) rename tests/{ => gentropy}/data_samples/gwas_catalog_harmonised_list.txt (100%) rename tests/{ => gentropy}/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv (100%) rename tests/{ => gentropy}/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz (100%) rename tests/{ => gentropy}/data_samples/javierre_sample.parquet (100%) rename tests/{ => gentropy}/data_samples/jung_sample.bed (100%) rename tests/{ => gentropy}/data_samples/l2g_gold_standard_curation_sample.json.gz (100%) rename tests/{ => gentropy}/data_samples/neale2_saige_study_manifest.samples.tsv (100%) rename tests/{ => gentropy}/data_samples/new_format_GCST90293086.h.tsv.gz (100%) rename tests/{ => gentropy}/data_samples/old_format_GCST006090.h.tsv.gz (100%) rename tests/{ => gentropy}/data_samples/otp_interactions_sample.parquet (100%) rename tests/{ => gentropy}/data_samples/sumstats_sample/GCST005523_chr18.parquet (100%) rename tests/{ => gentropy}/data_samples/target_sample.parquet/part-00161-86fc51a8-1d24-4753-b840-4a0b6289c221-c000.snappy.parquet (100%) rename tests/{ => gentropy}/data_samples/thurman_sample.bed8 (100%) rename tests/{ => gentropy}/data_samples/vep_consequences_sample.tsv (100%) rename tests/{ => gentropy}/dataset/test_colocalisation.py (100%) rename tests/{ => gentropy}/dataset/test_dataset.py (100%) rename tests/{ => gentropy}/dataset/test_gene_index.py (100%) rename tests/{ => gentropy}/dataset/test_intervals.py (100%) rename tests/{ => gentropy}/dataset/test_l2g.py (100%) rename tests/{ => gentropy}/dataset/test_ld_index.py (100%) rename tests/{ => gentropy}/dataset/test_study_index.py (100%) rename tests/{ => gentropy}/dataset/test_study_locus.py (100%) rename tests/{ => gentropy}/dataset/test_study_locus_overlap.py (100%) rename tests/{ => gentropy}/dataset/test_study_locus_overlaps.py (100%) rename tests/{ => gentropy}/dataset/test_summary_statistics.py (100%) rename tests/{ => gentropy}/dataset/test_v2g.py (100%) rename tests/{ => gentropy}/dataset/test_variant_annotation.py (100%) rename tests/{ => gentropy}/dataset/test_variant_index.py (100%) rename tests/{ => gentropy}/datasource/eqtl_catalogue/test_eqtl_catalogue_study_index.py (100%) rename tests/{ => gentropy}/datasource/eqtl_catalogue/test_eqtl_catalogue_summary_stats.py (100%) rename tests/{ => gentropy}/datasource/finngen/test_finngen_finemapping.py (72%) rename tests/{ => gentropy}/datasource/finngen/test_finngen_study_index.py (100%) rename tests/{ => gentropy}/datasource/finngen/test_finngen_summary_stats.py (86%) rename tests/{ => gentropy}/datasource/gnomad/test_gnomad_ld.py (94%) rename tests/{ => gentropy}/datasource/gwas_catalog/test_gwas_catalog_associations.py (100%) rename tests/{ => gentropy}/datasource/gwas_catalog/test_gwas_catalog_curation.py (100%) rename tests/{ => gentropy}/datasource/gwas_catalog/test_gwas_catalog_study_index.py (100%) rename tests/{ => gentropy}/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py (100%) rename tests/{ => gentropy}/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py (95%) rename tests/{ => gentropy}/datasource/intervals/test_andersson.py (90%) rename tests/{ => gentropy}/datasource/intervals/test_javierre.py (90%) rename tests/{ => gentropy}/datasource/intervals/test_jung.py (92%) rename tests/{ => gentropy}/datasource/intervals/test_thurman.py (90%) rename tests/{ => gentropy}/datasource/open_targets/test_l2g_gold_standard.py (100%) rename tests/{ => gentropy}/datasource/open_targets/test_target.py (100%) rename tests/{ => gentropy}/datasource/ukbiobank/test_ukbiobank_study_index.py (100%) rename tests/{ => gentropy}/docs/test_applying_methods.py (100%) rename tests/{test_docs.py => gentropy/docs/test_build.py} (100%) rename tests/{ => gentropy}/docs/test_create_dataset.py (100%) rename tests/{ => gentropy}/docs/test_creating_spark_session.py (100%) rename tests/{ => gentropy}/docs/test_inspect_dataset.py (100%) rename tests/{ => gentropy}/method/test_carma.py (100%) rename tests/{ => gentropy}/method/test_clump.py (100%) rename tests/{ => gentropy}/method/test_colocalisation_method.py (100%) rename tests/{ => gentropy}/method/test_ld.py (100%) rename tests/{ => gentropy}/method/test_locus_to_gene.py (100%) rename tests/{ => gentropy}/method/test_pics.py (100%) rename tests/{ => gentropy}/method/test_susie_inf.py (100%) rename tests/{ => gentropy}/method/test_window_based_clumping.py (100%) rename tests/{ => gentropy}/step/test_clump_step.py (90%) rename tests/{ => gentropy}/test_cli.py (100%) rename tests/{ => gentropy}/test_schemas.py (100%) rename tests/{ => gentropy}/test_spark_helpers.py (100%) diff --git a/docs/development/contributing.md b/docs/development/contributing.md index a12ac4951..9742d3742 100644 --- a/docs/development/contributing.md +++ b/docs/development/contributing.md @@ -82,5 +82,5 @@ For more details on each of these steps, see the sections below. ### Tests - Test study fixture in `tests/conftest.py` (example: `mock_study_index_finngen` in that module) -- Test sample data in `tests/data_samples` (example: `tests/data_samples/finngen_studies_sample.json`) +- Test sample data in `tests/data_samples` (example: `tests/gentropy/data_samples/finngen_studies_sample.json`) - Test definition in `tests/` (example: `tests/dataset/test_study_index.py` → `test_study_index_finngen_creation`) diff --git a/docs/src_snippets/howto/python_api/b_create_dataset.py b/docs/src_snippets/howto/python_api/b_create_dataset.py index 813cd1a02..1a006c97a 100644 --- a/docs/src_snippets/howto/python_api/b_create_dataset.py +++ b/docs/src_snippets/howto/python_api/b_create_dataset.py @@ -17,7 +17,7 @@ def create_from_parquet(session: Session) -> SummaryStatistics: # --8<-- [end:create_from_parquet_import] - path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet" + path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet" # --8<-- [start:create_from_parquet] summary_stats = SummaryStatistics.from_parquet(session, path) # --8<-- [end:create_from_parquet] @@ -31,7 +31,7 @@ def create_from_source(session: Session) -> SummaryStatistics: from gentropy.datasource.finngen.summary_stats import FinnGenSummaryStats # --8<-- [end:create_from_source_import] - path = "tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz" + path = "tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz" # --8<-- [start:create_from_source] summary_stats = FinnGenSummaryStats.from_source(session.spark, path) # --8<-- [end:create_from_source] @@ -46,7 +46,7 @@ def create_from_pandas() -> SummaryStatistics: # --8<-- [end:create_from_pandas_import] - path = "tests/data_samples/sumstats_sample/GCST005523_chr18.parquet" + path = "tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet" custom_summary_stats_pandas_df = ps.read_parquet(path) # --8<-- [start:create_from_pandas] diff --git a/pyproject.toml b/pyproject.toml index 1f43336f3..3f4f44545 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -127,6 +127,7 @@ exclude = ["dist"] [tool.pytest.ini_options] addopts = "-n auto --doctest-modules --cov=src/ --cov-report=xml" pythonpath = [".", "./src/airflow/dags"] +testpaths = ["tests/gentropy", "src/gentropy/"] # Semi-strict mode for mypy [tool.mypy] diff --git a/tests/__init__.py b/tests/__init__.py index 4360b337d..fc8f20475 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,3 +1 @@ -"""Unit tests.""" - -from __future__ import annotations +"""Gentropy tests package.""" diff --git a/tests/gentropy/__init__.py b/tests/gentropy/__init__.py new file mode 100644 index 000000000..4360b337d --- /dev/null +++ b/tests/gentropy/__init__.py @@ -0,0 +1,3 @@ +"""Unit tests.""" + +from __future__ import annotations diff --git a/tests/common/test_session.py b/tests/gentropy/common/test_session.py similarity index 100% rename from tests/common/test_session.py rename to tests/gentropy/common/test_session.py diff --git a/tests/conftest.py b/tests/gentropy/conftest.py similarity index 91% rename from tests/conftest.py rename to tests/gentropy/conftest.py index f54544bd6..f3dc197f9 100644 --- a/tests/conftest.py +++ b/tests/gentropy/conftest.py @@ -411,7 +411,7 @@ def mock_ld_index(spark: SparkSession) -> LDIndex: def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame: """Sample GWAS Catalog studies.""" return spark.read.csv( - "tests/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv", + "tests/gentropy/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv", sep="\t", header=True, ) @@ -421,7 +421,7 @@ def sample_gwas_catalog_studies(spark: SparkSession) -> DataFrame: def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame: """Sample GWAS ancestries sample data.""" return spark.read.csv( - "tests/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv", + "tests/gentropy/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv", sep="\t", header=True, ) @@ -431,7 +431,7 @@ def sample_gwas_catalog_ancestries_lut(spark: SparkSession) -> DataFrame: def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFrame: """Sample GWAS harmonised sumstats sample data.""" return spark.read.csv( - "tests/data_samples/gwas_catalog_harmonised_list.txt", + "tests/gentropy/data_samples/gwas_catalog_harmonised_list.txt", sep="\t", header=False, ) @@ -441,7 +441,7 @@ def sample_gwas_catalog_harmonised_sumstats_list(spark: SparkSession) -> DataFra def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame: """Sample GWAS raw associations sample data.""" return spark.read.csv( - "tests/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv", + "tests/gentropy/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv", sep="\t", header=True, ) @@ -451,7 +451,7 @@ def sample_gwas_catalog_associations(spark: SparkSession) -> DataFrame: def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics: """Sample GWAS raw associations sample data.""" return SummaryStatistics( - _df=spark.read.parquet("tests/data_samples/sumstats_sample"), + _df=spark.read.parquet("tests/gentropy/data_samples/sumstats_sample"), _schema=SummaryStatistics.get_schema(), ) @@ -460,8 +460,10 @@ def sample_summary_statistics(spark: SparkSession) -> SummaryStatistics: def sample_finngen_studies(spark: SparkSession) -> DataFrame: """Sample FinnGen studies.""" # For reference, the sample file was generated with the following command: - # curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/data_samples/finngen_studies_sample.json - with open("tests/data_samples/finngen_studies_sample.json") as finngen_studies: + # curl https://r9.finngen.fi/api/phenos | jq '.[:10]' > tests/gentropy/data_samples/finngen_studies_sample.json + with open( + "tests/gentropy/data_samples/finngen_studies_sample.json" + ) as finngen_studies: json_data = finngen_studies.read() rdd = spark.sparkContext.parallelize([json_data]) return spark.read.json(rdd) @@ -471,8 +473,10 @@ def sample_finngen_studies(spark: SparkSession) -> DataFrame: def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame: """Sample eQTL Catalogue studies.""" # For reference, the sample file was generated with the following command: - # curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/data_samples/eqtl_catalogue_studies_sample.tsv - with open("tests/data_samples/eqtl_catalogue_studies_sample.tsv") as eqtl_catalogue: + # curl https://raw.githubusercontent.com/eQTL-Catalogue/eQTL-Catalogue-resources/master/tabix/tabix_ftp_paths_imported.tsv | head -n11 > tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv + with open( + "tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv" + ) as eqtl_catalogue: tsv = eqtl_catalogue.read() rdd = spark.sparkContext.parallelize([tsv]) return spark.read.csv(rdd, sep="\t", header=True) @@ -482,11 +486,11 @@ def sample_eqtl_catalogue_studies(spark: SparkSession) -> DataFrame: def sample_eqtl_catalogue_summary_stats(spark: SparkSession) -> DataFrame: """Sample eQTL Catalogue summary stats.""" # For reference, the sample file was generated with the following commands: - # mkdir -p tests/data_samples/imported/GTEx_V8/ge - # curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz + # mkdir -p tests/gentropy/data_samples/imported/GTEx_V8/ge + # curl ftp://ftp.ebi.ac.uk/pub/databases/spot/eQTL/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz | gzip -cd | head -n11 | gzip -c > tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz # It's important for the test file to be named in exactly this way, because eQTL Catalogue study ID is populated based on input file name. return spark.read.option("delimiter", "\t").csv( - "tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz", + "tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz", header=True, ) @@ -496,7 +500,7 @@ def sample_ukbiobank_studies(spark: SparkSession) -> DataFrame: """Sample UKBiobank manifest.""" # Sampled 10 rows of the UKBB manifest tsv return spark.read.csv( - "tests/data_samples/neale2_saige_study_manifest.samples.tsv", + "tests/gentropy/data_samples/neale2_saige_study_manifest.samples.tsv", sep="\t", header=True, inferSchema=True, @@ -507,7 +511,7 @@ def sample_ukbiobank_studies(spark: SparkSession) -> DataFrame: def sample_target_index(spark: SparkSession) -> DataFrame: """Sample target index sample data.""" return spark.read.parquet( - "tests/data_samples/target_sample.parquet", + "tests/gentropy/data_samples/target_sample.parquet", ) @@ -539,14 +543,14 @@ def mock_gene_index(spark: SparkSession) -> GeneIndex: @pytest.fixture() def liftover_chain_37_to_38(spark: SparkSession) -> LiftOverSpark: """Sample liftover chain file.""" - return LiftOverSpark("tests/data_samples/grch37_to_grch38.over.chain") + return LiftOverSpark("tests/gentropy/data_samples/grch37_to_grch38.over.chain") @pytest.fixture() def sample_l2g_gold_standard(spark: SparkSession) -> DataFrame: """Sample L2G gold standard curation.""" return spark.read.json( - "tests/data_samples/l2g_gold_standard_curation_sample.json.gz", + "tests/gentropy/data_samples/l2g_gold_standard_curation_sample.json.gz", ) @@ -554,7 +558,7 @@ def sample_l2g_gold_standard(spark: SparkSession) -> DataFrame: def sample_otp_interactions(spark: SparkSession) -> DataFrame: """Sample OTP gene-gene interactions dataset.""" return spark.read.parquet( - "tests/data_samples/otp_interactions_sample.parquet", + "tests/gentropy/data_samples/otp_interactions_sample.parquet", ) @@ -618,11 +622,11 @@ def mock_l2g_predictions(spark: SparkSession) -> L2GPrediction: @pytest.fixture() def sample_data_for_carma() -> list[np.ndarray]: """Sample data for fine-mapping by CARMA.""" - ld = pd.read_csv("tests/data_samples/01_test_ld.csv", header=None) + ld = pd.read_csv("tests/gentropy/data_samples/01_test_ld.csv", header=None) ld = np.array(ld) - z = pd.read_csv("tests/data_samples/01_test_z.csv") + z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv") z = np.array(z.iloc[:, 1]) - pips = pd.read_csv("tests/data_samples/01_test_PIPs.txt") + pips = pd.read_csv("tests/gentropy/data_samples/01_test_PIPs.txt") pips = np.array(pips.iloc[:, 0]) return [ld, z, pips] @@ -630,9 +634,9 @@ def sample_data_for_carma() -> list[np.ndarray]: @pytest.fixture() def sample_data_for_susie_inf() -> list[np.ndarray]: """Sample data for fine-mapping by SuSiE-inf.""" - ld = np.loadtxt("tests/data_samples/01_test_ld.csv", delimiter=",") - z = pd.read_csv("tests/data_samples/01_test_z.csv") + ld = np.loadtxt("tests/gentropy/data_samples/01_test_ld.csv", delimiter=",") + z = pd.read_csv("tests/gentropy/data_samples/01_test_z.csv") z = np.array(z.iloc[:, 1]) - lbf_moments = np.loadtxt("tests/data_samples/01_test_lbf_moments.csv") - lbf_mle = np.loadtxt("tests/data_samples/01_test_lbf_mle.csv") + lbf_moments = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_moments.csv") + lbf_mle = np.loadtxt("tests/gentropy/data_samples/01_test_lbf_mle.csv") return [ld, z, lbf_moments, lbf_mle] diff --git a/tests/data_samples/01_test_PIPs.txt b/tests/gentropy/data_samples/01_test_PIPs.txt similarity index 100% rename from tests/data_samples/01_test_PIPs.txt rename to tests/gentropy/data_samples/01_test_PIPs.txt diff --git a/tests/data_samples/01_test_lbf_mle.csv b/tests/gentropy/data_samples/01_test_lbf_mle.csv similarity index 100% rename from tests/data_samples/01_test_lbf_mle.csv rename to tests/gentropy/data_samples/01_test_lbf_mle.csv diff --git a/tests/data_samples/01_test_lbf_moments.csv b/tests/gentropy/data_samples/01_test_lbf_moments.csv similarity index 100% rename from tests/data_samples/01_test_lbf_moments.csv rename to tests/gentropy/data_samples/01_test_lbf_moments.csv diff --git a/tests/data_samples/01_test_ld.csv b/tests/gentropy/data_samples/01_test_ld.csv similarity index 100% rename from tests/data_samples/01_test_ld.csv rename to tests/gentropy/data_samples/01_test_ld.csv diff --git a/tests/data_samples/01_test_z.csv b/tests/gentropy/data_samples/01_test_z.csv similarity index 100% rename from tests/data_samples/01_test_z.csv rename to tests/gentropy/data_samples/01_test_z.csv diff --git a/tests/data_samples/andersson_sample.bed b/tests/gentropy/data_samples/andersson_sample.bed similarity index 100% rename from tests/data_samples/andersson_sample.bed rename to tests/gentropy/data_samples/andersson_sample.bed diff --git a/tests/data_samples/eqtl_catalogue_studies_sample.tsv b/tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv similarity index 100% rename from tests/data_samples/eqtl_catalogue_studies_sample.tsv rename to tests/gentropy/data_samples/eqtl_catalogue_studies_sample.tsv diff --git a/tests/data_samples/example_test-pop.bm/_SUCCESS b/tests/gentropy/data_samples/example_test-pop.bm/_SUCCESS similarity index 100% rename from tests/data_samples/example_test-pop.bm/_SUCCESS rename to tests/gentropy/data_samples/example_test-pop.bm/_SUCCESS diff --git a/tests/data_samples/example_test-pop.bm/metadata.json b/tests/gentropy/data_samples/example_test-pop.bm/metadata.json similarity index 100% rename from tests/data_samples/example_test-pop.bm/metadata.json rename to tests/gentropy/data_samples/example_test-pop.bm/metadata.json diff --git a/tests/data_samples/example_test-pop.bm/parts/part-0-17-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-0-17-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-0-17-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-0-17-0-0-e3ab091a-11ed-50ee-431d-8f2be29e7fb0 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-00-36-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-00-36-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-00-36-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-00-36-0-0-9da891bc-b3f3-211c-ec5d-b4586236d3f6 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-01-36-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-01-36-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-01-36-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-01-36-1-0-0f48a14a-3dc0-261b-3b0e-e3549671ed4b diff --git a/tests/data_samples/example_test-pop.bm/parts/part-02-36-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-02-36-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-02-36-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-02-36-2-0-c23e0dd7-d912-f900-88fd-fae87408e0bd diff --git a/tests/data_samples/example_test-pop.bm/parts/part-03-36-3-0-596fbcf8-3832-3676-1b73-987b185558f9 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-03-36-3-0-596fbcf8-3832-3676-1b73-987b185558f9 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-03-36-3-0-596fbcf8-3832-3676-1b73-987b185558f9 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-03-36-3-0-596fbcf8-3832-3676-1b73-987b185558f9 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-04-36-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-04-36-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-04-36-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-04-36-4-0-556c0d84-977b-2faa-376b-e758c55e2f42 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-05-36-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-05-36-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-05-36-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-05-36-5-0-8ac23bab-8bc8-8b2e-b897-9d818f17a80e diff --git a/tests/data_samples/example_test-pop.bm/parts/part-06-36-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-06-36-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-06-36-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-06-36-6-0-821b84fa-ae1c-8e95-c967-485dc621bd09 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-07-36-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-07-36-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-07-36-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-07-36-7-0-35b03c95-f5f4-aa0a-c39a-3e0a201c7e60 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-08-36-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-08-36-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-08-36-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-08-36-8-0-b6d68819-fa17-6649-1400-4113d86b4b81 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-09-36-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-09-36-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-09-36-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-09-36-9-0-7e9bb167-9ee1-443f-de8d-dcebf8c9b5fa diff --git a/tests/data_samples/example_test-pop.bm/parts/part-1-17-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-1-17-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-1-17-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-1-17-1-0-50da5bf3-93d4-fb68-d8ad-4450d10b2c28 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-2-17-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-2-17-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-2-17-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-2-17-2-0-94bda190-9389-91ab-30ad-9b3c9a3baf39 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-3-17-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-3-17-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-3-17-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-3-17-3-0-819f71a1-a22c-8937-48e2-dc686b8ec9c5 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-4-17-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-4-17-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-4-17-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-4-17-4-0-b9ac71e5-8432-3dac-e262-4eb17772eb45 diff --git a/tests/data_samples/example_test-pop.bm/parts/part-5-17-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a b/tests/gentropy/data_samples/example_test-pop.bm/parts/part-5-17-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a similarity index 100% rename from tests/data_samples/example_test-pop.bm/parts/part-5-17-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a rename to tests/gentropy/data_samples/example_test-pop.bm/parts/part-5-17-5-0-8d7756c7-f87c-eff0-3262-a16def109b7a diff --git a/tests/data_samples/example_test-pop.ht/README.txt b/tests/gentropy/data_samples/example_test-pop.ht/README.txt similarity index 100% rename from tests/data_samples/example_test-pop.ht/README.txt rename to tests/gentropy/data_samples/example_test-pop.ht/README.txt diff --git a/tests/data_samples/example_test-pop.ht/_SUCCESS b/tests/gentropy/data_samples/example_test-pop.ht/_SUCCESS similarity index 100% rename from tests/data_samples/example_test-pop.ht/_SUCCESS rename to tests/gentropy/data_samples/example_test-pop.ht/_SUCCESS diff --git a/tests/data_samples/example_test-pop.ht/globals/metadata.json.gz b/tests/gentropy/data_samples/example_test-pop.ht/globals/metadata.json.gz similarity index 100% rename from tests/data_samples/example_test-pop.ht/globals/metadata.json.gz rename to tests/gentropy/data_samples/example_test-pop.ht/globals/metadata.json.gz diff --git a/tests/data_samples/example_test-pop.ht/globals/parts/part-0 b/tests/gentropy/data_samples/example_test-pop.ht/globals/parts/part-0 similarity index 100% rename from tests/data_samples/example_test-pop.ht/globals/parts/part-0 rename to tests/gentropy/data_samples/example_test-pop.ht/globals/parts/part-0 diff --git a/tests/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/index b/tests/gentropy/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/index similarity index 100% rename from tests/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/index rename to tests/gentropy/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/index diff --git a/tests/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/metadata.json.gz b/tests/gentropy/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/metadata.json.gz similarity index 100% rename from tests/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/metadata.json.gz rename to tests/gentropy/data_samples/example_test-pop.ht/index/part-0-020df040-1af5-433e-944d-df08b484cf34.idx/metadata.json.gz diff --git a/tests/data_samples/example_test-pop.ht/metadata.json.gz b/tests/gentropy/data_samples/example_test-pop.ht/metadata.json.gz similarity index 100% rename from tests/data_samples/example_test-pop.ht/metadata.json.gz rename to tests/gentropy/data_samples/example_test-pop.ht/metadata.json.gz diff --git a/tests/data_samples/example_test-pop.ht/rows/metadata.json.gz b/tests/gentropy/data_samples/example_test-pop.ht/rows/metadata.json.gz similarity index 100% rename from tests/data_samples/example_test-pop.ht/rows/metadata.json.gz rename to tests/gentropy/data_samples/example_test-pop.ht/rows/metadata.json.gz diff --git a/tests/data_samples/example_test-pop.ht/rows/parts/part-0-020df040-1af5-433e-944d-df08b484cf34 b/tests/gentropy/data_samples/example_test-pop.ht/rows/parts/part-0-020df040-1af5-433e-944d-df08b484cf34 similarity index 100% rename from tests/data_samples/example_test-pop.ht/rows/parts/part-0-020df040-1af5-433e-944d-df08b484cf34 rename to tests/gentropy/data_samples/example_test-pop.ht/rows/parts/part-0-020df040-1af5-433e-944d-df08b484cf34 diff --git a/tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz b/tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz similarity index 100% rename from tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz rename to tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz diff --git a/tests/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz b/tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz similarity index 100% rename from tests/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz rename to tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz diff --git a/tests/data_samples/finngen_credset_summary_sample.tsv b/tests/gentropy/data_samples/finngen_credset_summary_sample.tsv similarity index 100% rename from tests/data_samples/finngen_credset_summary_sample.tsv rename to tests/gentropy/data_samples/finngen_credset_summary_sample.tsv diff --git a/tests/data_samples/finngen_studies_sample.json b/tests/gentropy/data_samples/finngen_studies_sample.json similarity index 100% rename from tests/data_samples/finngen_studies_sample.json rename to tests/gentropy/data_samples/finngen_studies_sample.json diff --git a/tests/data_samples/finucane_PIPs.npy b/tests/gentropy/data_samples/finucane_PIPs.npy similarity index 100% rename from tests/data_samples/finucane_PIPs.npy rename to tests/gentropy/data_samples/finucane_PIPs.npy diff --git a/tests/data_samples/grch37_to_grch38.over.chain b/tests/gentropy/data_samples/grch37_to_grch38.over.chain similarity index 100% rename from tests/data_samples/grch37_to_grch38.over.chain rename to tests/gentropy/data_samples/grch37_to_grch38.over.chain diff --git a/tests/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv b/tests/gentropy/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv similarity index 100% rename from tests/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv rename to tests/gentropy/data_samples/gwas_catalog_ancestries_sample_v1.0.3-r2022-11-29.tsv diff --git a/tests/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv b/tests/gentropy/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv similarity index 100% rename from tests/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv rename to tests/gentropy/data_samples/gwas_catalog_associations_sample_e107_r2022-11-29.tsv diff --git a/tests/data_samples/gwas_catalog_harmonised_list.txt b/tests/gentropy/data_samples/gwas_catalog_harmonised_list.txt similarity index 100% rename from tests/data_samples/gwas_catalog_harmonised_list.txt rename to tests/gentropy/data_samples/gwas_catalog_harmonised_list.txt diff --git a/tests/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv b/tests/gentropy/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv similarity index 100% rename from tests/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv rename to tests/gentropy/data_samples/gwas_catalog_studies_sample-r2022-11-29.tsv diff --git a/tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz b/tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz similarity index 100% rename from tests/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz rename to tests/gentropy/data_samples/imported/GTEx_V8/ge/Adipose_Subcutaneous.tsv.gz diff --git a/tests/data_samples/javierre_sample.parquet b/tests/gentropy/data_samples/javierre_sample.parquet similarity index 100% rename from tests/data_samples/javierre_sample.parquet rename to tests/gentropy/data_samples/javierre_sample.parquet diff --git a/tests/data_samples/jung_sample.bed b/tests/gentropy/data_samples/jung_sample.bed similarity index 100% rename from tests/data_samples/jung_sample.bed rename to tests/gentropy/data_samples/jung_sample.bed diff --git a/tests/data_samples/l2g_gold_standard_curation_sample.json.gz b/tests/gentropy/data_samples/l2g_gold_standard_curation_sample.json.gz similarity index 100% rename from tests/data_samples/l2g_gold_standard_curation_sample.json.gz rename to tests/gentropy/data_samples/l2g_gold_standard_curation_sample.json.gz diff --git a/tests/data_samples/neale2_saige_study_manifest.samples.tsv b/tests/gentropy/data_samples/neale2_saige_study_manifest.samples.tsv similarity index 100% rename from tests/data_samples/neale2_saige_study_manifest.samples.tsv rename to tests/gentropy/data_samples/neale2_saige_study_manifest.samples.tsv diff --git a/tests/data_samples/new_format_GCST90293086.h.tsv.gz b/tests/gentropy/data_samples/new_format_GCST90293086.h.tsv.gz similarity index 100% rename from tests/data_samples/new_format_GCST90293086.h.tsv.gz rename to tests/gentropy/data_samples/new_format_GCST90293086.h.tsv.gz diff --git a/tests/data_samples/old_format_GCST006090.h.tsv.gz b/tests/gentropy/data_samples/old_format_GCST006090.h.tsv.gz similarity index 100% rename from tests/data_samples/old_format_GCST006090.h.tsv.gz rename to tests/gentropy/data_samples/old_format_GCST006090.h.tsv.gz diff --git a/tests/data_samples/otp_interactions_sample.parquet b/tests/gentropy/data_samples/otp_interactions_sample.parquet similarity index 100% rename from tests/data_samples/otp_interactions_sample.parquet rename to tests/gentropy/data_samples/otp_interactions_sample.parquet diff --git a/tests/data_samples/sumstats_sample/GCST005523_chr18.parquet b/tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet similarity index 100% rename from tests/data_samples/sumstats_sample/GCST005523_chr18.parquet rename to tests/gentropy/data_samples/sumstats_sample/GCST005523_chr18.parquet diff --git a/tests/data_samples/target_sample.parquet/part-00161-86fc51a8-1d24-4753-b840-4a0b6289c221-c000.snappy.parquet b/tests/gentropy/data_samples/target_sample.parquet/part-00161-86fc51a8-1d24-4753-b840-4a0b6289c221-c000.snappy.parquet similarity index 100% rename from tests/data_samples/target_sample.parquet/part-00161-86fc51a8-1d24-4753-b840-4a0b6289c221-c000.snappy.parquet rename to tests/gentropy/data_samples/target_sample.parquet/part-00161-86fc51a8-1d24-4753-b840-4a0b6289c221-c000.snappy.parquet diff --git a/tests/data_samples/thurman_sample.bed8 b/tests/gentropy/data_samples/thurman_sample.bed8 similarity index 100% rename from tests/data_samples/thurman_sample.bed8 rename to tests/gentropy/data_samples/thurman_sample.bed8 diff --git a/tests/data_samples/vep_consequences_sample.tsv b/tests/gentropy/data_samples/vep_consequences_sample.tsv similarity index 100% rename from tests/data_samples/vep_consequences_sample.tsv rename to tests/gentropy/data_samples/vep_consequences_sample.tsv diff --git a/tests/dataset/test_colocalisation.py b/tests/gentropy/dataset/test_colocalisation.py similarity index 100% rename from tests/dataset/test_colocalisation.py rename to tests/gentropy/dataset/test_colocalisation.py diff --git a/tests/dataset/test_dataset.py b/tests/gentropy/dataset/test_dataset.py similarity index 100% rename from tests/dataset/test_dataset.py rename to tests/gentropy/dataset/test_dataset.py diff --git a/tests/dataset/test_gene_index.py b/tests/gentropy/dataset/test_gene_index.py similarity index 100% rename from tests/dataset/test_gene_index.py rename to tests/gentropy/dataset/test_gene_index.py diff --git a/tests/dataset/test_intervals.py b/tests/gentropy/dataset/test_intervals.py similarity index 100% rename from tests/dataset/test_intervals.py rename to tests/gentropy/dataset/test_intervals.py diff --git a/tests/dataset/test_l2g.py b/tests/gentropy/dataset/test_l2g.py similarity index 100% rename from tests/dataset/test_l2g.py rename to tests/gentropy/dataset/test_l2g.py diff --git a/tests/dataset/test_ld_index.py b/tests/gentropy/dataset/test_ld_index.py similarity index 100% rename from tests/dataset/test_ld_index.py rename to tests/gentropy/dataset/test_ld_index.py diff --git a/tests/dataset/test_study_index.py b/tests/gentropy/dataset/test_study_index.py similarity index 100% rename from tests/dataset/test_study_index.py rename to tests/gentropy/dataset/test_study_index.py diff --git a/tests/dataset/test_study_locus.py b/tests/gentropy/dataset/test_study_locus.py similarity index 100% rename from tests/dataset/test_study_locus.py rename to tests/gentropy/dataset/test_study_locus.py diff --git a/tests/dataset/test_study_locus_overlap.py b/tests/gentropy/dataset/test_study_locus_overlap.py similarity index 100% rename from tests/dataset/test_study_locus_overlap.py rename to tests/gentropy/dataset/test_study_locus_overlap.py diff --git a/tests/dataset/test_study_locus_overlaps.py b/tests/gentropy/dataset/test_study_locus_overlaps.py similarity index 100% rename from tests/dataset/test_study_locus_overlaps.py rename to tests/gentropy/dataset/test_study_locus_overlaps.py diff --git a/tests/dataset/test_summary_statistics.py b/tests/gentropy/dataset/test_summary_statistics.py similarity index 100% rename from tests/dataset/test_summary_statistics.py rename to tests/gentropy/dataset/test_summary_statistics.py diff --git a/tests/dataset/test_v2g.py b/tests/gentropy/dataset/test_v2g.py similarity index 100% rename from tests/dataset/test_v2g.py rename to tests/gentropy/dataset/test_v2g.py diff --git a/tests/dataset/test_variant_annotation.py b/tests/gentropy/dataset/test_variant_annotation.py similarity index 100% rename from tests/dataset/test_variant_annotation.py rename to tests/gentropy/dataset/test_variant_annotation.py diff --git a/tests/dataset/test_variant_index.py b/tests/gentropy/dataset/test_variant_index.py similarity index 100% rename from tests/dataset/test_variant_index.py rename to tests/gentropy/dataset/test_variant_index.py diff --git a/tests/datasource/eqtl_catalogue/test_eqtl_catalogue_study_index.py b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue_study_index.py similarity index 100% rename from tests/datasource/eqtl_catalogue/test_eqtl_catalogue_study_index.py rename to tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue_study_index.py diff --git a/tests/datasource/eqtl_catalogue/test_eqtl_catalogue_summary_stats.py b/tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue_summary_stats.py similarity index 100% rename from tests/datasource/eqtl_catalogue/test_eqtl_catalogue_summary_stats.py rename to tests/gentropy/datasource/eqtl_catalogue/test_eqtl_catalogue_summary_stats.py diff --git a/tests/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py similarity index 72% rename from tests/datasource/finngen/test_finngen_finemapping.py rename to tests/gentropy/datasource/finngen/test_finngen_finemapping.py index 6f83aed57..e15538280 100644 --- a/tests/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -14,8 +14,8 @@ def test_finngen_finemapping_from_finngen_susie_finemapping( assert isinstance( FinnGenFinemapping.from_finngen_susie_finemapping( spark=spark, - finngen_finemapping_df="tests/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz", - finngen_finemapping_summaries="tests/data_samples/finngen_credset_summary_sample.tsv", + finngen_finemapping_df="tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz", + finngen_finemapping_summaries="tests/gentropy/data_samples/finngen_credset_summary_sample.tsv", ), StudyLocus, ) diff --git a/tests/datasource/finngen/test_finngen_study_index.py b/tests/gentropy/datasource/finngen/test_finngen_study_index.py similarity index 100% rename from tests/datasource/finngen/test_finngen_study_index.py rename to tests/gentropy/datasource/finngen/test_finngen_study_index.py diff --git a/tests/datasource/finngen/test_finngen_summary_stats.py b/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py similarity index 86% rename from tests/datasource/finngen/test_finngen_summary_stats.py rename to tests/gentropy/datasource/finngen/test_finngen_summary_stats.py index 0c5725fdd..624f66d66 100644 --- a/tests/datasource/finngen/test_finngen_summary_stats.py +++ b/tests/gentropy/datasource/finngen/test_finngen_summary_stats.py @@ -12,7 +12,7 @@ def test_finngen_summary_stats_from_source(spark: SparkSession) -> None: assert isinstance( FinnGenSummaryStats.from_source( spark=spark, - raw_file="tests/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz", + raw_file="tests/gentropy/data_samples/finngen_R9_AB1_ACTINOMYCOSIS.gz", ), SummaryStatistics, ) diff --git a/tests/datasource/gnomad/test_gnomad_ld.py b/tests/gentropy/datasource/gnomad/test_gnomad_ld.py similarity index 94% rename from tests/datasource/gnomad/test_gnomad_ld.py rename to tests/gentropy/datasource/gnomad/test_gnomad_ld.py index 6176a3723..5fa3b00a4 100644 --- a/tests/datasource/gnomad/test_gnomad_ld.py +++ b/tests/gentropy/datasource/gnomad/test_gnomad_ld.py @@ -97,9 +97,9 @@ def _setup(self: TestGnomADLDMatrixVariants, spark: SparkSession) -> None: ld_test_population = "test-pop" gnomad_ld_matrix = GnomADLDMatrix( - ld_matrix_template="tests/data_samples/example_{POP}.bm", - ld_index_raw_template="tests/data_samples/example_{POP}.ht", - grch37_to_grch38_chain_path="tests/data_samples/grch37_to_grch38.over.chain", + ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm", + ld_index_raw_template="tests/gentropy/data_samples/example_{POP}.ht", + grch37_to_grch38_chain_path="tests/gentropy/data_samples/grch37_to_grch38.over.chain", ) self.ld_slice = gnomad_ld_matrix.get_ld_variants( gnomad_ancestry=ld_test_population, @@ -173,7 +173,7 @@ def _setup(self: TestGnomADLDMatrixSlice, spark: SparkSession) -> None: """Prepares fixtures for the test.""" hl.init(sc=spark.sparkContext, log="/dev/null", idempotent=True) gnomad_ld_matrix = GnomADLDMatrix( - ld_matrix_template="tests/data_samples/example_{POP}.bm" + ld_matrix_template="tests/gentropy/data_samples/example_{POP}.bm" ) test_ld_population: str = "test-pop" self.slice_start_index: int = 1 diff --git a/tests/datasource/gwas_catalog/test_gwas_catalog_associations.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py similarity index 100% rename from tests/datasource/gwas_catalog/test_gwas_catalog_associations.py rename to tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_associations.py diff --git a/tests/datasource/gwas_catalog/test_gwas_catalog_curation.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py similarity index 100% rename from tests/datasource/gwas_catalog/test_gwas_catalog_curation.py rename to tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_curation.py diff --git a/tests/datasource/gwas_catalog/test_gwas_catalog_study_index.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py similarity index 100% rename from tests/datasource/gwas_catalog/test_gwas_catalog_study_index.py rename to tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_index.py diff --git a/tests/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py similarity index 100% rename from tests/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py rename to tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_study_splitter.py diff --git a/tests/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py similarity index 95% rename from tests/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py rename to tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py index a43f1e5e2..4ede62f36 100644 --- a/tests/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py +++ b/tests/gentropy/datasource/gwas_catalog/test_gwas_catalog_summary_statistics.py @@ -26,7 +26,7 @@ def gwas_catalog_summary_statistics__new_format( ) -> GWASCatalogSummaryStatistics: """Test GWASCatalogSummaryStatistics creation with mock data.""" return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats( - spark, "tests/data_samples/new_format_GCST90293086.h.tsv.gz" + spark, "tests/gentropy/data_samples/new_format_GCST90293086.h.tsv.gz" ) @pytest.fixture(scope="class") @@ -36,7 +36,7 @@ def gwas_catalog_summary_statistics__old_format( ) -> GWASCatalogSummaryStatistics: """Test GWASCatalogSummaryStatistics creation with mock data.""" return GWASCatalogSummaryStatistics.from_gwas_harmonized_summary_stats( - spark, "tests/data_samples/old_format_GCST006090.h.tsv.gz" + spark, "tests/gentropy/data_samples/old_format_GCST006090.h.tsv.gz" ) @pytest.fixture(scope="class") diff --git a/tests/datasource/intervals/test_andersson.py b/tests/gentropy/datasource/intervals/test_andersson.py similarity index 90% rename from tests/datasource/intervals/test_andersson.py rename to tests/gentropy/datasource/intervals/test_andersson.py index 792b99c7c..b7283597b 100644 --- a/tests/datasource/intervals/test_andersson.py +++ b/tests/gentropy/datasource/intervals/test_andersson.py @@ -12,7 +12,9 @@ @pytest.fixture(scope="module") def sample_intervals_andersson(spark: SparkSession) -> DataFrame: """Sample Andersson intervals.""" - return IntervalsAndersson.read(spark, "tests/data_samples/andersson_sample.bed") + return IntervalsAndersson.read( + spark, "tests/gentropy/data_samples/andersson_sample.bed" + ) def test_read_andersson(sample_intervals_andersson: DataFrame) -> None: diff --git a/tests/datasource/intervals/test_javierre.py b/tests/gentropy/datasource/intervals/test_javierre.py similarity index 90% rename from tests/datasource/intervals/test_javierre.py rename to tests/gentropy/datasource/intervals/test_javierre.py index 59abe9f66..be8c528e2 100644 --- a/tests/datasource/intervals/test_javierre.py +++ b/tests/gentropy/datasource/intervals/test_javierre.py @@ -12,7 +12,9 @@ @pytest.fixture(scope="module") def sample_intervals_javierre(spark: SparkSession) -> DataFrame: """Sample Javierre intervals.""" - return IntervalsJavierre.read(spark, "tests/data_samples/javierre_sample.parquet") + return IntervalsJavierre.read( + spark, "tests/gentropy/data_samples/javierre_sample.parquet" + ) def test_read_javierre(sample_intervals_javierre: DataFrame) -> None: diff --git a/tests/datasource/intervals/test_jung.py b/tests/gentropy/datasource/intervals/test_jung.py similarity index 92% rename from tests/datasource/intervals/test_jung.py rename to tests/gentropy/datasource/intervals/test_jung.py index fa88a7620..81506ae55 100644 --- a/tests/datasource/intervals/test_jung.py +++ b/tests/gentropy/datasource/intervals/test_jung.py @@ -12,7 +12,7 @@ @pytest.fixture(scope="module") def sample_intervals_jung(spark: SparkSession) -> DataFrame: """Sample Jung intervals.""" - return IntervalsJung.read(spark, "tests/data_samples/jung_sample.bed") + return IntervalsJung.read(spark, "tests/gentropy/data_samples/jung_sample.bed") def test_read_jung(sample_intervals_jung: DataFrame) -> None: diff --git a/tests/datasource/intervals/test_thurman.py b/tests/gentropy/datasource/intervals/test_thurman.py similarity index 90% rename from tests/datasource/intervals/test_thurman.py rename to tests/gentropy/datasource/intervals/test_thurman.py index afd64feed..f89c9d3da 100644 --- a/tests/datasource/intervals/test_thurman.py +++ b/tests/gentropy/datasource/intervals/test_thurman.py @@ -12,7 +12,9 @@ @pytest.fixture(scope="module") def sample_intervals_thurman(spark: SparkSession) -> DataFrame: """Sample Thurman intervals.""" - return IntervalsThurman.read(spark, "tests/data_samples/thurman_sample.bed8") + return IntervalsThurman.read( + spark, "tests/gentropy/data_samples/thurman_sample.bed8" + ) def test_read_thurman(sample_intervals_thurman: DataFrame) -> None: diff --git a/tests/datasource/open_targets/test_l2g_gold_standard.py b/tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py similarity index 100% rename from tests/datasource/open_targets/test_l2g_gold_standard.py rename to tests/gentropy/datasource/open_targets/test_l2g_gold_standard.py diff --git a/tests/datasource/open_targets/test_target.py b/tests/gentropy/datasource/open_targets/test_target.py similarity index 100% rename from tests/datasource/open_targets/test_target.py rename to tests/gentropy/datasource/open_targets/test_target.py diff --git a/tests/datasource/ukbiobank/test_ukbiobank_study_index.py b/tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py similarity index 100% rename from tests/datasource/ukbiobank/test_ukbiobank_study_index.py rename to tests/gentropy/datasource/ukbiobank/test_ukbiobank_study_index.py diff --git a/tests/docs/test_applying_methods.py b/tests/gentropy/docs/test_applying_methods.py similarity index 100% rename from tests/docs/test_applying_methods.py rename to tests/gentropy/docs/test_applying_methods.py diff --git a/tests/test_docs.py b/tests/gentropy/docs/test_build.py similarity index 100% rename from tests/test_docs.py rename to tests/gentropy/docs/test_build.py diff --git a/tests/docs/test_create_dataset.py b/tests/gentropy/docs/test_create_dataset.py similarity index 100% rename from tests/docs/test_create_dataset.py rename to tests/gentropy/docs/test_create_dataset.py diff --git a/tests/docs/test_creating_spark_session.py b/tests/gentropy/docs/test_creating_spark_session.py similarity index 100% rename from tests/docs/test_creating_spark_session.py rename to tests/gentropy/docs/test_creating_spark_session.py diff --git a/tests/docs/test_inspect_dataset.py b/tests/gentropy/docs/test_inspect_dataset.py similarity index 100% rename from tests/docs/test_inspect_dataset.py rename to tests/gentropy/docs/test_inspect_dataset.py diff --git a/tests/method/test_carma.py b/tests/gentropy/method/test_carma.py similarity index 100% rename from tests/method/test_carma.py rename to tests/gentropy/method/test_carma.py diff --git a/tests/method/test_clump.py b/tests/gentropy/method/test_clump.py similarity index 100% rename from tests/method/test_clump.py rename to tests/gentropy/method/test_clump.py diff --git a/tests/method/test_colocalisation_method.py b/tests/gentropy/method/test_colocalisation_method.py similarity index 100% rename from tests/method/test_colocalisation_method.py rename to tests/gentropy/method/test_colocalisation_method.py diff --git a/tests/method/test_ld.py b/tests/gentropy/method/test_ld.py similarity index 100% rename from tests/method/test_ld.py rename to tests/gentropy/method/test_ld.py diff --git a/tests/method/test_locus_to_gene.py b/tests/gentropy/method/test_locus_to_gene.py similarity index 100% rename from tests/method/test_locus_to_gene.py rename to tests/gentropy/method/test_locus_to_gene.py diff --git a/tests/method/test_pics.py b/tests/gentropy/method/test_pics.py similarity index 100% rename from tests/method/test_pics.py rename to tests/gentropy/method/test_pics.py diff --git a/tests/method/test_susie_inf.py b/tests/gentropy/method/test_susie_inf.py similarity index 100% rename from tests/method/test_susie_inf.py rename to tests/gentropy/method/test_susie_inf.py diff --git a/tests/method/test_window_based_clumping.py b/tests/gentropy/method/test_window_based_clumping.py similarity index 100% rename from tests/method/test_window_based_clumping.py rename to tests/gentropy/method/test_window_based_clumping.py diff --git a/tests/step/test_clump_step.py b/tests/gentropy/step/test_clump_step.py similarity index 90% rename from tests/step/test_clump_step.py rename to tests/gentropy/step/test_clump_step.py index bacf70b41..701afc950 100644 --- a/tests/step/test_clump_step.py +++ b/tests/gentropy/step/test_clump_step.py @@ -20,7 +20,7 @@ def test_clumpstep_summary_stats(self, session: Session) -> None: clumped_study_locus_path = Path(temp_dir, "GCST005523_chr18_clumped") WindowBasedClumpingStep( session=session, - summary_statistics_input_path="tests/data_samples/sumstats_sample", + summary_statistics_input_path="tests/gentropy/data_samples/sumstats_sample", study_locus_output_path=str(clumped_study_locus_path), ) assert Path(clumped_study_locus_path).exists(), "Output directory exists." diff --git a/tests/test_cli.py b/tests/gentropy/test_cli.py similarity index 100% rename from tests/test_cli.py rename to tests/gentropy/test_cli.py diff --git a/tests/test_schemas.py b/tests/gentropy/test_schemas.py similarity index 100% rename from tests/test_schemas.py rename to tests/gentropy/test_schemas.py diff --git a/tests/test_spark_helpers.py b/tests/gentropy/test_spark_helpers.py similarity index 100% rename from tests/test_spark_helpers.py rename to tests/gentropy/test_spark_helpers.py From 65c44a05561db6d50fe51983236a3661f204a628 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Wed, 21 Feb 2024 10:29:09 +0000 Subject: [PATCH 08/12] ci: pre-commit autoupdate message (#500) --- .pre-commit-config.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3d713a432..45736c8af 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,6 @@ +ci: + autoupdate_commit_msg: "chore: pre-commit autoupdate" + autofix_commit_msg: "chore: pre-commit auto fixes [...]" repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.2.2 From b2abe3f4c0c32fff896cc1ef641b0cccebe1812d Mon Sep 17 00:00:00 2001 From: xyg123 <33658607+xyg123@users.noreply.github.com> Date: Thu, 22 Feb 2024 11:15:44 +0000 Subject: [PATCH 09/12] fix(finemapping): finngen harmonisation (schema and orchestration) (#489) * fix: raw susie schema fix * fix: update airflow dag with explicit params * fix: path configs for step yaml * fix: config register fm ingestion step * fix: pytest for fm ingest * fix: release prefix in ingestion class * fix: minor changes from review * test: missing release prefix --------- Co-authored-by: David Ochoa --- .../ot_finngen_finemapping_ingestion.yaml | 1 + src/airflow/dags/finngen_preprocess.py | 13 +++ src/gentropy/config.py | 20 +++++ .../datasource/finngen/finemapping.py | 81 +++++++++++++------ src/gentropy/finngen_finemapping_ingestion.py | 5 +- .../finngen/test_finngen_finemapping.py | 1 + 6 files changed, 95 insertions(+), 26 deletions(-) diff --git a/config/step/ot_finngen_finemapping_ingestion.yaml b/config/step/ot_finngen_finemapping_ingestion.yaml index d7ddffa70..46aa497fa 100644 --- a/config/step/ot_finngen_finemapping_ingestion.yaml +++ b/config/step/ot_finngen_finemapping_ingestion.yaml @@ -3,4 +3,5 @@ defaults: finngen_finemapping_results_path: ${datasets.finngen_finemapping_results_path} finngen_finemapping_summaries_path: ${datasets.finngen_finemapping_summaries_path} +finngen_release_prefix: ${datasets.finngen_release_prefix} finngen_finemapping_out: ${datasets.finngen_finemapping_out} diff --git a/src/airflow/dags/finngen_preprocess.py b/src/airflow/dags/finngen_preprocess.py index f6dfd8c81..38aeb2510 100644 --- a/src/airflow/dags/finngen_preprocess.py +++ b/src/airflow/dags/finngen_preprocess.py @@ -21,6 +21,13 @@ LD_CLUMPED = f"{FINNGEN_BUCKET}/study_locus_datasets/finngen_ld_clumped" PICSED_CREDIBLE_SET = f"{FINNGEN_BUCKET}/credible_set_datasets/finngen_pics" +FINNGEN_FINEMAPPING = ( + "gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/full" +) +FINNGEN_FM_SUMMARIES = "gs://genetics_etl_python_playground/input/Finngen_susie_finemapping_r10/Finngen_susie_credset_summary_r10.tsv" +FINNGEN_PREFIX = "FINNGEN_R10_" +FINNGEN_FM_OUT = "gs://genetics_etl_python_playground/output/python_etl/parquet/XX.XX/finngen_susie_processed" + with DAG( dag_id=Path(__file__).stem, description="Open Targets Genetics — Finngen preprocess", @@ -31,6 +38,12 @@ cluster_name=CLUSTER_NAME, step_id="ot_finngen_finemapping_ingestion", task_id="finngen_finemapping_ingestion", + other_args=[ + f"step.finngen_finemapping_out={FINNGEN_FM_OUT}", + f"step.finngen_release_prefix={FINNGEN_PREFIX}", + f"step.finngen_finemapping_results_path={FINNGEN_FINEMAPPING}", + f"step.finngen_finemapping_summaries_path={FINNGEN_FM_SUMMARIES}", + ], # This allows to attempt running the task when above step fails do to failifexists trigger_rule=TriggerRule.ALL_DONE, ) diff --git a/src/gentropy/config.py b/src/gentropy/config.py index 03ef0f82c..552c50d1d 100644 --- a/src/gentropy/config.py +++ b/src/gentropy/config.py @@ -133,6 +133,19 @@ class FinngenSumstatPreprocessConfig(StepConfig): _target_: str = "gentropy.finngen_sumstat_preprocess.FinnGenSumstatPreprocessStep" +@dataclass +class FinngenFinemappingConfig(StepConfig): + """FinnGen fine mapping ingestion step configuration.""" + + finngen_finemapping_results_path: str = MISSING + finngen_finemapping_summaries_path: str = MISSING + finngen_release_prefix: str = MISSING + finngen_finemapping_out: str = MISSING + _target_: str = ( + "gentropy.finngen_finemapping_ingestion.FinnGenFinemappingIngestionStep" + ) + + @dataclass class LDIndexConfig(StepConfig): """LD index step configuration.""" @@ -353,6 +366,13 @@ def register_config() -> None: name="finngen_sumstat_preprocess", node=FinngenSumstatPreprocessConfig, ) + + cs.store( + group="step", + name="finngen_finemapping_ingestion", + node=FinngenFinemappingConfig, + ) + cs.store(group="step", name="pics", node=PICSConfig) cs.store(group="step", name="variant_annotation", node=VariantAnnotationConfig) cs.store(group="step", name="variant_index", node=VariantIndexConfig) diff --git a/src/gentropy/datasource/finngen/finemapping.py b/src/gentropy/datasource/finngen/finemapping.py index c7d5695b7..8fd12b0ba 100644 --- a/src/gentropy/datasource/finngen/finemapping.py +++ b/src/gentropy/datasource/finngen/finemapping.py @@ -8,7 +8,7 @@ import pyspark.sql.functions as f import pyspark.sql.types as t from pyspark.sql import SparkSession, Window -from pyspark.sql.types import StringType, StructField, StructType +from pyspark.sql.types import DoubleType, StringType, StructField, StructType from gentropy.common.spark_helpers import get_top_ranked_in_window from gentropy.common.utils import parse_pvalue @@ -29,7 +29,6 @@ class FinnGenFinemapping: Finemapping method is populated as a constant ("SuSIE"). """ - finngen_release_prefix: str = "FINNGEN_R10" raw_schema: t.StructType = StructType( [ StructField("trait", StringType(), True), @@ -48,26 +47,56 @@ class FinnGenFinemapping: StructField("sd", StringType(), True), StructField("prob", StringType(), True), StructField("cs", StringType(), True), - StructField("alpha1", StringType(), True), - StructField("alpha2", StringType(), True), - StructField("alpha3", StringType(), True), - StructField("alpha4", StringType(), True), - StructField("alpha5", StringType(), True), - StructField("alpha6", StringType(), True), - StructField("alpha7", StringType(), True), - StructField("alpha8", StringType(), True), - StructField("alpha9", StringType(), True), - StructField("alpha10", StringType(), True), - StructField("lbf_variable1", StringType(), True), - StructField("lbf_variable2", StringType(), True), - StructField("lbf_variable3", StringType(), True), - StructField("lbf_variable4", StringType(), True), - StructField("lbf_variable5", StringType(), True), - StructField("lbf_variable6", StringType(), True), - StructField("lbf_variable7", StringType(), True), - StructField("lbf_variable8", StringType(), True), - StructField("lbf_variable9", StringType(), True), - StructField("lbf_variable10", StringType(), True), + StructField("cs_specific_prob", DoubleType(), True), + StructField("low_purity", StringType(), True), + StructField("lead_r2", StringType(), True), + StructField("mean_99", StringType(), True), + StructField("sd_99", StringType(), True), + StructField("prob_99", StringType(), True), + StructField("cs_99", StringType(), True), + StructField("cs_specific_prob_99", StringType(), True), + StructField("low_purity_99", StringType(), True), + StructField("lead_r2_99", StringType(), True), + StructField("alpha1", DoubleType(), True), + StructField("alpha2", DoubleType(), True), + StructField("alpha3", DoubleType(), True), + StructField("alpha4", DoubleType(), True), + StructField("alpha5", DoubleType(), True), + StructField("alpha6", DoubleType(), True), + StructField("alpha7", DoubleType(), True), + StructField("alpha8", DoubleType(), True), + StructField("alpha9", DoubleType(), True), + StructField("alpha10", DoubleType(), True), + StructField("mean1", StringType(), True), + StructField("mean2", StringType(), True), + StructField("mean3", StringType(), True), + StructField("mean4", StringType(), True), + StructField("mean5", StringType(), True), + StructField("mean6", StringType(), True), + StructField("mean7", StringType(), True), + StructField("mean8", StringType(), True), + StructField("mean9", StringType(), True), + StructField("mean10", StringType(), True), + StructField("sd1", StringType(), True), + StructField("sd2", StringType(), True), + StructField("sd3", StringType(), True), + StructField("sd4", StringType(), True), + StructField("sd5", StringType(), True), + StructField("sd6", StringType(), True), + StructField("sd7", StringType(), True), + StructField("sd8", StringType(), True), + StructField("sd9", StringType(), True), + StructField("sd10", StringType(), True), + StructField("lbf_variable1", DoubleType(), True), + StructField("lbf_variable2", DoubleType(), True), + StructField("lbf_variable3", DoubleType(), True), + StructField("lbf_variable4", DoubleType(), True), + StructField("lbf_variable5", DoubleType(), True), + StructField("lbf_variable6", DoubleType(), True), + StructField("lbf_variable7", DoubleType(), True), + StructField("lbf_variable8", DoubleType(), True), + StructField("lbf_variable9", DoubleType(), True), + StructField("lbf_variable10", DoubleType(), True), ] ) @@ -76,7 +105,7 @@ class FinnGenFinemapping: StructField("trait", StringType(), True), StructField("region", StringType(), True), StructField("cs", StringType(), True), - StructField("cs_log10bf", StringType(), True), + StructField("cs_log10bf", DoubleType(), True), ] ) @@ -86,6 +115,7 @@ def from_finngen_susie_finemapping( spark: SparkSession, finngen_finemapping_df: (str | list[str]), finngen_finemapping_summaries: (str | list[str]), + finngen_release_prefix: str, credset_lbf_threshold: float = 0.8685889638065036, ) -> StudyLocus: """Process the SuSIE finemapping output for FinnGen studies. @@ -94,6 +124,7 @@ def from_finngen_susie_finemapping( spark (SparkSession): Spark session object. finngen_finemapping_df (str | list[str]): SuSIE finemapping output filename(s). finngen_finemapping_summaries (str | list[str]): filename of SuSIE finemapping summaries. + finngen_release_prefix (str): FinnGen study prefix. credset_lbf_threshold (float, optional): Filter out credible sets below, Default 0.8685889638065036 == np.log10(np.exp(2)), this is threshold from publication. Returns: @@ -110,7 +141,7 @@ def from_finngen_susie_finemapping( .filter(f.col("cs").cast(t.IntegerType()) > 0) .select( # Add study idenfitier. - f.concat(f.lit(cls.finngen_release_prefix), f.col("trait")) + f.concat(f.lit(finngen_release_prefix), f.col("trait")) .cast(t.StringType()) .alias("studyId"), f.col("region"), @@ -209,7 +240,7 @@ def from_finngen_susie_finemapping( | (f.col("credibleSetIndex") == 1) ) .withColumn( - "studyId", f.concat(f.lit(cls.finngen_release_prefix), f.col("trait")) + "studyId", f.concat(f.lit(finngen_release_prefix), f.col("trait")) ) ) diff --git a/src/gentropy/finngen_finemapping_ingestion.py b/src/gentropy/finngen_finemapping_ingestion.py index 420e4542d..d70316b5e 100644 --- a/src/gentropy/finngen_finemapping_ingestion.py +++ b/src/gentropy/finngen_finemapping_ingestion.py @@ -18,6 +18,7 @@ def __init__( session: Session, finngen_finemapping_results_path: str, finngen_finemapping_summaries_path: str, + finngen_release_prefix: str, finngen_finemapping_out: str, ) -> None: """Run FinnGen finemapping ingestion step. @@ -26,14 +27,16 @@ def __init__( session (Session): Session object. finngen_finemapping_results_path (str): Path to the FinnGen SuSIE finemapping results. finngen_finemapping_summaries_path (str): FinnGen SuSIE summaries for CS filters(LBF>2). + finngen_release_prefix (str): Release prefix for FinnGen. finngen_finemapping_out (str): Output path for the finemapping results in StudyLocus format. """ - # Read finemapping outputs from the URL. + # Read finemapping outputs from the input paths. finngen_finemapping_df = FinnGenFinemapping.from_finngen_susie_finemapping( spark=session.spark, finngen_finemapping_df=finngen_finemapping_results_path, finngen_finemapping_summaries=finngen_finemapping_summaries_path, + finngen_release_prefix=finngen_release_prefix, ) # Write the output. diff --git a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py index e15538280..089b10f74 100644 --- a/tests/gentropy/datasource/finngen/test_finngen_finemapping.py +++ b/tests/gentropy/datasource/finngen/test_finngen_finemapping.py @@ -16,6 +16,7 @@ def test_finngen_finemapping_from_finngen_susie_finemapping( spark=spark, finngen_finemapping_df="tests/gentropy/data_samples/finngen_R9_AB1_EBV.SUSIE.snp.gz", finngen_finemapping_summaries="tests/gentropy/data_samples/finngen_credset_summary_sample.tsv", + finngen_release_prefix="FINNGEN_R10", ), StudyLocus, ) From c2fc459a31076396e3892efd5dae2f1292133c7d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 09:31:16 +0000 Subject: [PATCH 10/12] build(deps-dev): bump ipython from 8.21.0 to 8.22.1 Bumps [ipython](https://github.com/ipython/ipython) from 8.21.0 to 8.22.1. - [Release notes](https://github.com/ipython/ipython/releases) - [Commits](https://github.com/ipython/ipython/compare/8.21.0...8.22.1) --- updated-dependencies: - dependency-name: ipython dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/poetry.lock b/poetry.lock index d70afbb00..ead80e422 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4033,13 +4033,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio [[package]] name = "ipython" -version = "8.21.0" +version = "8.22.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.21.0-py3-none-any.whl", hash = "sha256:1050a3ab8473488d7eee163796b02e511d0735cf43a04ba2a8348bd0f2eaf8a5"}, - {file = "ipython-8.21.0.tar.gz", hash = "sha256:48fbc236fbe0e138b88773fa0437751f14c3645fb483f1d4c5dee58b37e5ce73"}, + {file = "ipython-8.22.1-py3-none-any.whl", hash = "sha256:869335e8cded62ffb6fac8928e5287a05433d6462e3ebaac25f4216474dd6bc4"}, + {file = "ipython-8.22.1.tar.gz", hash = "sha256:39c6f9efc079fb19bfb0f17eee903978fe9a290b1b82d68196c641cecb76ea22"}, ] [package.dependencies] @@ -4048,16 +4048,16 @@ decorator = "*" exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} jedi = ">=0.16" matplotlib-inline = "*" -pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""} +pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""} prompt-toolkit = ">=3.0.41,<3.1.0" pygments = ">=2.4.0" stack-data = "*" -traitlets = ">=5" +traitlets = ">=5.13.0" [package.extras] -all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.23)", "pandas", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"] +all = ["ipython[black,doc,kernel,nbconvert,nbformat,notebook,parallel,qtconsole,terminal]", "ipython[test,test-extra]"] black = ["black"] -doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"] +doc = ["docrepr", "exceptiongroup", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "stack-data", "typing-extensions"] kernel = ["ipykernel"] nbconvert = ["nbconvert"] nbformat = ["nbformat"] @@ -4065,7 +4065,7 @@ notebook = ["ipywidgets", "notebook"] parallel = ["ipyparallel"] qtconsole = ["qtconsole"] test = ["pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "testpath"] -test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "pickleshare", "pytest (<8)", "pytest-asyncio (<0.22)", "testpath", "trio"] +test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"] [[package]] name = "isodate" From c354f5a984a33baf4dfa2c6c0fe888dc9d874301 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 10:40:59 +0000 Subject: [PATCH 11/12] build(deps): bump typing-extensions from 4.9.0 to 4.10.0 (#506) Bumps [typing-extensions](https://github.com/python/typing_extensions) from 4.9.0 to 4.10.0. - [Release notes](https://github.com/python/typing_extensions/releases) - [Changelog](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md) - [Commits](https://github.com/python/typing_extensions/commits) --- updated-dependencies: - dependency-name: typing-extensions dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index ead80e422..83092d46e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -8019,13 +8019,13 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=6.2,<7.0)", "isort (>=5.0.6,<6. [[package]] name = "typing-extensions" -version = "4.9.0" +version = "4.10.0" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" files = [ - {file = "typing_extensions-4.9.0-py3-none-any.whl", hash = "sha256:af72aea155e91adfc61c3ae9e0e342dbc0cba726d6cba4b6c72c1f34e47291cd"}, - {file = "typing_extensions-4.9.0.tar.gz", hash = "sha256:23478f88c37f27d76ac8aee6c905017a143b0b1b886c3c9f66bc2fd94f9f5783"}, + {file = "typing_extensions-4.10.0-py3-none-any.whl", hash = "sha256:69b1a937c3a517342112fb4c6df7e72fc39a38e7891a5730ed4985b5214b5475"}, + {file = "typing_extensions-4.10.0.tar.gz", hash = "sha256:b0abd7c89e8fb96f98db18d86106ff1d90ab692004eb746cf6eda2682f91b3cb"}, ] [[package]] From bb735e1aad938181f975a7641f64ae4f8c3f7ab9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Feb 2024 16:13:05 +0000 Subject: [PATCH 12/12] build(deps-dev): bump mkdocs-git-committers-plugin-2 from 2.2.3 to 2.3.0 (#505) Bumps [mkdocs-git-committers-plugin-2](https://github.com/ojacques/mkdocs-git-committers-plugin-2) from 2.2.3 to 2.3.0. - [Release notes](https://github.com/ojacques/mkdocs-git-committers-plugin-2/releases) - [Commits](https://github.com/ojacques/mkdocs-git-committers-plugin-2/compare/2.2.3...2.3.0) --- updated-dependencies: - dependency-name: mkdocs-git-committers-plugin-2 dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/poetry.lock b/poetry.lock index 83092d46e..64ac6a99d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -4837,13 +4837,13 @@ mkdocs = "*" [[package]] name = "mkdocs-git-committers-plugin-2" -version = "2.2.3" +version = "2.3.0" description = "An MkDocs plugin to create a list of contributors on the page. The git-committers plugin will seed the template context with a list of GitHub or GitLab committers and other useful GIT info such as last modified date" optional = false python-versions = ">=3.8,<4" files = [ - {file = "mkdocs-git-committers-plugin-2-2.2.3.tar.gz", hash = "sha256:e0dddef4e3e321d97bcb83123fd963a839d9f9fa801c3125efeb6544bd8b247c"}, - {file = "mkdocs_git_committers_plugin_2-2.2.3-py3-none-any.whl", hash = "sha256:0f20d61a9315174f30d5a0266d4a457d3d88909ca5c5bd7d0e0dd2e1841f2af0"}, + {file = "mkdocs-git-committers-plugin-2-2.3.0.tar.gz", hash = "sha256:d6baca1ae04db8120640038eda8142f2d081c27b53f3b566c83c75717e4ed81a"}, + {file = "mkdocs_git_committers_plugin_2-2.3.0-py3-none-any.whl", hash = "sha256:7b3434af3be525c12858eb3b44b4c6b695b7c7b7760482ea8de1c6e292e84f0f"}, ] [package.dependencies]