generated from snakemake-workflows/snakemake-workflow-template
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Change folder structure for fp-fn results, add collection of fp-fn results for all covs of a callset and all callsets of a benchmark.
- Loading branch information
1 parent
db68793
commit 4b1a64b
Showing
4 changed files
with
153 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import sys | ||
sys.stderr = open(snakemake.log[0], "w") | ||
|
||
import pandas as pd | ||
|
||
|
||
def load_data(path, callset):
    """Read one tab-separated table and tag every row with its callset.

    Args:
        path: Location of (or file-like object for) the tab-separated table.
        callset: Value written into the new leading ``callset`` column.

    Returns:
        The loaded DataFrame with ``callset`` inserted as the first column.
    """
    table = pd.read_csv(path, sep="\t")
    # Position 0 makes the callset the first column of the table.
    table.insert(0, "callset", callset)
    return table
|
||
|
||
# Load each input table together with the callset name at the same position,
# then stack all of them row-wise into a single frame.
per_callset_tables = [
    load_data(table_path, callset_name)
    for table_path, callset_name in zip(
        snakemake.input.tables, snakemake.params.callsets
    )
]
results = pd.concat(per_callset_tables, axis="rows")
|
||
def cov_key(cov_label):
    """Return the integer lower bound of a coverage label, for sorting.

    A label containing ".." is read as "<lower>..<upper>" and the part before
    the first ".." is used. Any other label is treated as one prefix character
    followed by the number (e.g. "≥30").

    Raises:
        ValueError: If the extracted text is not a valid integer.
    """
    lower, separator, _ = cov_label.partition("..")
    if not separator:
        # Open-ended label: skip the single leading marker character.
        lower = cov_label[1:]
    return int(lower)
|
||
|
||
|
||
def sort_key(col):
    """Map a column to the values it should be sorted by.

    The ``coverage`` column is sorted by the integer lower bound of each
    label (via ``cov_key``); every other column, including ``callset``, is
    returned unchanged and therefore sorted by its own values.
    """
    if col.name != "coverage":
        return col
    return col.apply(cov_key)
|
||
|
||
# Disabled variant that would additionally sort by "vaf", kept for reference:
# if snakemake.params.vaf:
#     results.sort_values(["callset", "vaf", "coverage"], inplace=True, key=sort_key)
# else:

# Order rows by callset, then by the numeric lower bound of the coverage label.
results = results.sort_values(["callset", "coverage"], key=sort_key)

# Store the numeric coverage bound as its own column in the written table.
results["sort_index"] = results["coverage"].apply(cov_key)

results.to_csv(snakemake.output[0], sep="\t", index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
import sys | ||
|
||
sys.stderr = open(snakemake.log[0], "w") | ||
|
||
import pandas as pd | ||
|
||
|
||
def get_cov_label(coverage):
    """Build the display label for a coverage category.

    Looks up the category's lower bound in
    ``snakemake.params.coverage_lower_bounds`` and pairs it with the smallest
    configured bound that is strictly greater.

    Args:
        coverage: Key into ``snakemake.params.coverage_lower_bounds``.

    Returns:
        "<lower>..<upper>" when a greater bound exists, otherwise "≥<lower>".
    """
    lower_bounds = snakemake.params.coverage_lower_bounds
    lower = lower_bounds[coverage]
    # The next larger bound, if any, closes the interval.
    upper = min(
        (bound for bound in lower_bounds.values() if bound > lower),
        default=None,
    )
    if upper is None:
        return f"≥{lower}"
    return f"{lower}..{upper}"
|
||
|
||
def load_data(f, coverage):
    """Read one tab-separated table and tag every row with its coverage label.

    Args:
        f: Location of the tab-separated table.
        coverage: Coverage category, turned into a label by ``get_cov_label``.

    Returns:
        The loaded DataFrame with ``coverage`` inserted as the first column.
    """
    table = pd.read_csv(f, sep="\t")
    label = get_cov_label(coverage)
    # Position 0 makes the coverage label the first column of the table.
    table.insert(0, "coverage", label)
    return table
|
||
|
||
if snakemake.input:
    # One table per coverage category, each tagged with its coverage label,
    # stacked into a single report.
    report = pd.concat(
        load_data(f, cov) for cov, f in zip(snakemake.params.coverages, snakemake.input)
    )

    # TODO With separate files for SNVs and indels with e.g. STRELKA no predicted variants for the other type are expected
    # If later relevant, add annotation to the report
    # if (report["tp_truth"] == 0).all():
    #     raise ValueError(
    #         f"The callset {snakemake.wildcards.callset} does not predict any variant from the truth. "
    #         "This is likely a technical issue in the callset and should be checked before further evaluation."
    #     )

    report.to_csv(snakemake.output[0], sep="\t", index=False)
else:
    # No input tables: write a header-only table instead.
    # NOTE(review): "chromosome position" is kept as a single column name
    # exactly as before; confirm whether two separate columns
    # ("chromosome", "position") were intended.
    pd.DataFrame(
        {
            col: []
            for col in [
                "coverage",
                "class",
                "chromosome position",
                "ref_allele",
                # The comma after "alt_allele" matters: without it Python
                # joins it with the next literal into one bogus column name.
                "alt_allele",
                "true_genotype",
                "predicted_genotype",
            ]
        }
        # index=False keeps the header free of a leading empty index column,
        # matching the layout written by the non-empty branch.
    ).to_csv(snakemake.output[0], sep="\t", index=False)