# Patch summary
#   workflow/rules/common.smk
#     - removes get_truth_sample_name(), which read the baseline sample name
#       from results/somatic/{callset}.truth.cov-{cov}.sample_name.txt
#     - get_somatic_flag() now uses the fixed baseline sample name "truth"
#     - removes the commented-out earlier version of the return expression
#   workflow/rules/eval.smk
#     - removes rule extract_truth_sample_name (bcftools query -l)
#     - removes the sample_name.txt input from rule benchmark_variants
#     - adds a commented-out example of the vcfeval command line
#
# NOTE(review): line breaks and indentation were restored from a
#   whitespace-collapsed copy using 4 spaces per level; the hunk bodies match
#   the line counts in the hunk headers, but confirm exact whitespace against
#   the repository before applying.
# NOTE(review): the last eval.smk hunk only adds a commented-out command with
#   hard-coded sample names (HG001,40perc_tumor_40perc); consider dropping it
#   before merging.
diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk
index ea357fe..af21b3b 100644
--- a/workflow/rules/common.smk
+++ b/workflow/rules/common.smk
@@ -326,35 +326,15 @@ def get_nonempty_coverages(wildcards):
     return _get_nonempty_coverages(wildcards.callset)
 
 
-def get_truth_sample_name(wildcards):
-    benchmark = config["variant-calls"][wildcards.callset]["benchmark"]
-    # TODO use f-string when this is fixed: https://github.com/snakemake/snakefmt/issues/215
-    filename = "results/somatic/{callset}.truth.cov-{cov}.sample_name.txt".format(
-        callset=wildcards.callset, cov=wildcards.cov
-    )
-    with open(filename) as file:
-        truth_sample_name = file.read()
-    return truth_sample_name
-
-
 def get_somatic_flag(wildcards):
     benchmark = config["variant-calls"][wildcards.callset]["benchmark"]
     if genomes[benchmarks[benchmark]["genome"]]["somatic"]:
-        sample_name_baseline = get_truth_sample_name(wildcards)
-        # via rule extract_truth_sample_name "results/somatic/{benchmark}.truth.cov-{cov}.sample_name.txt"
-        # get name baseline via bcftools query -l truth.vcf -> new rule / function
+        sample_name_baseline = "truth"
         sample_name_callset = config["variant-calls"][wildcards.callset]["tumor_sample_name"] # get name tumor via config -> from dict
         somatic_flag = "--squash-ploidy --sample " + sample_name_baseline + ',' + sample_name_callset
     else:
         somatic_flag = ""
     return somatic_flag
-    # return (
-    # "--squash-ploidy --sample ," #TODO replace and
-
-    # if genomes[benchmarks[benchmark]["genome"]]["somatic"]
-
-    # else ""
-    # )
 
 
 def get_collect_stratifications_input(wildcards):
diff --git a/workflow/rules/eval.smk b/workflow/rules/eval.smk
index 9bf9102..7a37bb8 100644
--- a/workflow/rules/eval.smk
+++ b/workflow/rules/eval.smk
@@ -88,17 +88,6 @@ rule generate_sdf:
         "rtg format --output {output} {input.genome} &> {log}"
 
 
-rule extract_truth_sample_name:
-    input:
-        truth=get_stratified_truth(),
-    output:
-        "results/somatic/{callset}.truth.cov-{cov}.sample_name.txt",
-    conda:
-        "../envs/tools.yaml"
-    shell:
-        "bcftools query -l {input.truth} > {output}"
-
-
 rule benchmark_variants:
     input:
         truth=get_stratified_truth(),
@@ -106,7 +95,6 @@ rule benchmark_variants:
         query="results/stratified-variants/{callset}/{cov}.vcf.gz",
         query_index="results/stratified-variants/{callset}/{cov}.vcf.gz.tbi",
         genome="resources/reference/genome-sdf",
-        file="results/somatic/{callset}.truth.cov-{cov}.sample_name.txt",
     output:
         "results/vcfeval/{callset}/{cov}/output.vcf.gz",
     log:
@@ -121,6 +109,7 @@ rule benchmark_variants:
         "rm -r {params.output}; rtg vcfeval --threads {threads} --ref-overlap --all-records "
         "--output-mode ga4gh --baseline {input.truth} --calls {input.query} "
         "--output {params.output} --template {input.genome} {params.somatic} &> {log}"
+        # "--output {params.output} --template {input.genome} --squash-ploidy --sample HG001,40perc_tumor_40perc &> {log}"
 
 
 rule calc_precision_recall: