Skip to content

Commit

Permalink
fix: remove non-working rule and function for extracting truth sample…
Browse files Browse the repository at this point in the history
… name. Use fixed sample name 'truth' instead.
  • Loading branch information
BiancaStoecker committed Feb 9, 2024
1 parent 7b24bff commit 58955ce
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 33 deletions.
22 changes: 1 addition & 21 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -326,35 +326,15 @@ def get_nonempty_coverages(wildcards):
return _get_nonempty_coverages(wildcards.callset)


def get_truth_sample_name(wildcards):
    """Return the truth sample name stored for a callset/coverage pair.

    The name is read from
    ``results/somatic/{callset}.truth.cov-{cov}.sample_name.txt``, where the
    placeholders are filled from ``wildcards.callset`` and ``wildcards.cov``.
    Leading and trailing whitespace (in particular a trailing newline) is
    removed so the result can be embedded directly into a command line.

    Args:
        wildcards: object exposing ``callset`` and ``cov`` attributes.

    Returns:
        The stripped content of the sample-name file.

    Raises:
        FileNotFoundError: if the sample-name file does not exist.
    """
    # TODO use f-string when this is fixed: https://github.com/snakemake/snakefmt/issues/215
    filename = "results/somatic/{callset}.truth.cov-{cov}.sample_name.txt".format(
        callset=wildcards.callset, cov=wildcards.cov
    )
    with open(filename) as file:
        # Without strip() the newline that terminates the file would end up
        # in the middle of any argument string built from the name.
        return file.read().strip()


def get_somatic_flag(wildcards):
    """Return the extra command-line flags needed for somatic benchmarks.

    The benchmark of ``wildcards.callset`` is looked up in ``config``; if the
    genome of that benchmark is marked ``somatic`` in ``genomes``, the flags
    ``--squash-ploidy --sample <baseline>,<tumor>`` are returned, otherwise an
    empty string.

    Args:
        wildcards: object exposing a ``callset`` attribute.

    Returns:
        The flag string, or ``""`` for non-somatic genomes.

    Raises:
        KeyError: if the callset, its benchmark, the genome, or (for somatic
            genomes) the ``tumor_sample_name`` entry is missing.
    """
    callset_config = config["variant-calls"][wildcards.callset]
    benchmark = callset_config["benchmark"]
    if not genomes[benchmarks[benchmark]["genome"]]["somatic"]:
        return ""

    # The baseline sample name is fixed to "truth" instead of being read from
    # a results file; the tumor sample name comes from the callset's config.
    sample_name_baseline = "truth"
    sample_name_callset = callset_config["tumor_sample_name"]
    return (
        "--squash-ploidy --sample " + sample_name_baseline + "," + sample_name_callset
    )


def get_collect_stratifications_input(wildcards):
Expand Down
13 changes: 1 addition & 12 deletions workflow/rules/eval.smk
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,13 @@ rule generate_sdf:
"rtg format --output {output} {input.genome} &> {log}"


# Write the sample name(s) listed by `bcftools query -l` for the truth
# callset returned by get_stratified_truth() into a per-callset,
# per-coverage text file.
# NOTE(review): the output file presumably ends with a newline (one sample
# name per line), so consumers should strip whitespace when reading it -- confirm.
rule extract_truth_sample_name:
input:
truth=get_stratified_truth(),
output:
"results/somatic/{callset}.truth.cov-{cov}.sample_name.txt",
conda:
"../envs/tools.yaml"
shell:
# -l lists the sample names only; stdout is redirected into the output file.
"bcftools query -l {input.truth} > {output}"


rule benchmark_variants:
input:
truth=get_stratified_truth(),
truth_idx=get_stratified_truth(".tbi"),
query="results/stratified-variants/{callset}/{cov}.vcf.gz",
query_index="results/stratified-variants/{callset}/{cov}.vcf.gz.tbi",
genome="resources/reference/genome-sdf",
file="results/somatic/{callset}.truth.cov-{cov}.sample_name.txt",
output:
"results/vcfeval/{callset}/{cov}/output.vcf.gz",
log:
Expand All @@ -121,6 +109,7 @@ rule benchmark_variants:
"rm -r {params.output}; rtg vcfeval --threads {threads} --ref-overlap --all-records "
"--output-mode ga4gh --baseline {input.truth} --calls {input.query} "
"--output {params.output} --template {input.genome} {params.somatic} &> {log}"
# "--output {params.output} --template {input.genome} --squash-ploidy --sample HG001,40perc_tumor_40perc &> {log}"


rule calc_precision_recall:
Expand Down

0 comments on commit 58955ce

Please sign in to comment.