-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathSnakefile
99 lines (83 loc) · 2.55 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import random
import pandas as pd
import numpy as np
# Directory that is scanned for the input .sth files.
DATA_DIR = "data"

# One sample per "<DATA_DIR>/<sample>.sth" file; the sample name is the
# part of the file name matched by the {sample} wildcard.
SAMPLES = glob_wildcards(DATA_DIR + "/{sample}.sth").sample

# Abort early: without samples there is nothing to expand in the rules.
if not SAMPLES:
    raise ValueError(
        f"Data directory {DATA_DIR} is missing or do not contain any .sth files"
    )
# File-name suffixes: "" is the unpermuted matrix, "_seed_<k>" selects the
# permuted copy written by the permutate_phylip rule for seed <k>.
_SEED_SUFFIXES = ["", "_seed_1", "_seed_2"]

# speedytree is benchmarked for every algorithm / thread-count combination.
_SPEEDYTREE_BENCHMARKS = expand(
    "benchmarks/speedytree/{sample}{seed}_{algo}_{threads}.txt",
    algo=["naive", "rapidnj", "hybrid"],
    sample=SAMPLES,
    seed=_SEED_SUFFIXES,
    threads=[1, 8],
)

# The external programs are benchmarked once per input matrix.
_EXTERNAL_BENCHMARKS = expand(
    "benchmarks/{program}/{sample}{seed}.txt",
    program=["rapidnj", "quicktree"],
    sample=SAMPLES,
    seed=_SEED_SUFFIXES,
)


# Default target: request every benchmark file listed above.
rule all:
    input:
        _SPEEDYTREE_BENCHMARKS,
        _EXTERNAL_BENCHMARKS,
# Build the speedytree executable in release mode with cargo; the other
# rules pick it up from target/release/.
rule speedytree_binary:
    output: "target/release/speedytree"
    shell: "cargo build --release"
# Benchmark speedytree on one distance matrix.
#
# Wildcards (taken from the benchmark file name):
#   sample  - name of the matrix under target/snakemake/ (may carry a
#             "_seed_<k>" suffix for a permuted copy)
#   algo    - passed to the binary as the "--<algo>" flag
#   threads - passed to the binary as the "-c" argument
#
# The tree written to stdout is discarded; only the benchmark file is kept.
rule speedytree:
    input:
        matrix="target/snakemake/{sample}.phy",
        binary="target/release/speedytree",
    # Tell the scheduler how many cores the job occupies. Without this every
    # job counts as a single core and several multi-threaded runs could be
    # started side by side, skewing the measured times.
    threads: lambda wildcards: int(wildcards.threads)
    benchmark:
        "benchmarks/speedytree/{sample}_{algo}_{threads}.txt"
    shell:
        # Use the wildcard (not the possibly down-scaled {threads} value) so
        # the run always matches the thread count in the benchmark file name.
        "./{input.binary} --{wildcards.algo} -c {wildcards.threads} < {input.matrix} > /dev/null"
# Benchmark the external rapidnj program on one .phy matrix
# ("-i pd" selects its input format). Its stdout is discarded.
rule rapidnj:
    input: "target/snakemake/{sample}.phy"
    benchmark: "benchmarks/rapidnj/{sample}.txt"
    shell: "rapidnj -i pd {input} > /dev/null"
# Benchmark the external quicktree program on one .phy matrix
# ("-in m" selects its input format). Its stdout is discarded.
rule quicktree:
    input: "target/snakemake/{sample}.phy"
    benchmark: "benchmarks/quicktree/{sample}.txt"
    shell: "quicktree -in m {input} > /dev/null"
# Convert one input .sth file from DATA_DIR into the .phy file consumed by
# the benchmark rules, using rapidnj itself ("-o m": presumably distance
# matrix output - confirm against the rapidnj manual).
rule phylip:
    input: DATA_DIR + "/{sample}.sth"
    output: "target/snakemake/{sample}.phy"
    shell: "rapidnj -i sth -o m {input} > {output}"
# Write a copy of a sample's matrix with its entries in a shuffled order.
#
# The same permutation is applied to the names, the rows and the columns,
# so the output describes the same matrix with a different entry order.
# The shuffle is seeded with the {seed} wildcard, which makes every
# "<sample>_seed_<seed>.phy" file reproducible.
#
# Raises ValueError when the input matrix is not square.
rule permutate_phylip:
    input:
        "target/snakemake/{sample}.phy",
    output:
        "target/snakemake/{sample}_seed_{seed}.phy",
    run:
        random.seed(wildcards.seed)

        # The first line is skipped (the output below writes the entry count
        # there). Column 0 holds the names and is forced to str so that
        # numeric-looking names are not parsed as numbers.
        table = pd.read_table(
            input[0], sep=r"\s+", skiprows=1, header=None, dtype={0: str}
        )
        names = table[0].tolist()
        distances = table.drop(columns=0).to_numpy(dtype=float)

        n = distances.shape[1]
        if distances.shape[0] != n:
            raise ValueError(
                f"{input[0]}: expected a square matrix, got shape {distances.shape}"
            )

        perm = list(range(n))
        random.shuffle(perm)

        # Reorder rows and columns with the same permutation in one step.
        perm_data = distances[np.ix_(perm, perm)]
        perm_names = [names[i].strip() for i in perm]

        with open(output[0], "w") as f:
            f.write(f"{n}\n")
            f.writelines(
                f"{name} {' '.join(map(str, row))}\n"
                for name, row in zip(perm_names, perm_data)
            )