-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathSnakefile
111 lines (75 loc) · 2.69 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
Make variant calls from aligned contigs.
"""
import os
import shlex
import sys
# Names supplied by Snakemake at run time. A module-level "global" statement has no effect on
# execution; presumably these are declared to keep linters/IDEs from flagging undefined names.
global expand, shell, workflow

#
# Global constants
#

# Directory containing this Snakefile, with symbolic links resolved.
PIPELINE_DIR = os.path.split(os.path.realpath(workflow.snakefile))[0]
#
# Parameters
#

# Configuration file: path comes from the "config_file" entry of config, defaulting to "config.json".
configfile: config.get('config_file', 'config.json')

### Parameters from config ###

# Reference FASTA & FAI
REF_FA = 'data/ref/ref.fa.gz'
REF_FAI = f'{REF_FA}.fai'

# VCF file pattern: the optional "vcf_prefix" and "vcf_suffix" config values are placed around the
# "{asm_name}" wildcard placeholder, followed by the ".vcf.gz" extension.
VCF_PATTERN = '{prefix}{{asm_name}}{suffix}.vcf.gz'.format(
    prefix=config.get('vcf_prefix', ''),
    suffix=config.get('vcf_suffix', '')
)
#
# Assembly library and dependency imports
#

# Make the pipeline's own package and its bundled dependencies importable. Entries are appended to
# the end of sys.path in the order listed.
sys.path.extend([
    PIPELINE_DIR,  # pavlib
    os.path.join(PIPELINE_DIR, 'dep', 'svpop'),  # svpoplib
    os.path.join(PIPELINE_DIR, 'dep', 'svpop', 'dep'),  # kanapy
    os.path.join(PIPELINE_DIR, 'dep', 'svpop', 'dep', 'ply'),  # ply - lexer / parser
])

import pavlib
#
# Read sample config
#

# Locate the assembly table. An explicit "assembly_table" config entry takes precedence; otherwise
# fall back to the first default filename that exists in the working directory
# ("assemblies.tsv", then "assemblies.xlsx").
ASM_TABLE_FILENAME = config.get('assembly_table', None)

if ASM_TABLE_FILENAME is None and os.path.isfile('assemblies.tsv'):
    ASM_TABLE_FILENAME = 'assemblies.tsv'

if ASM_TABLE_FILENAME is None and os.path.isfile('assemblies.xlsx'):
    ASM_TABLE_FILENAME = 'assemblies.xlsx'

# Fail early with a message that names every location that was checked.
if ASM_TABLE_FILENAME is None:
    raise RuntimeError(
        'No input assembly table in config ("assembly_table") and no default table file was found '
        '("assemblies.tsv" or "assemblies.xlsx")'
    )

# Parse the table; the full config is passed through to the reader.
ASM_TABLE = pavlib.pipeline.read_assembly_table(ASM_TABLE_FILENAME, config)
#
# Rules
#

# Environment source file for shell commands. Taken from the "env_source" config entry
# (default "setenv.sh"). It is disabled (set to None) when the file does not exist or when the
# "ignore_env_file" config entry evaluates true through pavlib.util.as_bool.
ENV_FILE = config.get('env_source', 'setenv.sh')

if not os.path.isfile(ENV_FILE) or pavlib.util.as_bool(config.get('ignore_env_file', False)):
    ENV_FILE = None

# Every shell command runs with strict bash options; when an environment file is in use it is
# sourced first. The path is shell-quoted so that spaces or shell metacharacters in it cannot
# break or alter the command prefix (plain paths are left unchanged by shlex.quote).
if ENV_FILE:
    shell.prefix(f'set -euo pipefail; source {shlex.quote(str(ENV_FILE))}; ')
else:
    shell.prefix('set -euo pipefail; ')
### Wildcard constraints ###
wildcard_constraints:
asm_name=r'[A-Za-z_\-0-9\.]+'
### Default rule ###
localrules: pav_all
# pav_all
#
# Make all files for all samples.
rule pav_all:
input:
bed=expand('{asm_name}.vcf.gz', asm_name=ASM_TABLE.index)
# bed=expand('vcf/merged/{asm_name}.vcf.gz', asm_name=ASM_TABLE.index)
### Includes ###

# Pull in the rule files that live under the "rules" directory of the pipeline installation
# (PIPELINE_DIR). Paths are resolved against PIPELINE_DIR rather than the working directory.
include: os.path.join(PIPELINE_DIR, 'rules/definitions.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/pipeline.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/data.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/align.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/call.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/call_inv.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/call_lg.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/tracks.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/figures.snakefile')
include: os.path.join(PIPELINE_DIR, 'rules/vcf.snakefile')