-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile.report
executable file
·152 lines (130 loc) · 5.53 KB
/
Snakefile.report
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/bin/bash
# vim: ft=python

# Contents >>>
#   + Embedded BASH script to bootstrap the workflow
#   + Initialisation and configuration
#   + Helper functions
#   + The rules specific to this workflow
#   + More generic rules

# Note this workflow is designed to run locally. All of the heavy lifting should
# have been done by Snakefile.process_cells. Making two reports on different cells
# in parallel should be fine.

# This file is both a bash script and a Snakefile. When executed directly, bash
# reads the opening quotes below as the command 'true', runs the shell lines and
# leaves at the "exit" line, so it never sees the Python part. When Snakemake
# parses the file, the whole shell section is one triple-quoted string that is
# simply discarded.
# The helper functions used here (find_toolbox, find_templates, snakerun_single)
# are expected to come from shell_helper_functions.sh, which lives alongside
# this script.
"""true" ### Begin shell script part
set -u

source "$(dirname "$0")"/shell_helper_functions.sh

export TOOLBOX="$(find_toolbox)"
export TEMPLATES="$(find_templates)"
export PATH="${PATH}:$(dirname "$0")"

snakerun_single "$0" "$@"

"exit""" ### End of shell script part
#!/usr/bin/env snakemake
from snakemake.utils import format
from smrtino import glob, load_yaml
# Command prefix for shell rules: runs the following command via 'env' with the
# directory named by $TOOLBOX at the front of PATH. $PATH is left for the shell
# to expand when the command runs.
TOOLBOX = f'env PATH="{os.environ["TOOLBOX"]}:$PATH"'
""" Report will be made based upon all the cell.info.yaml files available. There may be none.
For every .info.yaml we need to generate a corresponding .link.yaml, and
we're also adding in PDF reports from SMRTLink at this stage - other plots are
generated by Snakefile.process_cells and linked in the info.yaml files.
Also we may or may not have some plots to add.
"""
def scan_cells(filter_by_config=True):
    """ Work out all the cells to process and return them as a list of
        (slot, cell_name) tuples.

        The cells are read from the 'cells' mapping in the YAML file named by
        config['sc_data'] (default "sc_data.yaml"). This mapping is keyed by
        cell name and each entry must have a 'slot' item.

        If filter_by_config is true and config['cells'] is supplied, it will be
        a string like "1_A01 2_B01 3_C01" and only the cells in one of the
        listed slots are returned. Otherwise all the cells are returned.
    """
    all_info = load_yaml(config.get("sc_data", "sc_data.yaml"))['cells']

    if filter_by_config and ('cells' in config):
        # Split the list of slots just once, not once per cell.
        wanted_slots = config['cells'].split()
        all_info = { k: v for k, v in all_info.items()
                     if v['slot'] in wanted_slots }

    return [ (v['slot'], k) for k, v in all_info.items() ]
def ifexists(filepattern):
    """Make an input function that only adds an input file to a rule if that
       file already exists.

       filepattern is a str.format() template, which is filled in from the
       attributes of the wildcards object each time the returned function is
       called. A pattern with no placeholders is used as it stands.

       The returned function takes the wildcards object and returns
       [filename] if the file exists, or an empty list if it does not.
    """
    def _inner(wildcards):
        filename = filepattern.format(**vars(wildcards))
        return [filename] if os.path.exists(filename) else []

    return _inner
# Global wildcard patterns, applied to every rule in this file that uses
# these wildcard names.
wildcard_constraints:
    # A single digit, an underscore, a capital letter, then "01" - eg. 1_A01
    cellslot = r"\d_[A-Z]01",
    # An "m" followed by word characters that include at least two underscores
    cell = r"m\w+_\w+_\w+",
# We shall make a single report per cell (used to be one per run).
# The report filenames are to be of the form:
#  all_reports/{cellslot}-{cell}.html
# This rule has no outputs of its own. It just requests one HTML report for
# every (slot, cell) pair returned by scan_cells().
localrules: report_main
rule report_main:
    input:
        # c[0] is the slot and c[1] is the cell name. The lambda defers the call
        # to scan_cells() until Snakemake evaluates the inputs of this rule.
        pan = lambda wc: expand("all_reports/{c[0]}-{c[1]}.html", c=scan_cells())
# Makes a .link.yaml for each .info.yaml by capturing the output of
# link_to_smrtlink.py.
# Note this script relies on ~/.smrtlinkrc for connection details.
# The three rules that talk to SMRTLink are all declared as local rules here.
localrules: link_to_smrtlink, smrtlink_pdf_report, smrtlink_get_sample
rule link_to_smrtlink:
    output: "{cell}.link.yaml"
    input: "{cell}.info.yaml"
    shell:
        "link_to_smrtlink.py {input} > {output}"
# This also relies on ~/.smrtlinkrc. It will trigger generation of a new report,
# poll until ready, then download it.
# Modified to proceed even if report generation fails, since some datasets don't get
# properly imported in SMRTLink.
rule smrtlink_pdf_report:
    output: "all_reports/pdf/{cell}.pdf"
    input: "{cell}.link.yaml"
    run:
        # Un-silence sys.stderr in sub-jobs:
        logger.quiet.discard('all')

        # The report is requested by the cell UUID, which is read from the
        # 'cell_uuid' item in the .link.yaml file.
        # Setting SMRTLINKRC_SECTION=none will cause an empty report file to be made
        cell_uuid = load_yaml(str(input))['cell_uuid']
        shell("smrtlink_pdf_report.py -o {output} --empty_on_missing {cell_uuid}")
# This digs the sample setup info out of SMRTLink, where it is buried.
# We may have to add the --use_latest flag but that's a bit dicey. We have
# pbpipeline/OVERRIDES.yaml instead. I made this an input to this rule so as
# to trigger a re-run if the OVERRIDES is edited.
rule smrtlink_get_sample:
    output: "{cell}.sample-setup.yaml"
    input:
        info = "{cell}.info.yaml",
        # Only counted as an input if the file is present, so the rule can
        # still run when there is no OVERRIDES.yaml.
        overrides = ifexists("pbpipeline/OVERRIDES.yaml"),
    # Note the --options path is passed to the script whether or not the file
    # exists.
    shell:
        """smrtlink_get_sample.py --options pbpipeline/OVERRIDES.yaml \
              --errors_to_yaml \
              {input.info} > {output}
        """
# Invokes PanDoc to turn a Markdown (.pan) report into the final HTML page,
# using the template, JavaScript and CSS files from the templates directory.
rule pan_to_html:
    output: "all_reports/{report}.html"
    input: "all_reports/pandoc/{report}.pan"
    params:
        # Directory holding the template files, taken from $TEMPLATES. Falls
        # back to the current directory if the variable is not set.
        templates = os.environ.get('TEMPLATES', '.')
    # {TOOLBOX} is the 'env PATH=...' prefix defined at the top of this file, so
    # pandoc is looked for in the toolbox directory first.
    shell:
        r"""{TOOLBOX} pandoc -f markdown \
                --template={params.templates}/template.html \
                --include-in-header={params.templates}/javascript.js.html \
                --include-in-header={params.templates}/local.css.html \
                --toc --toc-depth=2 \
                -o {output} {input}
        """
# This rule marshals all the .png images and makes one report in PanDoc format.
# The shell part does three things:
#  1) Removes any existing all_reports/img/{cell}.*.cov0*.png files.
#  2) If there are any blob plots, symlinks them into all_reports/img, using
#     relative links (ln -r).
#  3) Runs make_report.py to write the .pan file. The -s option is given the
#     output of pb_run_status.py via a process substitution.
rule make_report:
    output:
        pan = "all_reports/pandoc/{cellslot}-{cell}.pan",
        # Empty marker file, touched by Snakemake when the job succeeds
        btouch = touch("all_reports/img/{cellslot}-{cell}-blob.linked"),
    input:
        yaml = "{cell}.info.yaml",
        links = "{cell}.link.yaml",
        sample = "{cell}.sample-setup.yaml",
        pdf = "all_reports/pdf/{cell}.pdf",
        # Whatever blob plots exist when the inputs are evaluated. There may be none.
        blob_plots = lambda wc: glob(f"blob/{wc.cell}.*.cov0*.png"),
    params:
        # The PDF location as passed to make_report.py -r. It is the path of
        # input.pdf without the leading all_reports/ part.
        pdflink = "pdf/{cell}.pdf",
    shell:
        r"""rm -vf all_reports/img/{wildcards.cell}.*.cov0*.png
            [ -z "{input.blob_plots}" ] || ln -vsrnt all_reports/img {input.blob_plots}
            make_report.py -o {output.pan} \
                -y {input.yaml} -l {input.links} -r {params.pdflink} \
                -S {input.sample} \
                -s <(pb_run_status.py)
        """