-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclarity_run_id_setter.py
executable file
·199 lines (157 loc) · 8.25 KB
/
clarity_run_id_setter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#!/usr/bin/env python3
""" Given a run ID, this will poke it into the LIMS so that we can reliably look
up run flags and such like, and finish the implementation of get_project_from_lims.py.
At the moment, once a step is tagged we refuse to look and see if there is a newer
step, but in fact we probably want that feature. In that case the UDF should be
cleared from the original and tagged onto the new version.
This script should run, I think, whenever Illuminatus uploads a report. It's
idempotent, so safe to tag the same run again and again.
"""
import os, sys, re
import configparser
# Seems pointless to use my wrapper lib for this
from pyclarity_lims.lims import Lims
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
import logging as L
from pprint import pprint, pformat
# Name of the Clarity process type that carries the RunID / Run Report UDFs.
QC_PROCESS = "Flow Cell Lane QC EG 1.0 ST"

# Template for the report link; the single '{}' is filled in with the run ID.
REPORT_URL_TEMPLATE = "http://web1.genepool.private/runinfo/illuminatus_reports/{}"

# When run from Illuminatus, a non-empty REPORT_LINK (as set in environ.sh)
# takes precedence over the built-in default above.
if os.environ.get("REPORT_LINK", ""):
    REPORT_URL_TEMPLATE = os.environ["REPORT_LINK"] + '/{}'
def main(args):
    """Record a run ID (and report URL) on the matching QC step in the LIMS.

    Steps, in order:
      1. If a QC_PROCESS process already carries this RunID, report it
         (optionally completing the step) and exit with status 0.
      2. Work out the container (flowcell) name, either from
         args.container_name or from the last '_'/'-' separated part of
         the run ID, and look it up in the LIMS.
      3. Exit with status 1 if no container is found; if several are
         found, use the one with the highest-numbered URI.
      4. Take the analyte placed at '1:1' or 'A:1' and find the latest
         QC_PROCESS process that has it as an input.
      5. Set the 'RunID' and 'Run Report' UDFs on that step and save
         them, unless args.no_act is set.

    args is the namespace produced by parse_args(); the attributes read
    here are runid, container_name, close, force, no_act and debug.

    The function terminates the interpreter via sys.exit() on the early-exit
    and error paths described above; it returns None on success.
    """
    if args.debug:
        L.basicConfig(format='{name:s} {levelname:s}: {message:s}', level=L.DEBUG, style='{')
        # But silence chatter from pyclarity_lims
        L.getLogger("requests").propagate = False
        L.getLogger("urllib3").propagate = False
    else:
        L.basicConfig(format='{message:s}', level=L.INFO, style='{')

    # Connect to ze lims...
    lims = Lims(**get_config())

    # Sanity check, since I accidentally set several run IDs to
    # /lustre/fastqdata/180416_M05898_0001_000000000-D42P7 etc. by foolish use of
    # a shell loop.
    # This is an explicit test rather than an assert so that it still runs
    # when Python is started with -O.
    if not re.match(r"^[0-9]{6}_[A-Za-z0-9_-]+", args.runid):
        L.error("Run ID does not look right - {}".format(args.runid))
        sys.exit(1)

    # 1 - Firstly look to see if the name is already known. If so, report and exit.
    existing_proc = lims.get_processes(type=QC_PROCESS, udf={'RunID': args.runid})

    if existing_proc:
        L.info("Run {} is already in the LIMS database...".format(args.runid))

        # There should just be one!
        for ep in existing_proc:
            L.info(" " + ep.uri)

            if args.close and not args.no_act:
                # Best effort: a failure to advance is reported but not fatal.
                try:
                    L.info(" Completing the existing step...")
                    ep.step.get()
                    ep.step.advance()
                    L.info(" completed")
                except Exception as e:
                    L.info(" " + str(e))

            if args.debug:
                # It's useful to be able to get the parent process because that's what gets fed
                # into the sample sheet generator.
                for epp_uri in sorted(set(epp.uri for epp in ep.parent_processes())):
                    L.debug(" parent --> " + epp_uri)

        # Not an error, but FIXME I should allow the ID to be transferred to a new process.
        sys.exit(0)

    # 2 - Then split out the flowcell name and look for a container with that name.
    if args.container_name:
        container_name = args.container_name
    else:
        container_name = re.split('[_-]', args.runid)[-1]

        # Prune off the stage letter.
        mo = re.match(r'[AB](.........)', container_name)
        if mo:
            container_name = mo.group(1)

    # Sometimes the container in the LIMS is lower case, and sometimes the name
    # we have is not all upper case, so try the name as given plus both case
    # variants. Results are keyed by URI so that a container returned by more
    # than one query is only counted once (otherwise we would warn about
    # "multiple containers" when there is really just one).
    containers_by_uri = {}
    for name_variant in dict.fromkeys([container_name,
                                       container_name.lower(),
                                       container_name.upper()]):
        for found in lims.get_containers(name=name_variant):
            containers_by_uri[found.uri] = found

    # 3 - If none, exit. If several, pick the latest (highest number)
    if not containers_by_uri:
        L.warning("No container found with name {}.".format(container_name))
        sys.exit(1)

    # Sort numerically on the digits in the URI; a plain string sort would
    # put '27-999' after '27-1000'.
    existing_containers = sorted(containers_by_uri.values(),
                                 key=lambda c: _natural_sort_key(c.uri))
    existing_container = existing_containers[-1]
    if len(existing_containers) > 1:
        L.warning("Multiple containers found with name {}. The latest ({}) will be used.".format(
                                        container_name, existing_container.id ))

    L.info("Finding QC step for container {}.".format(existing_container.uri))

    # 4 - HiSeq lane 1 is 1:1 while MiSeq lane is 'A:1' for some reason. We should see one or the other.
    placements = existing_container.placements
    lane1_analytes = [placements.get(loc) for loc in ['1:1', 'A:1'] if loc in placements]
    if len(lane1_analytes) != 1:
        L.error("Expected exactly one of the placements 1:1 or A:1 in container {}, found {}.".format(
                                        existing_container.id, len(lane1_analytes) ))
        sys.exit(1)
    analyte1 = lane1_analytes[0]

    L.debug("Examining analyte {}".format(analyte1.uri))

    # https://clarity.genomics.ed.ac.uk/api/v2/processes?type=Flow%20Cell%20Lane%20QC%20EG%201.0%20ST&inputartifactlimsid=2-126766
    # I need to handle the error if there is not one process - older runs lack the step, for example.
    # In the development LIMS we also seem to have multiple QC processes per flowcell. As before I think we need to go for
    # the latest one.
    # Also I've seen a case where a stage was queued and removed (ie. not completed). This shows up on the artifact page but
    # not in the list when I run the get_processes search below.
    qc_procs = sorted(lims.get_processes(type=QC_PROCESS, inputartifactlimsid=analyte1.id),
                      key=lambda p: _natural_sort_key(p.id))
    if not qc_procs:
        L.error("Could not find a QC step for this container.")
        sys.exit(1)
    qc_proc = qc_procs[-1]

    # 5 - it seems Clarity likes to set UDFs on a step not a process(?)
    qc_step = qc_proc.step.details

    L.info("Setting UDFs on {}".format(qc_step.uri))

    if qc_step.udf.get('RunID'):
        if args.force:
            L.info("Forcing new RunID for step {} in place of '{}' as you requested.".format(qc_step.id, qc_step.udf.get('RunID')))
        else:
            L.info("RunID for step {} is already set to '{}'.".format(qc_step.id, qc_step.udf.get('RunID')))
            sys.exit(1)

    # So, set it...
    qc_step.udf['RunID'] = args.runid
    qc_step.udf['Run Report'] = REPORT_URL_TEMPLATE.format(args.runid)

    L.debug(pformat(qc_step.udf))

    if args.no_act:
        L.info("Nothing will be saved as no_act was set.")
    else:
        qc_step.put()

        if args.close:
            # Best effort: a failure to advance is reported but not fatal.
            try:
                L.info("Completing the step...")
                qc_proc.step.get(force=True)
                qc_proc.step.advance()
                L.info("completed")
            except Exception as e:
                L.info(str(e))

    L.info("DONE")


def _natural_sort_key(text):
    """Return a sort key for text that compares runs of ASCII digits numerically.

    re.split() with a capturing group always yields non-digit text at even
    positions and the captured digit runs at odd positions, so keys built
    this way can be compared element by element without mixing str and int.
    """
    pieces = re.split(r'([0-9]+)', text)
    return [int(piece) if idx % 2 else piece for idx, piece in enumerate(pieces)]
def get_config(section='genologics', parts=('BASEURI', 'USERNAME', 'PASSWORD')):
    """Read LIMS connection settings from a genologics-style config file.

    A simple stand-in for the genologics.config module.

    The file named by the GENOLOGICSRC environment variable is read if that
    variable is set; otherwise these candidates are tried and all that exist
    are read: ~/.genologicsrc, genologics.conf, genologics.cfg,
    /etc/genologics.conf.

    section -- name of the config section to read from.
    parts   -- option names to extract from that section.

    Returns a dict mapping each name in parts, lower-cased, to its value.
    Raises KeyError if the section or one of the options is missing.
    """
    # SafeConfigParser was a deprecated alias and no longer exists in
    # Python 3.12+; ConfigParser is the same class under its current name.
    config = configparser.ConfigParser()

    conf_file = config.read(os.environ.get('GENOLOGICSRC',
                            [os.path.expanduser('~/.genologicsrc'),
                             'genologics.conf', 'genologics.cfg', '/etc/genologics.conf'] ))

    # config.read() returns the list of files that were successfully parsed.
    L.debug("Read config from {}".format(conf_file))

    return { i.lower(): config[section][i] for i in parts}
def parse_args(*args):
    """Build the command-line parser and parse the arguments.

    Any positional arguments are handed straight on to
    ArgumentParser.parse_args(); with none given, that reads sys.argv.
    Returns the resulting argparse namespace.
    """
    cli = ArgumentParser(description="Set run names in the LIMS",
                         formatter_class=ArgumentDefaultsHelpFormatter)

    # The one mandatory positional argument.
    cli.add_argument("runid",
                     help="A run ID, like 180222_K00166_0345_BHT2F5BBXX.")

    # Optional override for the container lookup.
    cli.add_argument("--container_name",
                     help="Attach the name to a container with the specified name, as"
                          " opposed to extracting the flowcell name from the runid.")

    # Simple on/off switches, registered in the order they appear in --help.
    switches = (
        (("-c", "--close", "--complete"), "Close/complete the QC step by calling advance()."),
        (("-f", "--force"), "Overwrite existing value."),
        (("-n", "--no_act"), "Don't write back to Clarity."),
        (("-v", "--debug", "--verbose"), "Be verbose (debug)."),
    )
    for option_strings, help_text in switches:
        cli.add_argument(*option_strings, action="store_true", help=help_text)

    return cli.parse_args(*args)
if __name__ == '__main__':
    # Script entry point: parse the command line, then hand the result to main().
    cli_args = parse_args()
    main(cli_args)