Skip to content

Commit

Permalink
I think the new SSG is ready to go.
Browse files Browse the repository at this point in the history
  • Loading branch information
tbooth committed Oct 31, 2024
1 parent 1451019 commit e5104a5
Show file tree
Hide file tree
Showing 9 changed files with 806 additions and 22 deletions.
8 changes: 6 additions & 2 deletions illuminatus/ragic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ def use_ragic():
'Run ID': "1000037",
'Run QC Report': "1000048",
'Last Update': "109",
'_lane_keys' : [ "_subtable_1000015",
"_subtable_1000034",
"_subtable_1000035",
"_subtable_1000036", ],
},
'List of samples': { '_form': "sequencing/3",
'Project Name': "1000003",
Expand Down Expand Up @@ -76,7 +80,7 @@ def get_recent_runs(count, rc=None):
recent_n = count,
subtables = False )

L.debug(f"Found {len(runs)} record in Ragic.")
L.debug(f"Found {len(runs)} Illumina Run record(s) in Ragic.")

return runs.values()

Expand All @@ -90,7 +94,7 @@ def get_run(fcid, add_samples=False, rc=None):
query = f"Flowcell ID,eq,{fcid}"
runs = rc.list_entries("Illumina Run", query, subtables=add_samples)

L.debug(f"Found {len(runs)} record in Ragic.")
L.debug(f"Found {len(runs)} Illumina Run record(s) in Ragic.")
if not runs:
raise EmptyResultError(f"No record of flowcell ID {fcid}")

Expand Down
47 changes: 36 additions & 11 deletions samplesheet_from_ragic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
from illuminatus import ragic, illuminatus_version
from illuminatus.aggregator import aggregator

class MissingLanesError(RuntimeError):
    """Signals that a run has samples loaded in an unacceptable set of lanes.

    Raised while working out the lane keys for gen_ss when some lanes of the
    run are empty and empty lanes have not been explicitly allowed.
    """

def main(args):

try:
Expand Down Expand Up @@ -56,13 +61,38 @@ def mdydate(ragicts=None):

return thedate.strftime("%m/%d/%Y")

def gen_ss(run):
def get_lane_keys(run, allow_empty_lanes=False):
    """Work out which lanes of the run actually have libraries in them.

    Returns a dict mapping the lane number (as a string, counting from "1")
    to the subtable key for that lane. Lane numbers follow the order of the
    keys listed under '_lane_keys' in ragic.forms['Illumina Run'], and a lane
    is only included when run holds a non-empty value for its key.

    Unless allow_empty_lanes is set, MissingLanesError is raised if the
    populated lanes are anything but lane 1, lanes 1+2, or lanes 1 to 4.
    """
    # Simply collecting every key in run that starts with "_subtable_" and
    # sorting them is not good enough, as a lane is then numbered by its rank
    # among the keys present rather than by its true position, so we go by
    # the explicit list of expected keys instead.
    subtable_keys = ragic.forms['Illumina Run']['_lane_keys']

    # run.get() is falsy both when the key is absent and when the lane is
    # an empty list, and either way the lane is left out.
    populated = { str(lane_num): key
                  for lane_num, key in enumerate(subtable_keys, start=1)
                  if run.get(key) }

    if allow_empty_lanes:
        return populated

    # Given the available flowcells the only acceptable loadings are:
    #   lane1, lane1+lane2, lane1+lane2+lane3+lane4
    # TODO - the run type could be inferred from the FCID to make this check
    # even more strict, but that is left for now.
    if ''.join(populated) in ( "1", "12", "1234" ):
        return populated

    raise MissingLanesError(f"Samples only in lanes {list(populated)}")

def gen_ss(run, allow_empty_lanes=False):
"""Turn that thing into a sample sheet let's gooooo!
The sample sheet should be identical each time, unless Ragic is changed.
This means no putting the date of generation into the Date field.
"""
#return([pformat(run)])
lane_keys = get_lane_keys(run, allow_empty_lanes=False)

res = aggregator(ofs=",")

Expand Down Expand Up @@ -91,18 +121,13 @@ def gen_ss(run):
res( "[Settings]" )
# Nothing here just now.

# There may be a neater way to do this but the lanes correspond to the subtables,
# and I think I can just assume the keys are in order, or else maybe I order on
# '_header_Y'.
lane_keys = sorted([k for k in run if k.startswith("_subtable_")])

res()
res( "[Data]" )
res( "Lane", "Sample_ID", "Sample_Name", "Sample_Plate", "Sample_Well",
"Sample_Project", "I5_Index_ID", "index", "I7_Index_ID", "index2",
"Description" )
for lane_idx, lane_key in enumerate(lane_keys):
for run_elem in tabulate_lane( lane_num = lane_idx + 1,
for lane_name, lane_key in lane_keys.items():
for run_elem in tabulate_lane( lane_num = lane_name,
lane = run[lane_key],
samples_dict = run['Samples__dict'],
fcid = run['Flowcell ID'] ):
Expand Down Expand Up @@ -138,9 +163,9 @@ def tabulate_lane(lane_num, lane, samples_dict, fcid):
fcid,
"", # Sample_Well
proj, # Sample_Project
f"{proj}-{index1}",
f"{proj}-{index1}" if index1 else "",
index1,
f"{proj}-{index2}",
f"{proj}-{index2}" if index2 else "",
index2,
rel['Pool'], # May be blank if no pool
)
Expand Down
24 changes: 24 additions & 0 deletions test/ragic_sample_sheets/LP3YP/K001_LP3YP_SampleSheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[Header]
IEMFileVersion,4
Investigator Name,Timothy Booth
Experiment Name,K001
Date,10/30/2024
Workflow,GenerateFASTQ
Application,FASTQ Only
Chemistry,Amplicon
#illuminatus_version,1.15.1-ragic-14510198
#index_revcomp,2,2,auto,auto

[Reads]
50
50

[Settings]

[Data]
Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,Sample_Project,I5_Index_ID,index,I7_Index_ID,index2,Description
1,30453FUpool01__30453FU00001L01,,LP3YP,,30453,30453-TCCATACCGGAA,TCCATACCGGAA,,,30453FUpool01
1,30453FUpool01__30453FU00002L01,,LP3YP,,30453,30453-AGCCCTGCTACA,AGCCCTGCTACA,,,30453FUpool01
2,30453FUpool01__30453FU00024L01,,LP3YP,,30453,30453-TGACTAATGGCC,TGACTAATGGCC,,,30453FUpool01
2,30453FUpool01__30453FU00008L01,,LP3YP,,30453,30453-GACTCAACCAGT,GACTCAACCAGT,,,30453FUpool01
2,30453FUpool01__30453FU00009L01,,LP3YP,,30453,30453-GCCTCTACGTCG,GCCTCTACGTCG,,,30453FUpool01
Loading

0 comments on commit e5104a5

Please sign in to comment.