Update by asebastian on Fri Oct 6 14:42:29 EDT 2017 from Aswathys-iMa…

…c.local
ialbert · Oct 6, 2017 · b03508e · b03508e
1 parent 67514eb
commit b03508e
Show file tree

Hide file tree

Showing 15 changed files with 401 additions and 183 deletions.
diff --git a/pipeline/Makefile b/pipeline/Makefile
@@ -1,48 +1,35 @@
 # set the working directory where the makefile will be created and analysis will be done.
 
-WORKDIR=./test
+TEMP_DIR = ~/tmp
 
 # analysis-specific files.
 
-SPEC=templates/metabarcode_qc/metabarcode_spec.hjson
-TEMPLATE=templates/metabarcode_qc/metabarcode_makefile.html
+SPEC=templates/qc/qc_spec.hjson
+TEMPLATE=templates/qc/qc_makefile.html
 COMMAND=all
 
-# check if spec file exists.
+TEST_DATA=$(TEMP_DIR)/data.tar.gz
+SAMPLE_DATA=$(TEMP_DIR)/sampleinfo.txt
 
-ifeq ($(wildcard $(SPEC)),)
-$(error $(SPEC) not found.)
-endif
-
-# check if template file exists.
-
-ifeq ($(wildcard $(TEMPLATE)),)
-$(error $(TEMPLATE) not found.)
-endif
+# Entry point: depends on `setup`, then prints a short description.
+# Declared phony so a stray file named `run` cannot mask the target.
+.PHONY: run
+run: setup
+	@echo runs all commands to setup the pipeline and then runs it.
 
+# Create the scratch directory used for the downloaded inputs.
+$(TEMP_DIR):
+	mkdir -p $@
 
-workdir:
-	#
-	# creates work directory.
-	#
-	mkdir -p $(WORKDIR)
+# Download the test archive into $(TEMP_DIR).
+# The directory is order-only (after `|`): it must exist, but its changing
+# mtime must not force a re-download. -f makes curl fail on HTTP errors so an
+# error page is never saved as the archive; -o $@ alone names the output
+# (the former extra -O conflicted with it for a single URL).
+$(TEST_DATA): | $(TEMP_DIR)
+	curl -fL -o $@ http://iris.bx.psu.edu/projects/metabarcode-data/data.tar.gz
 
-get_data: workdir
-	#
-	# Download data and sampleinfo to workdir.
-	#
-	wget -P $(WORKDIR) http://iris.bx.psu.edu/projects/metabarcode-data/data.tar.gz
-	wget -P $(WORKDIR) http://iris.bx.psu.edu/projects/metabarcode-data/sampleinfo.txt
+# Download the sample sheet into $(TEMP_DIR).
+# The directory is order-only (after `|`): it must exist, but its changing
+# mtime must not force a re-download. -f makes curl fail on HTTP errors so an
+# error page is never saved as the sample sheet; -o $@ alone names the output
+# (the former extra -O conflicted with it for a single URL).
+$(SAMPLE_DATA): | $(TEMP_DIR)
+	curl -fL -o $@ http://iris.bx.psu.edu/projects/metabarcode-data/sampleinfo.txt
 
-setup:
+setup: $(TEST_DATA) $(SAMPLE_DATA)
 	#
 	# creates the makefile and runs the pipeline.
 	#
-	python make.py $(SPEC) $(TEMPLATE) > $(WORKDIR)/Makefile
+	#python make.py $(SPEC) $(TEMPLATE) > $(WORKDIR)/Makefile
 	#
 	# runs make
 	#
-	cd $(WORKDIR); make $(COMMAND)
+	#cd $(WORKDIR); make $(COMMAND)
 
-run: workdir get_data setup
-	@echo runs all commands to setup the pipeline and then runs it.
diff --git a/pipeline/classify/__init__.py b/pipeline/classify/__init__.py
diff --git a/pipeline/classify/test.py b/pipeline/classify/test.py
@@ -0,0 +1 @@
+print("HELLO")
diff --git a/pipeline/data/__init__.py b/pipeline/data/__init__.py
diff --git a/pipeline/samplesheet.py → pipeline/data/samplesheet.py b/pipeline/samplesheet.py → pipeline/data/samplesheet.py
@@ -69,14 +69,14 @@ def get_fnames(sname , datadir):
 if __name__ == "__main__":
 
     samplesheet = sys.argv[1]
-    datadir =sys.argv[2]
+    datadir = sys.argv[2]
 
     #samplesheet ="./test/sampleinfo.txt"
     #datadir ="test/data"
 
     data = update_sampleinfo(samplesheet, datadir)
 
-    outfile = open("updated_sampleinfo.txt", "w")
+    outfile = sys.stdout
     header ="\t".join(['sample_name','sample_group','target_gene','barcode','fwd_primer','rev_primer','file1','file2'])
     outfile.write(header + "\n")
 

diff --git a/pipeline/make.py b/pipeline/make.py
diff --git a/pipeline/qc/__init__.py b/pipeline/qc/__init__.py
diff --git a/pipeline/qc/qc_spec.hjson b/pipeline/qc/qc_spec.hjson
@@ -0,0 +1,124 @@
+{
+sampleinfo :
+    {
+    value : ~/tmp/sampleinfo.txt
+    help :
+           '''
+Enter a tab delimited text file. Order of the fields should be
+sample name, sample group, barcode, forward primer sequence, reverse primer sequence, target gene.
+Sample name should match the sample name in the dataset.
+    '''
+    label : Specify the sample file.
+    css: red
+    visible: 1
+    display_type: UPLOAD
+    }
+data :
+  {
+    value: ~/tmp/data.tar.gz
+    visible: 1
+    origin: PROJECT
+    data_type: TAR_FASTQ_GZ
+    display_type: DROPDOWN
+
+  }
+library_type :
+  {
+    value : true
+    label : Data entered is paired end.
+    visible: 1
+    display_type: CHECKBOX
+  }
+trim_primer:
+ {
+    label : Trims barcode and primer sequences.
+    value : true
+    help : Barcode and primer sequences should be in the sample sheet.
+    visible: 1
+    display_type: CHECKBOX
+
+ }
+trim_quality:
+ {
+
+    label : Specify average quality value for trimming.
+    help : Leave empty for not trimming
+    value :  30
+    range  : [20,32]
+    visible : 1
+    display_type : INTEGER
+ }
+merge_mismatch:
+  {
+  label : Maximum mismatch allowed. Default is 3.0.
+  value : 3.0
+  range : [0.9,5.1]
+  visible : 1
+  display_type : FLOAT
+  }
+merge_overlap:
+  {
+  label : Minimum number of overlapping bases to allow merging. Default is 12.
+  value :12
+  range : [8,16]
+  visible : 1
+  display_type : INTEGER
+  }
+action :
+ {
+    choices : {
+            fastqc : Create fastqc report
+            merge : Merge paired end dataset
+               }
+
+    value : merge
+    visible : 1
+    display_type: RADIO
+  }
+  // scripts info
+input_check :
+  {
+
+    value : ../../check_input.py
+    display_type : SCRIPT
+  }
+report_template :
+  {
+     value : metabarcode_report.html
+     display_type : SCRIPT
+  }
+report_script :
+  {
+     value : metabarcode_results.py
+     display_type: SCRIPT
+  }
+analysis_spec :
+  {
+    value: Quality control of sequencing data
+    description:
+                '''
+# Purpose
+
+This analysis takes a paired end dataset and produces a merged improved dataset.
+It can optionally trim the data before merging.
+MultiQC quality reports of each step are also produced.
+
+# Parameters
+
+**Input:**
+
+1. Paired end sequence data as a gzip-compressed tar archive.
+2. Sample information sheet as a tab delimited text file.
+
+# Results
+
+This analysis produces
+
+1. MultiQC report - an aggregate fastqc report of the input data.
+2. Trimmed dataset and its quality report (optional).
+3. Merged dataset and its quality report.
+
+        '''
+       display_type:MODEL
+  }
+}