Skip to content

Commit

Permalink
Update by asebastian on Fri Oct 6 14:42:29 EDT 2017 from Aswathys-iMa…
Browse files Browse the repository at this point in the history
…c.local
  • Loading branch information
aswathyseb committed Oct 6, 2017
1 parent 67514eb commit b03508e
Show file tree
Hide file tree
Showing 15 changed files with 401 additions and 183 deletions.
45 changes: 16 additions & 29 deletions pipeline/Makefile
Original file line number Diff line number Diff line change
@@ -1,48 +1,35 @@
# set the working directory where the makefile will be created and analysis will be done.

WORKDIR=./test
TEMP_DIR = ~/tmp

# analysis-specific files.

SPEC=templates/metabarcode_qc/metabarcode_spec.hjson
TEMPLATE=templates/metabarcode_qc/metabarcode_makefile.html
SPEC=templates/qc/qc_spec.hjson
TEMPLATE=templates/qc/qc_makefile.html
COMMAND=all

# check if spec file exists.
TEST_DATA=$(TEMP_DIR)/data.tar.gz
SAMPLE_DATA=$(TEMP_DIR)/sampleinfo.txt

ifeq ($(wildcard $(SPEC)),)
$(error $(SPEC) not found.)
endif

# check if template file exists.

ifeq ($(wildcard $(TEMPLATE)),)
$(error $(TEMPLATE) not found.)
endif
run: setup
@echo runs all commands to setup the pipeline and then runs it.

# Create the scratch directory that holds the downloaded input files.
# `-p` makes this a no-op when the directory already exists.
$(TEMP_DIR):
	mkdir -p $@

workdir:
#
# creates work directory.
#
mkdir -p $(WORKDIR)
# Download the test data archive into $(TEMP_DIR).
#
# $(TEMP_DIR) is an order-only prerequisite (listed after `|`): the directory
# must exist, but its timestamp must not trigger a rebuild. A directory's
# mtime changes whenever a file is created inside it, so a normal prerequisite
# would cause the archive to be downloaded again after any sibling download.
#
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as if it were the archive. The download goes to a temporary
# name and is renamed only on success, so an interrupted transfer never
# leaves a truncated file that looks up to date.
$(TEST_DATA): | $(TEMP_DIR)
	curl --fail --location -o $@.tmp http://iris.bx.psu.edu/projects/metabarcode-data/data.tar.gz
	mv -f $@.tmp $@

get_data: workdir
#
# Download data and sampleinfo to workdir.
#
wget -P $(WORKDIR) http://iris.bx.psu.edu/projects/metabarcode-data/data.tar.gz
wget -P $(WORKDIR) http://iris.bx.psu.edu/projects/metabarcode-data/sampleinfo.txt
# Download the sample information sheet into $(TEMP_DIR).
#
# $(TEMP_DIR) is an order-only prerequisite (listed after `|`): the directory
# must exist, but its timestamp must not trigger a rebuild. A directory's
# mtime changes whenever a file is created inside it, so a normal prerequisite
# would cause the sheet to be downloaded again after any sibling download.
#
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# error page as if it were the sample sheet. The download goes to a temporary
# name and is renamed only on success, so an interrupted transfer never
# leaves a truncated file that looks up to date.
$(SAMPLE_DATA): | $(TEMP_DIR)
	curl --fail --location -o $@.tmp http://iris.bx.psu.edu/projects/metabarcode-data/sampleinfo.txt
	mv -f $@.tmp $@

setup:
setup: $(TEST_DATA) $(SAMPLE_DATA)
#
# creates the makefile and runs the pipeline.
#
python make.py $(SPEC) $(TEMPLATE) > $(WORKDIR)/Makefile
#python make.py $(SPEC) $(TEMPLATE) > $(WORKDIR)/Makefile
#
# runs make
#
cd $(WORKDIR); make $(COMMAND)
#cd $(WORKDIR); make $(COMMAND)

run: workdir get_data setup
@echo runs all commands to setup the pipeline and then runs it.
Empty file added pipeline/classify/__init__.py
Empty file.
1 change: 1 addition & 0 deletions pipeline/classify/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("HELLO")
Empty file added pipeline/data/__init__.py
Empty file.
4 changes: 2 additions & 2 deletions pipeline/samplesheet.py → pipeline/data/samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ def get_fnames(sname , datadir):
if __name__ == "__main__":

samplesheet = sys.argv[1]
datadir =sys.argv[2]
datadir = sys.argv[2]

#samplesheet ="./test/sampleinfo.txt"
#datadir ="test/data"

data = update_sampleinfo(samplesheet, datadir)

outfile = open("updated_sampleinfo.txt", "w")
outfile = sys.stdout
header ="\t".join(['sample_name','sample_group','target_gene','barcode','fwd_primer','rev_primer','file1','file2'])
outfile.write(header + "\n")

Expand Down
98 changes: 0 additions & 98 deletions pipeline/make.py

This file was deleted.

Empty file added pipeline/qc/__init__.py
Empty file.
124 changes: 124 additions & 0 deletions pipeline/qc/qc_spec.hjson
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
{
sampleinfo :
{
value : ~/tmp/sampleinfo.txt
help :
'''
Enter a tab delimited text file. Order of the fields should be
sample name, sample group, barcode, forward primer sequence, reverse primer sequence, target gene.
Sample name should match the sample name in the dataset.
'''
label : Specify the sample file.
css: red
visible: 1
display_type: UPLOAD
}
data :
{
value: ~/tmp/data.tar.gz
visible: 1
origin: PROJECT
data_type: TAR_FASTQ_GZ
display_type: DROPDOWN

}
library_type :
{
value : true
label : Data entered is paired end.
visible: 1
display_type: CHECKBOX
}
trim_primer:
{
label : Trims barcode and primer sequences.
value : true
help : Barcode and primer sequences should be in the sample sheet.
visible: 1
display_type: CHECKBOX

}
trim_quality:
{

label : Specify average quality value for trimming.
help : Leave empty to skip quality trimming.
value : 30
range : [20,32]
visible : 1
display_type : INTEGER
}
merge_mismatch:
{
label : Maximum mismatch allowed. Default is 3.0.
value : 3.0
range : [0.9,5.1]
visible : 1
display_type : FLOAT
}
merge_overlap:
{
label : Minimum number of overlapping bases to allow merging. Default is 12.
value :12
range : [8,16]
visible : 1
display_type : INTEGER
}
action :
{
choices : {
fastqc : Create fastqc report
merge : Merge paired end dataset
}

value : merge
visible : 1
display_type: RADIO
}
// scripts info
input_check :
{

value : ../../check_input.py
display_type : SCRIPT
}
report_template :
{
value : metabarcode_report.html
display_type : SCRIPT
}
report_script :
{
value : metabarcode_results.py
display_type: SCRIPT
}
analysis_spec :
{
value: Quality control of sequencing data
description:
'''
# Purpose

This analysis takes a paired end dataset and produces a merged improved dataset.
It can optionally trim the data before merging.
MultiQC quality reports of each step are also produced.

# Parameters

**Input:**

1. Paired end sequence data as a tar archived gzip file.
2. Sample information sheet as a tab delimited text file.

# Results

This analysis produces

1. MultiQC report - an aggregate fastqc report of the input data.
2. Trimmed dataset and its quality report (optional).
3. Merged dataset and its quality report.

'''
display_type:MODEL
}
}
Loading

0 comments on commit b03508e

Please sign in to comment.