From 7ca5efcdeef72468946ee432194ba4e1e92e3680 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Sat, 19 Jun 2021 14:31:06 +0600 Subject: [PATCH 01/18] hardcoded smartseq settings --- zUMIs-dge2.R | 2 +- zUMIs-mapping.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/zUMIs-dge2.R b/zUMIs-dge2.R index 5c222c9..a26c8e9 100644 --- a/zUMIs-dge2.R +++ b/zUMIs-dge2.R @@ -33,7 +33,7 @@ if(UMIcheck == "nonUMI"){ opt$counting_opts$Ham_Dist <- 0 } #is the data Smart-seq3? -smart3_flag <- ifelse(any(grepl(pattern = "ATTGCGCAATG",x = unlist(opt$sequence_files))), TRUE, FALSE) +smart3_flag <- TRUE #ifelse(any(grepl(pattern = "ATTGCGCAATG",x = unlist(opt$sequence_files))), TRUE, FALSE) ####################################################################### ##### Barcode handling & chunking diff --git a/zUMIs-mapping.R b/zUMIs-mapping.R index 9f5db0d..504132d 100755 --- a/zUMIs-mapping.R +++ b/zUMIs-mapping.R @@ -29,7 +29,7 @@ if(inp$which_Stage == "Filtering"){ genome_size <- system(command = paste("du -sh",inp$reference$STAR_index,"| cut -f1"), intern = TRUE) genome_size <- as.numeric(gsub(pattern = "G",replacement = "", x = genome_size)) -num_star_instances <- floor(inp$mem_limit/genome_size) +num_star_instances <- 16 #floor(inp$mem_limit/genome_size) # GTF file setup ---------------------------------------------------------- #in case of additional sequences, we need to create a custom GTF @@ -81,12 +81,12 @@ getmode <- function(v) { uniqv[which.max(tabulate(match(v, uniqv)))] } -cDNA_read_length <- getmode(nchar(cDNA_peek$V1)) +cDNA_read_length <- 150 #getmode(nchar(cDNA_peek$V1)) # Setup STAR mapping ------------------------------------------------------ samtools_load_cores <- ifelse(inp$num_threads>8,2,1) -avail_cores <- inp$num_threads - samtools_load_cores #reserve threads for samtools file opening +avail_cores <- 16 # inp$num_threads - samtools_load_cores #reserve threads for samtools file opening if(inp$which_Stage == "Filtering"){ avail_cores <- floor(avail_cores / num_star_instances) } From fbe994ab2e1be35b87d0528935a1291d247a956d Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Mon, 21 Jun 2021 08:13:00 +0600 Subject: [PATCH 02/18] set up smartseq run with STAR 2.7.9a --- mergeBAM.sh | 2 +- runfeatureCountFUN.R | 4 ++-- zUMIs-dge2.R | 3 ++- zUMIs-mapping.R | 32 +++++++++++++++++++------------- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/mergeBAM.sh b/mergeBAM.sh index 7c2909f..7d48323 100755 --- a/mergeBAM.sh +++ b/mergeBAM.sh @@ -13,7 +13,7 @@ yaml=$6 samtoolsexc=$7 ls $tmpMerge/$project.*.filtered.tagged.bam > $tmpMerge/$project.bamlist.txt -#samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 +samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 cat $tmpMerge/$project.*.BCstats.txt > $outdir/$project.BCstats.txt diff --git a/runfeatureCountFUN.R b/runfeatureCountFUN.R index 2ca4fd9..9e2a32d 100755 --- a/runfeatureCountFUN.R +++ b/runfeatureCountFUN.R @@ -348,8 +348,8 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y, .runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib,multi_overlap_var){ print(paste0("Assigning reads to features (",type,")")) - # fc.stat<-Rsubread::featureCounts(files=abamfile, - fc.stat <- featureCounts(files=abamfile, + fc.stat<-Rsubread::featureCounts(files=abamfile, + # fc.stat <- featureCounts(files=abamfile, annot.ext=saf, isGTFAnnotationFile=F, primaryOnly=primaryOnly, diff --git a/zUMIs-dge2.R b/zUMIs-dge2.R index eb3d052..43b97fd 100644 --- a/zUMIs-dge2.R +++ b/zUMIs-dge2.R @@ -66,7 +66,8 @@ try(gene_name_mapping <- .get_gene_names(gtf = paste0(opt$out_dir,"/",opt$projec try(data.table::fwrite(gene_name_mapping, file = paste0(opt$out_dir,"/zUMIs_output/expression/",opt$project,".gene_names.txt"), sep ="\t", quote = FALSE), silent = TRUE) ## -if(smart3_flag & opt$counting_opts$strand == 1){ +#if(smart3_flag & opt$counting_opts$strand == 1){ +if(TRUE){ #split bam in UMU ends and internal print("Preparing Smart-seq3 data for stranded gene assignment...") print(Sys.time()) diff --git a/zUMIs-mapping.R b/zUMIs-mapping.R index 35b44ec..1196ed8 100755 --- a/zUMIs-mapping.R +++ b/zUMIs-mapping.R @@ -25,7 +25,7 @@ if(inp$which_Stage == "Filtering"){ }else{ filtered_bams <- paste0(inp$out_dir,"/",inp$project,".filtered.tagged.unmapped.bam") # for resuming from mapping state using the merged unmapped bam } - +print("1") # check if multiple STAR instances can be run ----------------------------- @@ -34,14 +34,16 @@ genome_size <- as.numeric(gsub(pattern = "G",replacement = "", x = genome_size)) if(is.na(genome_size)){ genome_size <- 25 #set average genome size if there was a problem detecting } -num_star_instances <- 16 #floor(inp$mem_limit/genome_size) +print(genome_size) +num_star_instances <- floor(inp$mem_limit/genome_size) if(num_star_instances < 1){ num_star_instances = 1 #set the number of STAR instances to 1 if it is 0 } if(num_star_instances > inp$num_threads){ num_star_instances = inp$num_threads } - +print(num_star_instances) +print("2") # GTF file setup ---------------------------------------------------------- #in case of additional sequences, we need to create a custom GTF @@ -77,13 +79,13 @@ if ( is.null(additional_fq[1]) | length(additional_fq)==0 ) { gtf_to_use <- paste(inp$out_dir,"/",inp$project,".final_annot.gtf",sep="") param_additional_fa <- paste("--genomeFastaFiles",paste(inp$reference$additional_files,collapse = " ")) } - -#inp$reference$GTF_file_final <- gtf_to_use -#yaml::write_yaml(inp,file = paste(inp$out_dir,"/",inp$project,".postmap.yaml",sep="")) +print("3") +inp$reference$GTF_file_final <- gtf_to_use +yaml::write_yaml(inp,file = paste(inp$out_dir,"/",inp$project,".postmap.yaml",sep="")) # Detect read length ------------------------------------------------------ #check the first 100 reads to detect the read length of the cDNA read -#filtered_bam <- paste(inp$out_dir,"/",inp$project,".filtered.tagged.bam",sep="") +filtered_bam <- paste(inp$out_dir,"/",inp$project,".filtered.tagged.bam",sep="") cDNA_peek <- data.table::fread(cmd = paste(samtools,"view",filtered_bams[1],"| cut -f10 | head -n 1000"),stringsAsFactors = F,data.table = T, header = F) @@ -92,9 +94,9 @@ getmode <- function(v) { uniqv[which.max(tabulate(match(v, uniqv)))] } -cDNA_read_length <- 150 #getmode(nchar(cDNA_peek$V1)) - - +cDNA_read_length <- getmode(nchar(cDNA_peek$V1)) +print(cDNA_read_length) +print("4") # Setup STAR mapping ------------------------------------------------------ samtools_load_cores <- ifelse(inp$num_threads>8,2,1) avail_cores <- 16 # inp$num_threads - samtools_load_cores #reserve threads for samtools file opening @@ -112,13 +114,17 @@ param_misc <- paste("--genomeDir",inp$reference$STAR_index, "--sjdbGTFfile",gtf_to_use, "--runThreadN",avail_cores, "--sjdbOverhang", cDNA_read_length-1, - "--readFilesType SAM",inp$read_layout) +# "--sjdbOverhang 149", + "--readFilesType SAM PE", + "--genomeSAindexNbases 11", + "--limitOutSJcollapsed 5000000", + inp$read_layout) STAR_command <- paste(STAR_exec,param_defaults,param_misc,inp$reference$additional_STAR_params,param_additional_fa) if(inp$counting_opts$twoPass==TRUE){ STAR_command <- paste(STAR_command,"--twopassMode Basic") } - +print("5") #finally, run STAR if(num_star_instances>1 & inp$which_Stage == "Filtering"){ map_tmp_dir <- paste0(inp$out_dir,"/zUMIs_output/.tmpMap/") @@ -154,7 +160,7 @@ if(num_star_instances>1 & inp$which_Stage == "Filtering"){ system(STAR_command) } } - +print("6") #clean up chunked bam files if(inp$which_Stage == "Filtering"){ From eb7646317f00b0223f427b4531b33c79aadc7d85 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Tue, 22 Jun 2021 06:09:12 +0600 Subject: [PATCH 03/18] correct counting by excluding unmapped reads from chromosomes --- UMIstuffFUN.R | 14 ++++++++++++++ mergeBAM.sh | 2 +- misc/countUMIfrags.py | 2 +- runfeatureCountFUN.R | 4 ++-- zUMIs-dge2.R | 11 +++++++---- zUMIs-mapping.R | 9 ++++----- 6 files changed, 29 insertions(+), 13 deletions(-) diff --git a/UMIstuffFUN.R b/UMIstuffFUN.R index b35aba0..cbd967e 100644 --- a/UMIstuffFUN.R +++ b/UMIstuffFUN.R @@ -68,6 +68,17 @@ reads2genes_new <- function(featfile, bccount, inex, chunk, cores, keepUnassigne taglist <- c(taglist, "GI") } + idxstats <- idxstats[grep("\\*", idxstats[,1], invert = TRUE), ] +print(idxstats) +#print(str(idxstats)) +#print(levels(idxstats[,"seqnames"])) + +idxstats[,1] <- droplevels(idxstats[,1]) +print(levels(idxstats[,"seqnames"])) + +idxstats[,1] <- factor(as.character(idxstats[,1]), levels = seqnames(seqinfo(BamFile(featfile)))) + +#print(idxstats) rsamtools_reads <- mclapply(1:nrow(idxstats), function(x) { if(opt$read_layout == "PE"){ parms <- ScanBamParam(tag=taglist, @@ -83,6 +94,9 @@ reads2genes_new <- function(featfile, bccount, inex, chunk, cores, keepUnassigne } dat <- scanBam(file = featfile, param = parms) +#print(head(dat)) +#print(dim(dat)) +#print(str(dat)) if(inex){ dt <- data.table(RG = dat[[1]]$tag$BC, UB = dat[[1]]$tag$UB, GE = dat[[1]]$tag$GE, GEin = dat[[1]]$tag$GI) }else{ diff --git a/mergeBAM.sh b/mergeBAM.sh index 7d48323..7c2909f 100755 --- a/mergeBAM.sh +++ b/mergeBAM.sh @@ -13,7 +13,7 @@ yaml=$6 samtoolsexc=$7 ls $tmpMerge/$project.*.filtered.tagged.bam > $tmpMerge/$project.bamlist.txt -samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 +#samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 cat $tmpMerge/$project.*.BCstats.txt > $outdir/$project.BCstats.txt diff --git a/misc/countUMIfrags.py b/misc/countUMIfrags.py index aa7e1d5..775529f 100755 --- a/misc/countUMIfrags.py +++ b/misc/countUMIfrags.py @@ -24,7 +24,7 @@ def count_UMItags(inpath, bcs, chr): bc = read.get_tag('BC') if bc in bcs: ub = read.get_tag('UB') - if ub is '': + if ub == '': bccounts[bc]['int'] += 1 else: bccounts[bc]['umi'] += 1 diff --git a/runfeatureCountFUN.R b/runfeatureCountFUN.R index 9e2a32d..26e6e08 100755 --- a/runfeatureCountFUN.R +++ b/runfeatureCountFUN.R @@ -348,8 +348,8 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y, .runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib,multi_overlap_var){ print(paste0("Assigning reads to features (",type,")")) - fc.stat<-Rsubread::featureCounts(files=abamfile, - # fc.stat <- featureCounts(files=abamfile, + # fc.stat<-Rsubread::featureCounts(files=abamfile, + fc.stat <- featureCounts(files=abamfile, annot.ext=saf, isGTFAnnotationFile=F, primaryOnly=primaryOnly, diff --git a/zUMIs-dge2.R b/zUMIs-dge2.R index 43b97fd..737d2ba 100644 --- a/zUMIs-dge2.R +++ b/zUMIs-dge2.R @@ -8,7 +8,7 @@ suppressPackageStartupMessages(library(Rsamtools)) ########################## myYaml <- commandArgs(trailingOnly = T) -opt <-read_yaml(myYaml) +opt <- read_yaml(myYaml) setwd(opt$out_dir) #try(unixtools::set.tempdir(opt$out_dir)) source(paste0(opt$zUMIs_directory,"/runfeatureCountFUN.R")) @@ -26,6 +26,7 @@ data.table::setDTthreads(threads=1) fcounts_clib <- paste0(opt$zUMIs_directory,"/misc/fcountsLib2") opt <- fixMissingOptions(opt) +print(opt) ####################################################################### ########################## double check for non-UMI method UMIcheck <- check_nonUMIcollapse(opt$sequence_files) @@ -33,7 +34,7 @@ if(UMIcheck == "nonUMI"){ opt$counting_opts$Ham_Dist <- 0 } #is the data Smart-seq3? -smart3_flag <- TRUE #ifelse(any(grepl(pattern = "ATTGCGCAATG",x = unlist(opt$sequence_files))), TRUE, FALSE) +smart3_flag <- ifelse(any(grepl(pattern = "ATTGCGCAATG", x = unlist(opt$sequence_files))), TRUE, FALSE) ####################################################################### ##### Barcode handling & chunking @@ -66,8 +67,7 @@ try(gene_name_mapping <- .get_gene_names(gtf = paste0(opt$out_dir,"/",opt$projec try(data.table::fwrite(gene_name_mapping, file = paste0(opt$out_dir,"/zUMIs_output/expression/",opt$project,".gene_names.txt"), sep ="\t", quote = FALSE), silent = TRUE) ## -#if(smart3_flag & opt$counting_opts$strand == 1){ -if(TRUE){ +if(smart3_flag & opt$counting_opts$strand == 1){ #split bam in UMU ends and internal print("Preparing Smart-seq3 data for stranded gene assignment...") print(Sys.time()) @@ -210,6 +210,9 @@ if( opt$counting_opts$introns ){ for(i in unique(bccount$chunkID)){ print( paste( "Working on barcode chunk", i, "out of",length(unique(bccount$chunkID)) )) print( paste( "Processing",length(bccount[chunkID==i]$XC), "barcodes in this chunk..." )) +print(sortbamfile) +print(bccount) +print(opt$counting_opts$introns) reads <- reads2genes_new(featfile = sortbamfile, bccount = bccount, inex = opt$counting_opts$introns, diff --git a/zUMIs-mapping.R b/zUMIs-mapping.R index 1196ed8..c9b6dae 100755 --- a/zUMIs-mapping.R +++ b/zUMIs-mapping.R @@ -113,12 +113,11 @@ param_defaults <- paste("--readFilesCommand ",samtools," view -@",samtools_load_ param_misc <- paste("--genomeDir",inp$reference$STAR_index, "--sjdbGTFfile",gtf_to_use, "--runThreadN",avail_cores, - "--sjdbOverhang", cDNA_read_length-1, +# "--sjdbOverhang", cDNA_read_length-1, # "--sjdbOverhang 149", - "--readFilesType SAM PE", + "--readFilesType SAM", inp$read_layout, "--genomeSAindexNbases 11", - "--limitOutSJcollapsed 5000000", - inp$read_layout) + "--limitOutSJcollapsed 5000000") STAR_command <- paste(STAR_exec,param_defaults,param_misc,inp$reference$additional_STAR_params,param_additional_fa) if(inp$counting_opts$twoPass==TRUE){ @@ -143,7 +142,7 @@ if(num_star_instances>1 & inp$which_Stage == "Filtering"){ #after parallel instance STAR, collect output data in the usual file places out_logs <- list.files(map_tmp_dir, pattern = paste0("tmp.",inp$project,".*.Log.final.out"), full = TRUE) merge_logs <- paste("cat",paste(out_logs, collapse = " "),">",paste0(inp$out_dir,"/",inp$project,".filtered.tagged.Log.final.out")) - out_bams <- list.files(map_tmp_dir, pattern = paste0("tmp.",inp$project,".*.Aligned.out.bam"), full = TRUE) + out_bams <- list.files(map_tmp_dir, pattern = paste0("tmp.",inp$project,".*Aligned.out.bam"), full = TRUE) merge_bams <- paste(inp$samtools_exec,"cat -o",paste0(inp$out_dir,"/",inp$project,".filtered.tagged.Aligned.out.bam"),paste(out_bams, collapse = " ")) out_txbams <- list.files(map_tmp_dir, pattern = paste0("tmp.",inp$project,".*.Aligned.toTranscriptome.out.bam"), full = TRUE) merge_txbams <- paste(inp$samtools_exec,"cat -o",paste0(inp$out_dir,"/",inp$project,".filtered.tagged.Aligned.toTranscriptome.out.bam"),paste(out_txbams, collapse = " ")) From 37ec456e7e61904b4282aae3007e1598cbf37d72 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 23 Jun 2021 14:42:24 +0900 Subject: [PATCH 04/18] remove verbose logs for debugging --- UMIstuffFUN.R | 16 ++++------------ runfeatureCountFUN.R | 3 +-- zUMIs-dge2.R | 4 +--- zUMIs-mapping.R | 11 ----------- 4 files changed, 6 insertions(+), 28 deletions(-) mode change 100755 => 100644 zUMIs-mapping.R diff --git a/UMIstuffFUN.R b/UMIstuffFUN.R index cbd967e..441126b 100644 --- a/UMIstuffFUN.R +++ b/UMIstuffFUN.R @@ -68,17 +68,12 @@ reads2genes_new <- function(featfile, bccount, inex, chunk, cores, keepUnassigne taglist <- c(taglist, "GI") } + # remove special characters for unmapped reads from list of chromosomes idxstats <- idxstats[grep("\\*", idxstats[,1], invert = TRUE), ] -print(idxstats) -#print(str(idxstats)) -#print(levels(idxstats[,"seqnames"])) + idxstats[,1] <- droplevels(idxstats[,1]) + idxstats[,1] <- factor(as.character(idxstats[,1]), levels = seqnames(seqinfo(BamFile(featfile)))) + print(idxstats) -idxstats[,1] <- droplevels(idxstats[,1]) -print(levels(idxstats[,"seqnames"])) - -idxstats[,1] <- factor(as.character(idxstats[,1]), levels = seqnames(seqinfo(BamFile(featfile)))) - -#print(idxstats) rsamtools_reads <- mclapply(1:nrow(idxstats), function(x) { if(opt$read_layout == "PE"){ parms <- ScanBamParam(tag=taglist, @@ -94,9 +89,6 @@ idxstats[,1] <- factor(as.character(idxstats[,1]), levels = seqnames(seqinfo(Bam } dat <- scanBam(file = featfile, param = parms) -#print(head(dat)) -#print(dim(dat)) -#print(str(dat)) if(inex){ dt <- data.table(RG = dat[[1]]$tag$BC, UB = dat[[1]]$tag$UB, GE = dat[[1]]$tag$GE, GEin = dat[[1]]$tag$GI) }else{ diff --git a/runfeatureCountFUN.R b/runfeatureCountFUN.R index 26e6e08..336ade1 100755 --- a/runfeatureCountFUN.R +++ b/runfeatureCountFUN.R @@ -348,8 +348,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y, .runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib,multi_overlap_var){ print(paste0("Assigning reads to features (",type,")")) - # fc.stat<-Rsubread::featureCounts(files=abamfile, - fc.stat <- featureCounts(files=abamfile, + fc.stat <- featureCounts(files=abamfile, annot.ext=saf, isGTFAnnotationFile=F, primaryOnly=primaryOnly, diff --git a/zUMIs-dge2.R b/zUMIs-dge2.R index 737d2ba..34ebbd0 100644 --- a/zUMIs-dge2.R +++ b/zUMIs-dge2.R @@ -210,9 +210,7 @@ if( opt$counting_opts$introns ){ for(i in unique(bccount$chunkID)){ print( paste( "Working on barcode chunk", i, "out of",length(unique(bccount$chunkID)) )) print( paste( "Processing",length(bccount[chunkID==i]$XC), "barcodes in this chunk..." )) -print(sortbamfile) -print(bccount) -print(opt$counting_opts$introns) + print(sortbamfile) reads <- reads2genes_new(featfile = sortbamfile, bccount = bccount, inex = opt$counting_opts$introns, diff --git a/zUMIs-mapping.R b/zUMIs-mapping.R old mode 100755 new mode 100644 index c9b6dae..1bbc47c --- a/zUMIs-mapping.R +++ b/zUMIs-mapping.R @@ -25,7 +25,6 @@ if(inp$which_Stage == "Filtering"){ }else{ filtered_bams <- paste0(inp$out_dir,"/",inp$project,".filtered.tagged.unmapped.bam") # for resuming from mapping state using the merged unmapped bam } -print("1") # check if multiple STAR instances can be run ----------------------------- @@ -34,7 +33,6 @@ genome_size <- as.numeric(gsub(pattern = "G",replacement = "", x = genome_size)) if(is.na(genome_size)){ genome_size <- 25 #set average genome size if there was a problem detecting } -print(genome_size) num_star_instances <- floor(inp$mem_limit/genome_size) if(num_star_instances < 1){ num_star_instances = 1 #set the number of STAR instances to 1 if it is 0 @@ -42,8 +40,6 @@ if(num_star_instances < 1){ if(num_star_instances > inp$num_threads){ num_star_instances = inp$num_threads } -print(num_star_instances) -print("2") # GTF file setup ---------------------------------------------------------- #in case of additional sequences, we need to create a custom GTF @@ -79,7 +75,6 @@ if ( is.null(additional_fq[1]) | length(additional_fq)==0 ) { gtf_to_use <- paste(inp$out_dir,"/",inp$project,".final_annot.gtf",sep="") param_additional_fa <- paste("--genomeFastaFiles",paste(inp$reference$additional_files,collapse = " ")) } -print("3") inp$reference$GTF_file_final <- gtf_to_use yaml::write_yaml(inp,file = paste(inp$out_dir,"/",inp$project,".postmap.yaml",sep="")) @@ -95,8 +90,6 @@ getmode <- function(v) { } cDNA_read_length <- getmode(nchar(cDNA_peek$V1)) -print(cDNA_read_length) -print("4") # Setup STAR mapping ------------------------------------------------------ samtools_load_cores <- ifelse(inp$num_threads>8,2,1) avail_cores <- 16 # inp$num_threads - samtools_load_cores #reserve threads for samtools file opening @@ -113,8 +106,6 @@ param_defaults <- paste("--readFilesCommand ",samtools," view -@",samtools_load_ param_misc <- paste("--genomeDir",inp$reference$STAR_index, "--sjdbGTFfile",gtf_to_use, "--runThreadN",avail_cores, -# "--sjdbOverhang", cDNA_read_length-1, -# "--sjdbOverhang 149", "--readFilesType SAM", inp$read_layout, "--genomeSAindexNbases 11", "--limitOutSJcollapsed 5000000") @@ -123,7 +114,6 @@ STAR_command <- paste(STAR_exec,param_defaults,param_misc,inp$reference$addition if(inp$counting_opts$twoPass==TRUE){ STAR_command <- paste(STAR_command,"--twopassMode Basic") } -print("5") #finally, run STAR if(num_star_instances>1 & inp$which_Stage == "Filtering"){ map_tmp_dir <- paste0(inp$out_dir,"/zUMIs_output/.tmpMap/") @@ -159,7 +149,6 @@ if(num_star_instances>1 & inp$which_Stage == "Filtering"){ system(STAR_command) } } -print("6") #clean up chunked bam files if(inp$which_Stage == "Filtering"){ From b7e3ba6be09ffa810c5ca7b86f44fceea4608d6f Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 23 Jun 2021 14:50:52 +0900 Subject: [PATCH 05/18] update version and documentation --- README.md | 2 ++ zUMIs.sh | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d21d381..2eb98b6 100755 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ We provide a script to convert zUMIs output into loom file automatically based o zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`. ## Changelog +23 June 2021: zUMIs2.9.7: merged code from @TomKellyGenetics. Update for compatbility with STAR 2.7.9a and samtools 1.3.1-3. Prevent crash when unmapped reads are counted as a special character for chromosomes from samtools idxstats is not permitted as a factor levels. Excludes unmapped reads for counting. + 07 Apr 2021: zUMIs2.9.6: Fixed a potential crash when the input file to the barcode sharing feature does not contain an equal number of columns. 18 Feb 2021: zUMIs2.9.5: Mismatches for detecting Smart-seq3 UMI-read pattern are now user-settable in the YAML file as follows: `find_pattern: ATTGCGCAATG;2` would allow 2 mismatches. Usage as prior to this version (ie `find_pattern: ATTGCGCAATG`) will default to the previous value of 1 mismatch allowed. diff --git a/zUMIs.sh b/zUMIs.sh index aa7238f..dd54654 100755 --- a/zUMIs.sh +++ b/zUMIs.sh @@ -3,7 +3,7 @@ # Pipeline to run UMI-seq analysis from fastq to read count tables. # Authors: Swati Parekh, Christoph Ziegenhain, Beate Vieth & Ines Hellmann # Contact: sparekh@age.mpg.de or christoph.ziegenhain@ki.se -vers=2.9.6 +vers=2.9.7 currentv=$(curl -s https://raw.githubusercontent.com/sdparekh/zUMIs/main/zUMIs.sh | grep '^vers=' | cut -f2 -d "=") if [ "$currentv" != "$vers" ] ; then echo -e "------------- \n\n Good news! A newer version of zUMIs is available at https://github.com/sdparekh/zUMIs \n\n-------------"; From 3cc73a1d6ffa28168418bab37cfcc334bfa5afab Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 23 Jun 2021 14:56:11 +0900 Subject: [PATCH 06/18] remove hardcoded variables --- zUMIs-mapping.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zUMIs-mapping.R b/zUMIs-mapping.R index 1bbc47c..3332873 100644 --- a/zUMIs-mapping.R +++ b/zUMIs-mapping.R @@ -92,7 +92,7 @@ getmode <- function(v) { cDNA_read_length <- getmode(nchar(cDNA_peek$V1)) # Setup STAR mapping ------------------------------------------------------ samtools_load_cores <- ifelse(inp$num_threads>8,2,1) -avail_cores <- 16 # inp$num_threads - samtools_load_cores #reserve threads for samtools file opening +avail_cores <- inp$num_threads - samtools_load_cores #reserve threads for samtools file opening if(inp$which_Stage == "Filtering"){ avail_cores <- floor(avail_cores / num_star_instances) } From d4b5c85f69ee6cb39ee8c138f6b260f71699fcd5 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 10:18:35 +0900 Subject: [PATCH 07/18] add Docker configuration --- Dockerfile | 82 +++++++++++++++++++++++++++++++++++++++++++++++++ podman_build.sh | 6 ++++ 2 files changed, 88 insertions(+) create mode 100644 Dockerfile create mode 100644 podman_build.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fb99b29 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,82 @@ +FROM ubuntu:bionic + +#RUN /bin/sh -c [ -z "$(apt-get indextargets)" ] + +RUN set -xe && echo '#!/bin/sh' > /usr/sbin/policy-rc.d && echo 'exit 101' >> /usr/sbin/policy-rc.d && chmod +x /usr/sbin/policy-rc.d && dpkg-divert --local --rename --add /sbin/initctl && cp -a /usr/sbin/policy-rc.d /sbin/initctl && sed -i 's/^exit.*/exit 0/' /sbin/initctl && echo 'force-unsafe-io' > /etc/dpkg/dpkg.cfg.d/docker-apt-speedup && echo 'DPkg::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' > /etc/apt/apt.conf.d/docker-clean && echo 'APT::Update::Post-Invoke { "rm -f /var/cache/apt/archives/*.deb /var/cache/apt/archives/partial/*.deb /var/cache/apt/*.bin || true"; };' >> /etc/apt/apt.conf.d/docker-clean && echo 'Dir::Cache::pkgcache ""; Dir::Cache::srcpkgcache "";' >> /etc/apt/apt.conf.d/docker-clean && echo 'Acquire::Languages "none";' > /etc/apt/apt.conf.d/docker-no-languages && echo 'Acquire::GzipIndexes "true"; Acquire::CompressionTypes::Order:: "gz";' > /etc/apt/apt.conf.d/docker-gzip-indexes && echo 'Apt::AutoRemove::SuggestsImportant "false";' > /etc/apt/apt.conf.d/docker-autoremove-suggests + +RUN mkdir -p /run/systemd && echo 'docker' > /run/systemd/container + +RUN apt-get update \ + && apt-get install -y \ + software-properties-common \ + build-essential \ + && apt-get update + +ENV DEBIAN_FRONTEND="noninteractive" TZ="Asia/Tokyo" + +ENV PATH="/root/miniconda3/bin:${PATH}" +ARG PATH="/root/miniconda3/bin:${PATH}" + +RUN apt-get update \ + && apt-get install -y wget && rm -rf /var/lib/apt/lists/* + +RUN wget \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && mkdir /root/.conda \ + && bash Miniconda3-latest-Linux-x86_64.sh -b \ + && rm -f Miniconda3-latest-Linux-x86_64.sh +RUN conda --version + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y --fix-missing \ + git \ + nano \ + r-base r-recommended \ + samtools \ + htop \ + pigz \ + wget \ + curl \ + libcurl4-openssl-dev \ + libcairo2-dev cairo-dock cairo-dock-plug-ins \ + libxt-dev \ + libharfbuzz-dev libfribidi-dev \ + libhdf5-dev \ + libgl1-mesa-glx libegl1-mesa \ + libxrandr2 libxrandr2 \ + libxss1 libxcursor1 libxcomposite1 \ + libasound2 libxi6 libxtst6 \ + libssl-dev \ + python-pip + +RUN conda install numpy scipy cython numba matplotlib scikit-learn h5py click + +RUN pip install pysam velocyto + +RUN apt update -qq \ + && apt install --no-install-recommends software-properties-common dirmngr \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && add-apt-repository "deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/" \ + && apt install -y --no-install-recommends r-base + +RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && apt update \ + && gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 \ + && gpg -a --export E298A3A825C0D65DFD57CBB651716619E084DAB9 | apt-key add - \ + && apt-get install -y r-base r-base-core r-recommended r-base-dev + +RUN R -e 'install.packages(c("yaml", "BiocManager", "Cairo", "ggplot2", "cowplot", "data.table", "devtools", "dplyr", "ggraster", "ggrastr", "hdf5r", "inflection", "ragg", "textshaping"), dependencies = TRUE)' + +RUN R -e 'BiocManager::install(c("GenomicFeatures", "plyranges", "Rhtslib", "Rsamtools"), ask = FALSE)' + +RUN R -e 'install.packages("devtools"); devtools::install_github(repo = "hhoeflin/hdf5r"); devtools::install_github(repo = "mojaveazure/loomR", ref = "develop")' + +RUN mkdir -p home && cd home && git clone https://github.com/TomKellyGenetics/zUMIs.git + +RUN cd /home && wget https://github.com/alexdobin/STAR/archive/2.7.9a.tar.gz && tar -xzf 2.7.9a.tar.gz && cd STAR-2.7.9a/source && make + +CMD ["/bin/bash"] + +#bash diff --git a/podman_build.sh b/podman_build.sh new file mode 100644 index 0000000..e5fa956 --- /dev/null +++ b/podman_build.sh @@ -0,0 +1,6 @@ +#sleep 86400 +podman build -t zumis:v2.9.7 . +podman tag zumis:v2.9.7 tomkellygenetics/zumis:v2.9.7 +podman tag zumis:v2.9.7 tomkellygenetics/zumis:latest +podman push tomkellygenetics/zumis:v2.9.7 +podman push tomkellygenetics/zumis:latest From 3c3782d053fdac0f0145873acc2df2bcf29436b8 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 15:56:22 +0900 Subject: [PATCH 08/18] update docker build to install dependencies --- Dockerfile | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index fb99b29..41f79db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ curl \ libcurl4-openssl-dev \ libcairo2-dev cairo-dock cairo-dock-plug-ins \ + libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev \ libxt-dev \ libharfbuzz-dev libfribidi-dev \ libhdf5-dev \ @@ -71,11 +72,20 @@ RUN R -e 'install.packages(c("yaml", "BiocManager", "Cairo", "ggplot2", "cowplot RUN R -e 'BiocManager::install(c("GenomicFeatures", "plyranges", "Rhtslib", "Rsamtools"), ask = FALSE)' +RUN echo 'PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"' > /root/.Renviron + RUN R -e 'install.packages("devtools"); devtools::install_github(repo = "hhoeflin/hdf5r"); devtools::install_github(repo = "mojaveazure/loomR", ref = "develop")' RUN mkdir -p home && cd home && git clone https://github.com/TomKellyGenetics/zUMIs.git -RUN cd /home && wget https://github.com/alexdobin/STAR/archive/2.7.9a.tar.gz && tar -xzf 2.7.9a.tar.gz && cd STAR-2.7.9a/source && make +RUN cd /home && wget https://github.com/alexdobin/STAR/archive/2.7.9a.tar.gz \ + && tar xf 2.7.9a.tar.gz \ + && rm 2.7.9a.tar.gz \ + && cd STAR-2.7.9a/source \ + && make \ + && mv ./STAR* /usr/bin \ + && cd ../.. \ + && rm -rf STAR-2.7.9a CMD ["/bin/bash"] From 8fd3c6ba246324db1ac520f0e89390b8a3e21287 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 16:25:14 +0900 Subject: [PATCH 09/18] Argument list too long for ls https://unix.stackexchange.com/questions/38955/argument-list-too-long-for-ls --- splitfq.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/splitfq.sh b/splitfq.sh index 709eeb2..be6b984 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,7 +38,7 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - ls $t$pref$project* | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt + find -type f -name $t$pref$project* | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt exit 1 } From 10d91c09a42e4716742ff3024e28e976662d8862 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 16:26:41 +0900 Subject: [PATCH 10/18] Argument list too long for ls https://unix.stackexchange.com/questions/38955/argument-list-too-long-for-ls --- splitfq.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/splitfq.sh b/splitfq.sh index be6b984..7b2bc64 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,7 +38,7 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - find -type f -name $t$pref$project* | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt + find -type f -name '$t$pref$project*' | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt exit 1 } From f3e1cc28864757108987ce6ea7005b40cb84c75f Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 16:48:03 +0900 Subject: [PATCH 11/18] find arguments too long --- splitfq.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/splitfq.sh b/splitfq.sh index 7b2bc64..f0e9bf6 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,7 +38,9 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - find -type f -name '$t$pref$project*' | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt + ls $t$pref$project* > $t/$project.listPrefix.txt + sed -i "s|$t$pref$project||" $t/$project.listPrefix.txt + sed -i's/.gz//' $t/$project.listPrefix.txt exit 1 } From 4937d8c4058852466c49ee8abd56164682f3a2ca Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 16:50:01 +0900 Subject: [PATCH 12/18] ls without arguments --- splitfq.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/splitfq.sh b/splitfq.sh index f0e9bf6..27e9bef 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,9 +38,9 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - ls $t$pref$project* > $t/$project.listPrefix.txt - sed -i "s|$t$pref$project||" $t/$project.listPrefix.txt - sed -i's/.gz//' $t/$project.listPrefix.txt + cd $t$pref$project + ls | sed 's/.gz//' > $t/$project.listPrefix.txt + cd - exit 1 } From 153b53b171ee9e868b84a62ee465de2a7697f8cf Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Wed, 30 Jun 2021 22:35:55 +0900 Subject: [PATCH 13/18] resolve bug for merge temp files for large inputs --- Dockerfile | 2 +- splitfq.sh | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/Dockerfile b/Dockerfile index 41f79db..8844500 100644 --- a/Dockerfile +++ b/Dockerfile @@ -53,7 +53,7 @@ RUN apt-get update \ RUN conda install numpy scipy cython numba matplotlib scikit-learn h5py click -RUN pip install pysam velocyto +RUN pip install pysam velocyto; pip2 install pysam RUN apt update -qq \ && apt install --no-install-recommends software-properties-common dirmngr \ diff --git a/splitfq.sh b/splitfq.sh index 27e9bef..523e681 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,9 +38,7 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - cd $t$pref$project - ls | sed 's/.gz//' > $t/$project.listPrefix.txt - cd - + ls $t| grep $pref$project | sed "s|$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt exit 1 } From 2757a7fcc35faf0b1526e34b33fffc91e13bec8f Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 1 Jul 2021 10:27:19 +0900 Subject: [PATCH 14/18] Argument list too long for rm tmpMerge --- mergeBAM.sh | 2 +- zUMIs.sh | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/mergeBAM.sh b/mergeBAM.sh index 7c2909f..7d48323 100755 --- a/mergeBAM.sh +++ b/mergeBAM.sh @@ -13,7 +13,7 @@ yaml=$6 samtoolsexc=$7 ls $tmpMerge/$project.*.filtered.tagged.bam > $tmpMerge/$project.bamlist.txt -#samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 +samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 cat $tmpMerge/$project.*.BCstats.txt > $outdir/$project.BCstats.txt diff --git a/zUMIs.sh b/zUMIs.sh index dd54654..8e40812 100755 --- a/zUMIs.sh +++ b/zUMIs.sh @@ -261,10 +261,6 @@ if [[ "${whichStage}" == "Filtering" ]] ; then for x in ${l} ; do perl ${zumisdir}/fqfilter_v2.pl ${yaml} ${samtoolsexc} ${Rexc} ${pigzexc} ${zumisdir} ${x} & done wait bash ${zumisdir}/mergeBAM.sh ${zumisdir} ${tmpMerge} ${num_threads} ${project} ${outdir} ${yaml} ${samtoolsexc} - for i in ${fqfiles} ; do - pref=$(basename ${i} | sed 's/.fastq.gz//' | sed 's/.fq.gz//') - rm ${tmpMerge}${pref}*gz - done date #run barcode detection @@ -292,6 +288,8 @@ if date fi +rm -rf ${tmpMerge} + if [[ "${whichStage}" == "Filtering" ]] || [[ "${whichStage}" == "Mapping" ]] || From 8f7c841a0fa87ef6636585bd6f1d2cf1c8c317db Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 1 Jul 2021 10:39:40 +0900 Subject: [PATCH 15/18] add script for building docker container --- docker_build.sh | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 docker_build.sh diff --git a/docker_build.sh b/docker_build.sh new file mode 100644 index 0000000..e5fa956 --- /dev/null +++ b/docker_build.sh @@ -0,0 +1,6 @@ +#sleep 86400 +podman build -t zumis:v2.9.7 . +podman tag zumis:v2.9.7 tomkellygenetics/zumis:v2.9.7 +podman tag zumis:v2.9.7 tomkellygenetics/zumis:latest +podman push tomkellygenetics/zumis:v2.9.7 +podman push tomkellygenetics/zumis:latest From e3e3cccd87d8a02c05f45168c44a3bc761e68e72 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Thu, 1 Jul 2021 10:40:44 +0900 Subject: [PATCH 16/18] update docker build --- docker_build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker_build.sh b/docker_build.sh index e5fa956..ca323ab 100644 --- a/docker_build.sh +++ b/docker_build.sh @@ -1,6 +1,6 @@ #sleep 86400 -podman build -t zumis:v2.9.7 . -podman tag zumis:v2.9.7 tomkellygenetics/zumis:v2.9.7 -podman tag zumis:v2.9.7 tomkellygenetics/zumis:latest -podman push tomkellygenetics/zumis:v2.9.7 -podman push tomkellygenetics/zumis:latest +podman build -t zumis2:v2.9.7 . +podman tag zumis2:v2.9.7 chrzie/zumis2:v2.9.7 +podman tag zumis2:v2.9.7 chrzie/zumis2:latest +podman push chrzie/zumis2:v2.9.7 +podman push chrzie/zumis2:latest From fbfd41874ef441e087129e246fcccde3074872b4 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Fri, 2 Jul 2021 16:20:37 +0900 Subject: [PATCH 17/18] Argument list too long for rm --- mergeBAM.sh | 2 +- splitfq.sh | 2 +- zUMIs.sh | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/mergeBAM.sh b/mergeBAM.sh index 7c2909f..7d48323 100755 --- a/mergeBAM.sh +++ b/mergeBAM.sh @@ -13,7 +13,7 @@ yaml=$6 samtoolsexc=$7 ls $tmpMerge/$project.*.filtered.tagged.bam > $tmpMerge/$project.bamlist.txt -#samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 +samtools merge -f -@ $nthreads -b $tmpMerge/$project.bamlist.txt $outdir/$project.filtered.tagged.bam > /dev/null 2>&1 cat $tmpMerge/$project.*.BCstats.txt > $outdir/$project.BCstats.txt diff --git a/splitfq.sh b/splitfq.sh index 709eeb2..523e681 100755 --- a/splitfq.sh +++ b/splitfq.sh @@ -38,7 +38,7 @@ function splitfqgz() { d=`dirname $fqfile` $pexc -dc -p $nthreads $d/$pref.gz | split --lines=$nl --filter=''$pexc' -p '$nthreads' > $FILE.gz' - $t$pref$project - ls $t$pref$project* | sed "s|$t$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt + ls $t| grep $pref$project | sed "s|$pref$project||" | sed 's/.gz//' > $t/$project.listPrefix.txt exit 1 } diff --git a/zUMIs.sh b/zUMIs.sh index dd54654..d97942f 100755 --- a/zUMIs.sh +++ b/zUMIs.sh @@ -261,10 +261,6 @@ if [[ "${whichStage}" == "Filtering" ]] ; then for x in ${l} ; do perl ${zumisdir}/fqfilter_v2.pl ${yaml} ${samtoolsexc} ${Rexc} ${pigzexc} ${zumisdir} ${x} & done wait bash ${zumisdir}/mergeBAM.sh ${zumisdir} ${tmpMerge} ${num_threads} ${project} ${outdir} ${yaml} ${samtoolsexc} - for i in ${fqfiles} ; do - pref=$(basename ${i} | sed 's/.fastq.gz//' | sed 's/.fq.gz//') - rm ${tmpMerge}${pref}*gz - done date #run barcode detection @@ -322,3 +318,5 @@ fi if [[ ${conda} = true ]] ; then source ${zumisenv}/bin/deactivate fi + +rm -rf ${tmpMerge} From 71b5a0c872953ee29306c16474e0a52fea835bc9 Mon Sep 17 00:00:00 2001 From: TomKellyGenetics Date: Fri, 2 Jul 2021 16:22:24 +0900 Subject: [PATCH 18/18] Argument list too long for rm --- zUMIs.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/zUMIs.sh b/zUMIs.sh index c7d8f8f..d97942f 100755 --- a/zUMIs.sh +++ b/zUMIs.sh @@ -288,8 +288,6 @@ if date fi -rm -rf ${tmpMerge} - if [[ "${whichStage}" == "Filtering" ]] || [[ "${whichStage}" == "Mapping" ]] ||