Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
TomKellyGenetics authored Nov 10, 2021
2 parents 71b5a0c + 3a786f8 commit 1ee22a4
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 17 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ We provide a script to convert zUMIs output into loom file automatically based o
zUMIs will try to automatically do this, otherwise convert zUMIs output to loom by simply running `Rscript rds2loom.R myRun.yaml`.

## Changelog
23 June 2021: zUMIs2.9.7: merged code from @TomKellyGenetics. Update for compatbility with STAR 2.7.9a and samtools 1.3.1-3. Prevent crash when unmapped reads are counted as a special character for chromosomes from samtools idxstats is not permitted as a factor levels. Excludes unmapped reads for counting.
DD MMM YYYY: zUMIs2.9.8: merged code from @TomKellyGenetics. Update for compatbility with STAR 2.7.9a and samtools 1.3.1-3. Prevent crash when unmapped reads are counted as a special character for chromosomes from samtools idxstats is not permitted as a factor levels. Excludes unmapped reads for counting.

16 Jul 2021: zUMIs2.9.7: Change perl [shebang line](https://github.com/sdparekh/zUMIs/issues/261). Change featureCounts settings to assign reads to genes with the longest overlap in case of ambiguous/overlapping annotations. Allow to control the minimum required overlap of reads to genes with a setting `fraction_overlap:` in the counting section.


07 Apr 2021: zUMIs2.9.6: Fixed a potential crash when the input file to the barcode sharing feature does not contain an equal number of columns.

Expand Down
4 changes: 4 additions & 0 deletions UMIstuffFUN.R
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,10 @@ fixMissingOptions <- function(config){
config$counting_opts$multi_overlap <- FALSE
}

if(is.null(config$counting_opts$fraction_overlap)){
config$counting_opts$fraction_overlap <- 0
}

if(is.null(config$counting_opts$intronProb)){
config$counting_opts$intronProb <- FALSE
}
Expand Down
2 changes: 1 addition & 1 deletion correct_BCtag.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
use warnings;

if(@ARGV != 4)
Expand Down
2 changes: 1 addition & 1 deletion correct_UBtag.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
use warnings;

if(@ARGV != 4)
Expand Down
2 changes: 1 addition & 1 deletion fqfilter_v2.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
# LMU Munich. AG Enard
# A script to filter reads based on Barcode base quality.

Expand Down
2 changes: 1 addition & 1 deletion merge_bbmap_alignment.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
use warnings;

if(@ARGV != 5)
Expand Down
2 changes: 1 addition & 1 deletion misc/demultiplex_BC.pl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/perl
#!/usr/bin/env perl
use warnings;

if(@ARGV != 4)
Expand Down
6 changes: 4 additions & 2 deletions runfeatureCountFUN.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ suppressWarnings(suppressMessages(require(AnnotationDbi)))
.chromLengthFilter <- function(abamfile, keep_chr_minLength =0, samtoolsexc){
bread<- paste(samtoolsexc,"view -H ", abamfile , "| grep '^@SQ' | cut -f2,3 ")
chr<-data.table::fread(bread,col.names = c("chr","len"), header = F)[
, chr2 :=gsub("SN:","",chr)][
, chr2 :=gsub("SN:","",chr)][
, len2 :=gsub("LN:","",len)][
, len2 := as.numeric(len2)][len2>=keep_chr_minLength]
out <- data.frame("name" = chr$chr2, "length" = chr$len2, stringsAsFactors = FALSE)
Expand Down Expand Up @@ -346,7 +346,7 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
return(saf)
}

.runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib,multi_overlap_var){
.runFeatureCount<-function(abamfile,RG,saf,strand,type,primaryOnly,cpu,mem,fcounts_clib,multi_overlap_var,fraction_overlap){
print(paste0("Assigning reads to features (",type,")"))
fc.stat <- featureCounts(files=abamfile,
annot.ext=saf,
Expand All @@ -360,6 +360,8 @@ get_gr <- function(y){GenomicRanges::makeGRangesFromDataFrame(y,
isPairedEnd=T,
countChimericFragments=F,
fcounts_clib = fcounts_clib,
largestOverlap = TRUE,
fracOverlap = fraction_overlap,
isIntronInput = ifelse(type == "in", 1, 0))$stat
fn<-paste0(abamfile,".featureCounts.bam")
nfn<-paste0(abamfile,".",type,".featureCounts.bam")
Expand Down
18 changes: 10 additions & 8 deletions zUMIs-dge2.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,14 @@ if(smart3_flag & opt$counting_opts$strand == 1){
tmp_bams <- split_bam(bam = abamfile, cpu = opt$num_threads, samtoolsexc=samtoolsexc)

#assign features with appropriate strand
fnex_int<-.runFeatureCount(tmp_bams[1], saf=saf$exons, strand=0, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap)
fnex_umi<-.runFeatureCount(tmp_bams[2], saf=saf$exons, strand=1, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap)
fnex_int<-.runFeatureCount(tmp_bams[1], saf=saf$exons, strand=0, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap, fraction_overlap = opt$counting_opts$fraction_overlap)
fnex_umi<-.runFeatureCount(tmp_bams[2], saf=saf$exons, strand=1, type="ex", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap, fraction_overlap = opt$counting_opts$fraction_overlap)
ffiles_int <- paste0(fnex_int,".tmp")
ffiles_umi <- paste0(fnex_umi,".tmp")

if(opt$counting_opts$introns){
fnin_int<-.runFeatureCount(ffiles_int, saf=saf$introns, strand=0, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap)
fnin_umi<-.runFeatureCount(ffiles_umi, saf=saf$introns, strand=1, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap)
fnin_int<-.runFeatureCount(ffiles_int, saf=saf$introns, strand=0, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap, fraction_overlap = opt$counting_opts$fraction_overlap)
fnin_umi<-.runFeatureCount(ffiles_umi, saf=saf$introns, strand=1, type="in", primaryOnly = opt$counting_opts$primaryHit, cpu = opt$num_threads, mem = opt$mem_limit, fcounts_clib = fcounts_clib, multi_overlap_var = opt$counting_opts$multi_overlap, fraction_overlap = opt$counting_opts$fraction_overlap)
ffiles_int <- paste0(fnin_int,".tmp")
ffiles_umi <- paste0(fnin_umi,".tmp")
}
Expand All @@ -97,7 +97,8 @@ if(smart3_flag & opt$counting_opts$strand == 1){
cpu = opt$num_threads,
mem = opt$mem_limit,
fcounts_clib = fcounts_clib,
multi_overlap_var = opt$counting_opts$multi_overlap)
multi_overlap_var = opt$counting_opts$multi_overlap,
fraction_overlap = opt$counting_opts$fraction_overlap)
ffiles<-paste0(fnex,".tmp")

if(opt$counting_opts$introns){
Expand All @@ -109,7 +110,8 @@ if(smart3_flag & opt$counting_opts$strand == 1){
cpu = opt$num_threads,
mem = opt$mem_limit,
fcounts_clib = fcounts_clib,
multi_overlap_var = opt$counting_opts$multi_overlap)
multi_overlap_var = opt$counting_opts$multi_overlap,
fraction_overlap = opt$counting_opts$fraction_overlap)
system(paste0("rm ",fnex,".tmp"))
ffiles<-paste0(fnin,".tmp")
}
Expand Down Expand Up @@ -147,12 +149,12 @@ if(opt$counting_opts$Ham_Dist == 0){
system(index_cmd)
system(paste0("rm ",tmpbamfile))
print(Sys.time())

#check if PE / SE flag is set correctly
if(is.null(opt$read_layout)){
opt$read_layout <- check_read_layout(outbamfile)
}

for(i in unique(bccount$chunkID)){
print( paste( "Hamming distance collapse in barcode chunk", i, "out of",length(unique(bccount$chunkID)) ))
reads <- reads2genes_new(featfile = outbamfile,
Expand Down
2 changes: 1 addition & 1 deletion zUMIs.sh
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ if [[ "${whichStage}" == "Filtering" ]] ; then
pref=$(basename ${f} .gz)
l=$(ls ${tmpMerge}${pref}* | sed "s|${tmpMerge}${pref}||" | sed 's/.gz//')
else
cat ${f} | head -n 4000000 > ${tmpMerge}/${project}.1mio.check.fq
head -n 4000000 ${f} > ${tmpMerge}/${project}.1mio.check.fq
smallsize=$(stat --printf="%s" ${tmpMerge}/${project}.1mio.check.fq)
rm ${tmpMerge}/${project}.1mio.check.fq
nreads=$(expr ${fullsize} \* 1000000 / ${smallsize})
Expand Down
1 change: 1 addition & 0 deletions zUMIs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ counting_opts:
velocyto: no #Would you like velocyto to do counting of intron-exon spanning reads
primaryHit: yes #Do you want to count the primary Hits of multimapping reads towards gene expression levels?
multi_overlap: no #Do you want to assign reads overlapping to multiple features?
fraction_overlap: 0 #minimum required fraction of the read overlapping with the gene for read assignment to genes
twoPass: yes #perform basic STAR twoPass mapping

#produce stats files and plots?
Expand Down

0 comments on commit 1ee22a4

Please sign in to comment.