From 762787cd0f99d080daa299553aa70305e4bc7ee9 Mon Sep 17 00:00:00 2001
From: chriswyatt1
Date: Fri, 3 Feb 2023 21:14:40 +0000
Subject: [PATCH] Update_jabfeb2023

---
 .gitignore                   |  1 +
 bin/blast2taxgenesummary.pl  |  2 +-
 bin/tophitsonly.pl           | 52 ++++++++++++++++++++++++++++++++++++
 conf/myriad.config           |  2 +-
 main.nf                      |  9 ++++---
 modules/diamond_blast.nf     |  6 ++---
 modules/plot_taxonomy_pie.nf | 11 ++++++---
 modules/transdecoder.nf      |  1 +
 8 files changed, 73 insertions(+), 11 deletions(-)
 mode change 100644 => 100755 bin/blast2taxgenesummary.pl
 create mode 100755 bin/tophitsonly.pl

diff --git a/.gitignore b/.gitignore
index bed87a7..be12afc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ work
 trace*
 results/*
 Input_prot_gotits.txt
+blastdb
diff --git a/bin/blast2taxgenesummary.pl b/bin/blast2taxgenesummary.pl
old mode 100644
new mode 100755
index 93e6493..11ba6c6
--- a/bin/blast2taxgenesummary.pl
+++ b/bin/blast2taxgenesummary.pl
@@ -8,7 +8,7 @@ my $input = $ARGV[0];
 
 
 
-my $OUT_file="Horizontal_assesment.tsv";
+my $OUT_file="$input\_summary.tsv";
 
 #initiaite groups from conversion file.
 open(my $INPUT_IN, "<", $input) or die "Could not open $input \n";
diff --git a/bin/tophitsonly.pl b/bin/tophitsonly.pl
new file mode 100755
index 0000000..d348354
--- /dev/null
+++ b/bin/tophitsonly.pl
@@ -0,0 +1,52 @@
+#!/usr/bin/perl
+# tophitsonly.pl
+#
+# Reduce a tab-separated BLAST/DIAMOND hit table to one line per query:
+# for every query id (column 1) keep the hit with the highest percent
+# identity (column 4, pident). On ties the first hit seen is kept.
+# The surviving lines are written to tophitsonly.tsv, sorted by query id.
+#
+# Usage: tophitsonly.pl <blast_hits.tsv>
+use strict;
+use warnings;
+
+die "Please specify (1) Blast hits\n" unless(@ARGV==1);
+
+my $input = $ARGV[0];
+
+my $OUT_file="tophitsonly.tsv";
+
+open(my $INPUT_IN, "<", $input) or die "Could not open $input: $!\n";
+open(my $outhandle, ">", $OUT_file) or die "Could not open $OUT_file: $!\n";
+
+# Best hit seen so far for each query id, and its percent identity.
+my %best_line;
+my %best_perc;
+
+while (my $line=<$INPUT_IN>){
+    chomp $line;
+    my @split= split("\t", $line);
+
+    # Skip blank or truncated lines: without a pident column there is
+    # nothing to compare, and an undefined id would pollute the output.
+    next if @split < 4;
+
+    my $id= $split[0];
+    my $perc= $split[3];
+
+    # Keep the first hit for a new id; replace it only on a strictly
+    # higher percent identity.
+    if (!exists $best_line{$id} or $perc > $best_perc{$id}){
+        $best_line{$id}=$line;
+        $best_perc{$id}=$perc;
+    }
+}
+
+close($INPUT_IN);
+
+foreach my $key ( sort keys %best_line){
+    print $outhandle "$best_line{$key}\n";
+}
+
+# Buffered write errors only surface at close, so check it.
+close($outhandle) or die "Could not close $OUT_file: $!\n";
diff --git a/conf/myriad.config b/conf/myriad.config
index 9e6c5b1..56c04f3 100644
--- a/conf/myriad.config
+++ b/conf/myriad.config
@@ -9,7 +9,7 @@ process {
         penv = "smp"
         cpus = 6
         container = 'chriswyatt/diamond'
-        clusterOptions = '-l mem=40G -l h_rt=24:0:0 -l tmpfs=400G'
+        clusterOptions = '-l mem=20G -l h_rt=48:0:0 -l tmpfs=200G'
     }
     withLabel: "blastdb" {
         penv = "smp"
diff --git a/main.nf b/main.nf
index 05be15e..cd88022 100755
--- a/main.nf
+++ b/main.nf
@@ -13,6 +13,8 @@ params.predownloaded= false
 params.outdir = "results"
 params.names = false
 params.nodes = false
+params.numhits = null
+params.tophits = null
 params.level = "family"
 params.sensitivity = "fast"
 params.horizontal = false
@@ -88,11 +90,12 @@ workflow {
 
 
     DIAMOND_BLAST ( input_target_proteins , input_database )
+
     PLOT_PIE ( input_nodes , input_names , DIAMOND_BLAST.out.blast_hits )
 
-    if ( params.horizontal ){
-        DIAMOND_HORIZONTAL ( input_target_proteins , input_database )
-    }
+    //if ( params.horizontal ){
+    //    DIAMOND_HORIZONTAL ( input_target_proteins , input_database )
+    //}
 
     if ( params.xml ){
         DIAMOND_XML ( input_target_proteins , input_database )
diff --git a/modules/diamond_blast.nf b/modules/diamond_blast.nf
index 16ac760..8b25e72 100755
--- a/modules/diamond_blast.nf
+++ b/modules/diamond_blast.nf
@@ -12,8 +12,8 @@ process DIAMOND_BLAST {
 
     script:
     """
-	diamond blastp --$params.sensitivity --max-target-seqs $params.numhits --query $proteins --db nr --out ${proteins}\_results.tsv --threads $task.cpus --outfmt 6 qseqid sseqid stitle pident evalue sphylums staxids
-	#rm nr.dmnd
-	#rm $proteins
+    diamond blastp --$params.sensitivity --top $params.tophits --query $proteins --db nr --out ${proteins}\_results.tsv --threads $task.cpus --outfmt 6 qseqid sseqid stitle pident evalue sphylums staxids
+    #rm nr.dmnd
+    #rm $proteins
     """
 }
diff --git a/modules/plot_taxonomy_pie.nf b/modules/plot_taxonomy_pie.nf
index 581e889..e0ab058 100755
--- a/modules/plot_taxonomy_pie.nf
+++ b/modules/plot_taxonomy_pie.nf
@@ -1,7 +1,8 @@
 process PLOT_PIE {
     label 'perl_pie'
-    publishDir "$params.outdir/Blast_results/"
-    //stageInMode 'copy'
+    publishDir "$params.outdir/Blast_results/", mode:'copy', pattern: '*top.tsv'
+    publishDir "$params.outdir/Taxo_figure/", mode:'copy', pattern: '*.pdf'
+    publishDir "$params.outdir/Taxo_summary/", mode:'copy', pattern: '*_summary.tsv'
 
     input:
         path nodes
@@ -15,6 +16,10 @@ process PLOT_PIE {
 
     script:
     """
-    ${workflow.projectDir}/bin/ncbi_txids_taxonomy.all.pl $nodes $names $blast_result
+    ${workflow.projectDir}/bin/tophitsonly.pl $blast_result
+    mv tophitsonly.tsv ${blast_result}_top.tsv
+    ${workflow.projectDir}/bin/ncbi_txids_taxonomy.all.pl $nodes $names ${blast_result}_top.tsv
+
+    blast2taxgenesummary.pl $blast_result
     """
 }
diff --git a/modules/transdecoder.nf b/modules/transdecoder.nf
index f6b5b3b..18b990b 100755
--- a/modules/transdecoder.nf
+++ b/modules/transdecoder.nf
@@ -1,5 +1,6 @@
 process T_DECODER {
     label 'transdecoder'
+    publishDir "$params.outdir/Prot/", mode:'copy', pattern: '*.prot.fa'
 
     input:
         path fasta_file