diff --git a/src/sickle.h b/src/sickle.h index e78e667..c697b92 100644 --- a/src/sickle.h +++ b/src/sickle.h @@ -98,6 +98,6 @@ typedef struct __cutsites_ { /* Function Prototypes */ int single_main (int argc, char *argv[]); int paired_main (int argc, char *argv[]); -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug); +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int debug); #endif /*SICKLE_H*/ diff --git a/src/sliding.c b/src/sliding.c index 7573106..5a8193c 100644 --- a/src/sliding.c +++ b/src/sliding.c @@ -32,7 +32,7 @@ int get_quality_num (char qualchar, int qualtype, kseq_t *fqrec, int pos) { } -cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int debug) { +cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int qual_threshold, int no_fiveprime, int trunc_n, int drop_n, int debug) { int window_size = (int) (0.1 * fqrec->seq.l); int i,j; @@ -112,10 +112,14 @@ cutsites* sliding_window (kseq_t *fqrec, int qualtype, int length_threshold, int } - /* If truncate N option is selected, and sequence has Ns, then */ - /* change 3' cut site to be the base before the first N */ - if (trunc_n && ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n")))) { - three_prime_cut = npos - fqrec->seq.s; + /* If truncate N option is selected, and sequence has Ns, then + * change 3' cut site to be the base before the first N. + * If drop N option is selected, omit the sequence. */ + if ((npos = strstr(fqrec->seq.s, "N")) || (npos = strstr(fqrec->seq.s, "n"))) { + if (trunc_n) + three_prime_cut = npos - fqrec->seq.s; + else if (drop_n) + three_prime_cut = five_prime_cut = -1; } /* if cutting length is less than threshold then return -1 for both */ diff --git a/src/trim_paired.c b/src/trim_paired.c index 6a42251..2b8f3ad 100644 --- a/src/trim_paired.c +++ b/src/trim_paired.c @@ -16,7 +16,7 @@ __KSEQ_READ int paired_qual_threshold = 20; int paired_length_threshold = 20; -static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xng"; +static const char *paired_short_options = "df:r:c:t:o:p:m:M:s:q:l:xnNg"; static struct option paired_long_options[] = { { "qual-type", required_argument, NULL, 't' }, { "pe-file1", required_argument, NULL, 'f' }, @@ -30,6 +30,7 @@ static struct option paired_long_options[] = { { "length-threshold", required_argument, NULL, 'l' }, { "no-fiveprime", no_argument, NULL, 'x' }, { "truncate-n", no_argument, NULL, 'n' }, + { "drop-n", no_argument, NULL, 'N' }, { "gzip-output", no_argument, NULL, 'g' }, { "output-combo-all", required_argument, NULL, 'M' }, { "quiet", no_argument, NULL, 'z' }, @@ -95,6 +96,7 @@ void paired_usage (int status, char *msg) { " trimming. Default %4$d.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" + "-N, --drop-n Discard sequences containing an N.\n" "-g, --gzip-output Output gzipped files.\n" "--quiet Do not output trimming info\n" "--help Display this help and exit\n" @@ -154,6 +156,7 @@ int paired_main(int argc, char *argv[]) { int quiet = 0; int no_fiveprime = 0; int trunc_n = 0; + int drop_n = 0; int gzip_output = 0; int combo_all=0; int combo_s=0; @@ -246,6 +249,10 @@ int paired_main(int argc, char *argv[]) { trunc_n = 1; break; + case 'N': + drop_n = 1; + break; + case 'g': gzip_output = 1; break; @@ -276,6 +283,11 @@ int paired_main(int argc, char *argv[]) { paired_usage(EXIT_FAILURE, "****Error: Quality type is required."); } + if (trunc_n && drop_n) { + fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n"); + return EXIT_FAILURE; + } + /* make sure minimum input filenames are specified */ if (!infn1 && !infnc) { paired_usage(EXIT_FAILURE, "****Error: Must have either -f OR -c argument."); @@ -414,8 +426,8 @@ int paired_main(int argc, char *argv[]) { break; } - p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); - p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec1, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); + p2cut = sliding_window(fqrec2, qualtype, paired_length_threshold, paired_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); total += 2; if (debug) printf("p1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut); @@ -520,7 +532,7 @@ int paired_main(int argc, char *argv[]) { } if (!quiet) { - if (infn1 && infn2) fprintf(stdout, "\nPE forwrd file: %s\nPE reverse file: %s\n", infn1, infn2); + if (infn1 && infn2) fprintf(stdout, "\nPE forward file: %s\nPE reverse file: %s\n", infn1, infn2); if (infnc) fprintf(stdout, "\nPE interleaved file: %s\n", infnc); fprintf(stdout, "\nTotal input FastQ records: %d (%d pairs)\n", total, (total / 2)); fprintf(stdout, "\nFastQ paired records kept: %d (%d pairs)\n", kept_p, (kept_p / 2)); diff --git a/src/trim_single.c b/src/trim_single.c index afedd8b..fcdb3d5 100644 --- a/src/trim_single.c +++ b/src/trim_single.c @@ -15,7 +15,7 @@ __KSEQ_READ int single_qual_threshold = 20; int single_length_threshold = 20; -static const char *single_short_options = "df:t:o:q:l:zxng"; +static const char *single_short_options = "df:t:o:q:l:zxnNg"; static struct option single_long_options[] = { { "fastq-file", required_argument, NULL, 'f' }, { "output-file", required_argument, NULL, 'o' }, @@ -53,6 +53,7 @@ void single_usage(int status, char *msg) { " trimming. Default %4$d.\n" "-x, --no-fiveprime Don't do five prime trimming.\n" "-n, --truncate-n Truncate sequences at position of first N.\n" + "-N, --drop-n Discard sequences containing an N.\n" "-g, --gzip-output Output gzipped files.\n" "--quiet Do not output trimming info\n" "--help Display this help and exit\n" @@ -92,6 +93,7 @@ int single_main(int argc, char *argv[]) { int quiet = 0; int no_fiveprime = 0; int trunc_n = 0; + int drop_n = 0; int gzip_output = 0; int total=0; @@ -153,6 +155,10 @@ int single_main(int argc, char *argv[]) { trunc_n = 1; break; + case 'N': + drop_n = 1; + break; + case 'g': gzip_output = 1; break; @@ -208,12 +214,17 @@ int single_main(int argc, char *argv[]) { } } + if (trunc_n && drop_n) { + fprintf(stderr, "****Error: cannot specify both --truncate-n and --drop-n\n\n"); + return EXIT_FAILURE; + } + fqrec = kseq_init(se); while ((l = kseq_read(fqrec)) >= 0) { - p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, debug); + p1cut = sliding_window(fqrec, qualtype, single_length_threshold, single_qual_threshold, no_fiveprime, trunc_n, drop_n, debug); total++; if (debug) printf("P1cut: %d,%d\n", p1cut->five_prime_cut, p1cut->three_prime_cut);