diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..32e25341c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "cargo" + directory: "/" + schedule: + interval: "weekly" + commit-message: + # Prefix all commit messages with "deps: ", which should be + # accepted as a conventional commit and trigger release-please + prefix: "deps" diff --git a/CHANGELOG.md b/CHANGELOG.md index b01750214..79337f4b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,13 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](http://semver.org/). +## [0.47.1](https://github.com/rust-bio/rust-htslib/compare/v0.47.0...v0.47.1) (2024-11-12) + + +### Bug Fixes + +* allow leading deletions in read_pos method of CigarStringView. ([#447](https://github.com/rust-bio/rust-htslib/issues/447)) ([2986713](https://github.com/rust-bio/rust-htslib/commit/298671382ffeab8f1f057cde83e8474963fbfb9a)) + ## [0.47.0](https://github.com/rust-bio/rust-htslib/compare/v0.46.0...v0.47.0) (2024-05-22) diff --git a/Cargo.toml b/Cargo.toml index d95dcb3a9..bbfe8cc80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT" name = "rust-htslib" readme = "README.md" repository = "https://github.com/rust-bio/rust-htslib.git" -version = "0.47.0" +version = "0.47.1" [package.metadata.release] pre-release-commit-message = "release version {{version}}" diff --git a/.cargo/config b/config.toml similarity index 100% rename from .cargo/config rename to config.toml diff --git a/src/bam/mod.rs b/src/bam/mod.rs index 478b0ac63..92198a498 100644 --- a/src/bam/mod.rs +++ b/src/bam/mod.rs @@ -2998,6 +2998,7 @@ CCCCCCCCCCCCCCCCCCC"[..], let reader = Reader::from_path("test/test_issue_156_no_text.bam").unwrap(); let header_hashmap = Header::from_template(reader.header()).to_hashmap().unwrap(); let header_refseqs = header_hashmap.get("SQ".into()).unwrap(); + assert_eq!(header_refseqs[0].get("SN").unwrap(), "ref_1",); assert_eq!(header_refseqs[0].get("LN").unwrap(), "10000000",); } diff --git a/src/bam/record.rs b/src/bam/record.rs index ced3cc8aa..c72627ec5 100644 --- a/src/bam/record.rs +++ b/src/bam/record.rs @@ -1088,30 +1088,36 @@ impl Record { return SequenceReadPairOrientation::None; } - let (is_reverse, is_first_in_template, is_mate_reverse) = if self.pos() < self.mpos() { - // given record is the left one + let (pos_1, pos_2, fwd_1, fwd_2) = if self.is_first_in_template() { ( - self.is_reverse(), - self.is_first_in_template(), - self.is_mate_reverse(), + self.pos(), + self.mpos(), + !self.is_reverse(), + !self.is_mate_reverse(), ) } else { - // given record is the right one ( - self.is_mate_reverse(), - self.is_last_in_template(), - self.is_reverse(), + self.mpos(), + self.pos(), + !self.is_mate_reverse(), + !self.is_reverse(), ) }; - match (is_reverse, is_first_in_template, is_mate_reverse) { - (false, false, false) => SequenceReadPairOrientation::F2F1, - (false, false, true) => SequenceReadPairOrientation::F2R1, - (false, true, false) => SequenceReadPairOrientation::F1F2, - (true, false, false) => SequenceReadPairOrientation::R2F1, - (false, true, true) => SequenceReadPairOrientation::F1R2, - (true, false, true) => SequenceReadPairOrientation::R2R1, - (true, true, false) => SequenceReadPairOrientation::R1F2, - (true, true, true) => SequenceReadPairOrientation::R1R2, + + if pos_1 < pos_2 { + match (fwd_1, fwd_2) { + (true, true) => SequenceReadPairOrientation::F1F2, + (true, false) => SequenceReadPairOrientation::F1R2, + (false, true) => SequenceReadPairOrientation::R1F2, + (false, false) => SequenceReadPairOrientation::R1R2, + } + } else { + match (fwd_2, fwd_1) { + (true, true) => SequenceReadPairOrientation::F2F1, + (true, false) => SequenceReadPairOrientation::F2R1, + (false, true) => SequenceReadPairOrientation::R2F1, + (false, false) => SequenceReadPairOrientation::R2R1, + } } } else { SequenceReadPairOrientation::None @@ -2097,10 +2103,15 @@ impl CigarStringView { } break; }, - Cigar::Del(_) => { - return Err(Error::BamUnexpectedCigarOperation { - msg: "'deletion' (D) found before any operation describing read sequence".to_owned() - }); + Cigar::Del(l) => { + // METHOD: leading deletions can happen in case of trimmed reads where + // a primer has been removed AFTER read mapping. + // Example: 24M8I8D18M9S before trimming, 32H8D18M9S after trimming + // with fgbio. While leading deletions should be impossible with + // normal read mapping, they make perfect sense with primer trimming + // because the mapper still had the evidence to decide in favor of + // the deletion via the primer sequence. + rpos += l; }, Cigar::RefSkip(_) => { return Err(Error::BamUnexpectedCigarOperation { @@ -2839,6 +2850,32 @@ mod alignment_cigar_tests { } } + #[test] + fn test_read_orientation_f2r1() { + let mut bam = Reader::from_path(&"test/test_nonstandard_orientation.sam").unwrap(); + + for res in bam.records() { + let record = res.unwrap(); + assert_eq!( + record.read_pair_orientation(), + SequenceReadPairOrientation::F2R1 + ); + } + } + + #[test] + fn test_read_orientation_supplementary() { + let mut bam = Reader::from_path(&"test/test_orientation_supplementary.sam").unwrap(); + + for res in bam.records() { + let record = res.unwrap(); + assert_eq!( + record.read_pair_orientation(), + SequenceReadPairOrientation::F2R1 + ); + } + } + #[test] pub fn test_cigar_parsing_non_ascii_error() { let cigar_str = "43ጷ"; diff --git a/test/test_nonstandard_orientation.sam b/test/test_nonstandard_orientation.sam new file mode 100644 index 000000000..d4d58d70b --- /dev/null +++ b/test/test_nonstandard_orientation.sam @@ -0,0 +1,203 @@ +@HD VN:1.6 SO:coordinate +@SQ SN:1 LN:248956422 +@SQ SN:10 LN:133797422 +@SQ SN:11 LN:135086622 +@SQ SN:12 LN:133275309 +@SQ SN:13 LN:114364328 +@SQ SN:14 LN:107043718 +@SQ SN:15 LN:101991189 +@SQ SN:16 LN:90338345 +@SQ SN:17 LN:83257441 +@SQ SN:18 LN:80373285 +@SQ SN:19 LN:58617616 +@SQ SN:2 LN:242193529 +@SQ SN:20 LN:64444167 +@SQ SN:21 LN:46709983 +@SQ SN:22 LN:50818468 +@SQ SN:3 LN:198295559 +@SQ SN:4 LN:190214555 +@SQ SN:5 LN:181538259 +@SQ SN:6 LN:170805979 +@SQ SN:7 LN:159345973 +@SQ SN:8 LN:145138636 +@SQ SN:9 LN:138394717 +@SQ SN:MT LN:16569 +@SQ SN:X LN:156040895 +@SQ SN:Y LN:57227415 +@SQ SN:KI270728.1 LN:1872759 +@SQ SN:KI270727.1 LN:448248 +@SQ SN:KI270442.1 LN:392061 +@SQ SN:KI270729.1 LN:280839 +@SQ SN:GL000225.1 LN:211173 +@SQ SN:KI270743.1 LN:210658 +@SQ SN:GL000008.2 LN:209709 +@SQ SN:GL000009.2 LN:201709 +@SQ SN:KI270747.1 LN:198735 +@SQ SN:KI270722.1 LN:194050 +@SQ SN:GL000194.1 LN:191469 +@SQ SN:KI270742.1 LN:186739 +@SQ SN:GL000205.2 LN:185591 +@SQ SN:GL000195.1 LN:182896 +@SQ SN:KI270736.1 LN:181920 +@SQ SN:KI270733.1 LN:179772 +@SQ SN:GL000224.1 LN:179693 +@SQ SN:GL000219.1 LN:179198 +@SQ SN:KI270719.1 LN:176845 +@SQ SN:GL000216.2 LN:176608 +@SQ SN:KI270712.1 LN:176043 +@SQ SN:KI270706.1 LN:175055 +@SQ SN:KI270725.1 LN:172810 +@SQ SN:KI270744.1 LN:168472 +@SQ SN:KI270734.1 LN:165050 +@SQ SN:GL000213.1 LN:164239 +@SQ SN:GL000220.1 LN:161802 +@SQ SN:KI270715.1 LN:161471 +@SQ SN:GL000218.1 LN:161147 +@SQ SN:KI270749.1 LN:158759 +@SQ SN:KI270741.1 LN:157432 +@SQ SN:GL000221.1 LN:155397 +@SQ SN:KI270716.1 LN:153799 +@SQ SN:KI270731.1 LN:150754 +@SQ SN:KI270751.1 LN:150742 +@SQ SN:KI270750.1 LN:148850 +@SQ SN:KI270519.1 LN:138126 +@SQ SN:GL000214.1 LN:137718 +@SQ SN:KI270708.1 LN:127682 +@SQ SN:KI270730.1 LN:112551 +@SQ SN:KI270438.1 LN:112505 +@SQ SN:KI270737.1 LN:103838 +@SQ SN:KI270721.1 LN:100316 +@SQ SN:KI270738.1 LN:99375 +@SQ SN:KI270748.1 LN:93321 +@SQ SN:KI270435.1 LN:92983 +@SQ SN:GL000208.1 LN:92689 +@SQ SN:KI270538.1 LN:91309 +@SQ SN:KI270756.1 LN:79590 +@SQ SN:KI270739.1 LN:73985 +@SQ SN:KI270757.1 LN:71251 +@SQ SN:KI270709.1 LN:66860 +@SQ SN:KI270746.1 LN:66486 +@SQ SN:KI270753.1 LN:62944 +@SQ SN:KI270589.1 LN:44474 +@SQ SN:KI270726.1 LN:43739 +@SQ SN:KI270735.1 LN:42811 +@SQ SN:KI270711.1 LN:42210 +@SQ SN:KI270745.1 LN:41891 +@SQ SN:KI270714.1 LN:41717 +@SQ SN:KI270732.1 LN:41543 +@SQ SN:KI270713.1 LN:40745 +@SQ SN:KI270754.1 LN:40191 +@SQ SN:KI270710.1 LN:40176 +@SQ SN:KI270717.1 LN:40062 +@SQ SN:KI270724.1 LN:39555 +@SQ SN:KI270720.1 LN:39050 +@SQ SN:KI270723.1 LN:38115 +@SQ SN:KI270718.1 LN:38054 +@SQ SN:KI270317.1 LN:37690 +@SQ SN:KI270740.1 LN:37240 +@SQ SN:KI270755.1 LN:36723 +@SQ SN:KI270707.1 LN:32032 +@SQ SN:KI270579.1 LN:31033 +@SQ SN:KI270752.1 LN:27745 +@SQ SN:KI270512.1 LN:22689 +@SQ SN:KI270322.1 LN:21476 +@SQ SN:GL000226.1 LN:15008 +@SQ SN:KI270311.1 LN:12399 +@SQ SN:KI270366.1 LN:8320 +@SQ SN:KI270511.1 LN:8127 +@SQ SN:KI270448.1 LN:7992 +@SQ SN:KI270521.1 LN:7642 +@SQ SN:KI270581.1 LN:7046 +@SQ SN:KI270582.1 LN:6504 +@SQ SN:KI270515.1 LN:6361 +@SQ SN:KI270588.1 LN:6158 +@SQ SN:KI270591.1 LN:5796 +@SQ SN:KI270522.1 LN:5674 +@SQ SN:KI270507.1 LN:5353 +@SQ SN:KI270590.1 LN:4685 +@SQ SN:KI270584.1 LN:4513 +@SQ SN:KI270320.1 LN:4416 +@SQ SN:KI270382.1 LN:4215 +@SQ SN:KI270468.1 LN:4055 +@SQ SN:KI270467.1 LN:3920 +@SQ SN:KI270362.1 LN:3530 +@SQ SN:KI270517.1 LN:3253 +@SQ SN:KI270593.1 LN:3041 +@SQ SN:KI270528.1 LN:2983 +@SQ SN:KI270587.1 LN:2969 +@SQ SN:KI270364.1 LN:2855 +@SQ SN:KI270371.1 LN:2805 +@SQ SN:KI270333.1 LN:2699 +@SQ SN:KI270374.1 LN:2656 +@SQ SN:KI270411.1 LN:2646 +@SQ SN:KI270414.1 LN:2489 +@SQ SN:KI270510.1 LN:2415 +@SQ SN:KI270390.1 LN:2387 +@SQ SN:KI270375.1 LN:2378 +@SQ SN:KI270420.1 LN:2321 +@SQ SN:KI270509.1 LN:2318 +@SQ SN:KI270315.1 LN:2276 +@SQ SN:KI270302.1 LN:2274 +@SQ SN:KI270518.1 LN:2186 +@SQ SN:KI270530.1 LN:2168 +@SQ SN:KI270304.1 LN:2165 +@SQ SN:KI270418.1 LN:2145 +@SQ SN:KI270424.1 LN:2140 +@SQ SN:KI270417.1 LN:2043 +@SQ SN:KI270508.1 LN:1951 +@SQ SN:KI270303.1 LN:1942 +@SQ SN:KI270381.1 LN:1930 +@SQ SN:KI270529.1 LN:1899 +@SQ SN:KI270425.1 LN:1884 +@SQ SN:KI270396.1 LN:1880 +@SQ SN:KI270363.1 LN:1803 +@SQ SN:KI270386.1 LN:1788 +@SQ SN:KI270465.1 LN:1774 +@SQ SN:KI270383.1 LN:1750 +@SQ SN:KI270384.1 LN:1658 +@SQ SN:KI270330.1 LN:1652 +@SQ SN:KI270372.1 LN:1650 +@SQ SN:KI270548.1 LN:1599 +@SQ SN:KI270580.1 LN:1553 +@SQ SN:KI270387.1 LN:1537 +@SQ SN:KI270391.1 LN:1484 +@SQ SN:KI270305.1 LN:1472 +@SQ SN:KI270373.1 LN:1451 +@SQ SN:KI270422.1 LN:1445 +@SQ SN:KI270316.1 LN:1444 +@SQ SN:KI270340.1 LN:1428 +@SQ SN:KI270338.1 LN:1428 +@SQ SN:KI270583.1 LN:1400 +@SQ SN:KI270334.1 LN:1368 +@SQ SN:KI270429.1 LN:1361 +@SQ SN:KI270393.1 LN:1308 +@SQ SN:KI270516.1 LN:1300 +@SQ SN:KI270389.1 LN:1298 +@SQ SN:KI270466.1 LN:1233 +@SQ SN:KI270388.1 LN:1216 +@SQ SN:KI270544.1 LN:1202 +@SQ SN:KI270310.1 LN:1201 +@SQ SN:KI270412.1 LN:1179 +@SQ SN:KI270395.1 LN:1143 +@SQ SN:KI270376.1 LN:1136 +@SQ SN:KI270337.1 LN:1121 +@SQ SN:KI270335.1 LN:1048 +@SQ SN:KI270378.1 LN:1048 +@SQ SN:KI270379.1 LN:1045 +@SQ SN:KI270329.1 LN:1040 +@SQ SN:KI270419.1 LN:1029 +@SQ SN:KI270336.1 LN:1026 +@SQ SN:KI270312.1 LN:998 +@SQ SN:KI270539.1 LN:993 +@SQ SN:KI270385.1 LN:990 +@SQ SN:KI270423.1 LN:981 +@SQ SN:KI270392.1 LN:971 +@SQ SN:KI270394.1 LN:970 +@RG ID:chm SM:chm PL:ILLUMINA +@PG ID:bwa PN:bwa VN:0.7.17-r1188 CL:bwa mem -t 8 -R @RG\tID:chm\tSM:chm\tPL:ILLUMINA resources/genome.fasta results/merged/chm_R1.fastq.gz results/merged/chm_R2.fastq.gz +@PG ID:GATK ApplyBQSR VN:4.1.4.1 CL:ApplyBQSR --output results/recal/chm.sorted.bam --bqsr-recal-file /tmp/tmpuhppums6/recal_table.grp --input results/mapped/chm.sorted.bam --reference resources/genome.fasta --preserve-qscores-less-than 6 --use-original-qualities false --quantize-quals 0 --round-down-quantized false --emit-original-quals false --global-qscore-prior -1.0 --interval-set-rule UNION --interval-padding 0 --interval-exclusion-padding 0 --interval-merging-rule ALL --read-validation-stringency SILENT --seconds-between-progress-updates 10.0 --disable-sequence-dictionary-validation false --create-output-bam-index true --create-output-bam-md5 false --create-output-variant-index true --create-output-variant-md5 false --lenient false --add-output-sam-program-record true --add-output-vcf-command-line true --cloud-prefetch-buffer 40 --cloud-index-prefetch-buffer -1 --disable-bam-index-caching false --sites-only-vcf-output false --help false --version false --showHidden false --verbosity INFO --QUIET false --use-jdk-deflater false --use-jdk-inflater false --gcs-max-retries 20 --gcs-project-for-requester-pays --disable-tool-default-read-filters false PN:GATK ApplyBQSR +@PG ID:samtools PN:samtools PP:GATK ApplyBQSR VN:1.20 CL:samtools view -H chm.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.20 CL:samtools view -b nonstandard_orientation.sam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.20 CL:samtools view -h test_nonstandard_orientation.bam +HK35WCCXX160124:1:2117:25885:46859 163 1 231 46 113M38S = 351 271 CAACCTCAGTTGACTTGGGGGACAAGGCAGCAGGAGCACCAGACCCCTGCACCACCTCCTTCTGGGTGGGAGATGAGGCAGCAGGAGCACCAGGGCCCTTCACGACCTCTTTCCAGGTGGGGGGGGAGGGAGCAGGGGCAGGAGAGCGCGT =BB?BCBCABD@C@?D@AAABD@CB?B@C@ACABCAAC@BDAC@BCCCBBD@CD@CDCCCEC;BCCCBCCE?CCCBCDCECCECDDCCEA(.:BBCBBDAABBABBACCADCBCBB?CBBA1BABAAA@@B@AABBAAACB?9BBABA@DDAC2BBBBCBCDBCCFA=? XA:Z:1,-26345048,17S134M,8; MC:Z:113M38S MD:Z:1C7C5G0A28G5T4A46G32A14 RG:Z:chm NM:i:9 AS:i:109 XS:i:94 diff --git a/test/test_orientation_supplementary.sam b/test/test_orientation_supplementary.sam new file mode 100644 index 000000000..acf52140b --- /dev/null +++ b/test/test_orientation_supplementary.sam @@ -0,0 +1,35 @@ +@HD VN:1.3 SO:coordinate +@SQ SN:chr10 LN:135374737 +@SQ SN:chr11 LN:134452384 +@SQ SN:chr12 LN:132349534 +@SQ SN:chr13 LN:114142980 +@SQ SN:chr14 LN:106368585 +@SQ SN:chr15 LN:100338915 +@SQ SN:chr16 LN:88827254 +@SQ SN:chr17 LN:78774742 +@SQ SN:chr18 LN:76117153 +@SQ SN:chr19 LN:63811651 +@SQ SN:chr1 LN:247249719 +@SQ SN:chr20 LN:62435964 +@SQ SN:chr21 LN:46944323 +@SQ SN:chr22 LN:49691432 +@SQ SN:chr2 LN:242951149 +@SQ SN:chr3 LN:199501827 +@SQ SN:chr4 LN:191273063 +@SQ SN:chr5 LN:180857866 +@SQ SN:chr6 LN:170899992 +@SQ SN:chr7 LN:158821424 +@SQ SN:chr8 LN:146274826 +@SQ SN:chr9 LN:140273252 +@SQ SN:chrM LN:16571 +@SQ SN:chrX LN:154913754 +@SQ SN:chrY LN:57772954 +@RG ID:tumor SM:tumor +@PG ID:bwa PN:bwa VN:0.7.16a-r1187-dirty CL:resources/bwa mem -t 8 -Y -R @RG\tID:tumor\tSM:tumor index/hg18/genome reads/simulated.tumor.1.fastq reads/simulated.tumor.2.fastq +@PG ID:samtools PN:samtools PP:bwa VN:1.20 CL:samtools sort -n -o tests/resources/testcases/test16/tumor.bam.namesorted.bam tests/resources/testcases/test16/tumor.bam +@PG ID:samtools.1 PN:samtools PP:samtools VN:1.20 CL:samtools fixmate tests/resources/testcases/test16/tumor.bam.namesorted.bam tests/resources/testcases/test16/tumor.bam.fixed.bam +@PG ID:samtools.2 PN:samtools PP:samtools.1 VN:1.20 CL:samtools sort -o tests/resources/testcases/test16/tumor.bam tests/resources/testcases/test16/tumor.bam.fixed.bam +@PG ID:samtools.3 PN:samtools PP:samtools.2 VN:1.20 CL:samtools view -H tumor.bam +sim_Som2-5-1_chr1_1_1b5889 163 chr1 938 60 100M = 1143 264 CACTATACCACTTGAATGCTCAGAAGAAAAAAAAAAGAATTCAGAATATGTGTATTAAAATGGGTACAATAATGAGTAAAAAACTTGAAAGAAGCTGGAG GFGDECGFE?AA>FGADEEF)BDC>FDEC5EEBFEEB?D@BGDCAB:FBGGDE;DF-CBDDCD+>:ED@#?=?#>EADE,B#?A#FFC;CC@#:5DBA## NM:i:1 MD:Z:69A30 AS:i:95 XS:i:23 ZT:Z:95,1,72,72,0,0,69,0,1,0,0,0,1379,0 RG:Z:tumor MQ:i:60 MC:Z:59M41S +sim_Som2-5-1_chr1_1_1b5889 83 chr1 1143 60 59M41S = 938 -264 ATCTTTCCTTTATCAACTATTGGTGTTAACCTTTGATTATATTTTTGCATAAGCATACAAAATATTGATCTTTAATTATACTAAGGAATCAATAGCCAAA F:##.##D#DFDA5D#*#:D?E7B?.>?GA>6?=CE:EEBEDAE=AF:5GF:G#ABGDGGAGEG=DAE?FF?GG7AGEG-BDEGDAGEDDEDF=>F5E*@ NM:i:2 MD:Z:2G13A42 AS:i:51 XS:i:20 SA:Z:chr1,73497948,-,58S42M,60,1; ZT:Z:51,2,31,11,0,0,42,0,3,0,0,0,419,0 RG:Z:tumor MQ:i:60 MC:Z:100M +sim_Som2-5-1_chr1_1_1b5889 2131 chr1 1263 60 58S42M = 938 -367 ATCTTTCCTTTATCAACTATTGGTGTTAACCTTTGATTATATTTTTGCATAAGCATACAAAATATTGATCTTTAATTATACTAAGGAATCAATAGCCAAA F:##.##D#DFDA5D#*#:D?E7B?.>?GA>6?=CE:EEBEDAE=AF:5GF:G#ABGDGGAGEG=DAE?FF?GG7AGEG-BDEGDAGEDDEDF=>F5E*@ NM:i:1 MD:Z:40C1 MC:Z:100M AS:i:40 XS:i:0 SA:Z:chr1,73497828,-,59M41S,60,2; ZT:Z:40,1,40,40,0,0,40,0,0,0,0,0,400,0 RG:Z:tumor