diff --git a/crates/sage-cli/src/output.rs b/crates/sage-cli/src/output.rs index 8fc5a638..cc95e2b7 100644 --- a/crates/sage-cli/src/output.rs +++ b/crates/sage-cli/src/output.rs @@ -30,6 +30,8 @@ impl Runner { .format(peptide.proteins.len()) .as_bytes(), ); + record.push_field(peptide.start_position.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(";").as_bytes()); + record.push_field(peptide.end_position.iter().map(|&x| x.to_string()).collect::<Vec<_>>().join(";").as_bytes()); record.push_field(filenames[feature.file_id].as_bytes()); record.push_field(feature.spec_id.as_bytes()); record.push_field(itoa::Buffer::new().format(feature.rank).as_bytes()); @@ -161,12 +163,14 @@ impl Runner { "peptide", "proteins", "num_proteins", + "start_positions", + "end_positions", "filename", - "scannr", + "scan", "rank", "label", - "expmass", - "calcmass", + "measured_mass", + "calculated_mass", "charge", "peptide_len", "missed_cleavages", diff --git a/crates/sage-cli/tests/integration.rs b/crates/sage-cli/tests/integration.rs index 872d4f07..28879de1 100644 --- a/crates/sage-cli/tests/integration.rs +++ b/crates/sage-cli/tests/integration.rs @@ -1,7 +1,5 @@ use sage_core::database::Builder; -use sage_core::enzyme::Digest; use sage_core::mass::Tolerance; -use sage_core::peptide::Peptide; use sage_core::scoring::Scorer; use sage_core::spectrum::SpectrumProcessor; diff --git a/crates/sage/src/enzyme.rs b/crates/sage/src/enzyme.rs index 88c7ab5f..bbb945e0 100644 --- a/crates/sage/src/enzyme.rs +++ b/crates/sage/src/enzyme.rs @@ -18,11 +18,22 @@ pub struct Digest { /// Cleaved peptide sequence pub sequence: String, /// Protein accession - pub protein: Arc<String>, + pub protein: ProteinAssignment, /// Missed cleavages pub missed_cleavages: u8, /// Is this an N-terminal peptide of the protein? pub position: Position, + /// What residue position does this start at (1-based inclusive)? + pub start_position: u32, + /// What residue position does this end at (1-based inclusive)? 
+ pub end_position: u32 +} + +#[derive(Clone, PartialOrd, Ord, Debug, Default)] +pub struct ProteinAssignment { + identifier: Arc<String>, + start_position: u32, + end_position: u32 } #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] @@ -58,6 +69,8 @@ impl Digest { sequence: sequence.into_iter().collect(), missed_cleavages: self.missed_cleavages, position: self.position, + start_position: self.start_position, + end_position: self.end_position } } } @@ -306,6 +319,8 @@ impl EnzymeParameters { semi_enzymatic: site.semi_enzymatic, position, protein: protein.clone(), + start_position: (start + 1) as u32, + end_position: end as u32 }); } } @@ -330,6 +345,8 @@ mod test { missed_cleavages: 0, position: Position::Nterm, protein: Arc::new(String::default()), + start_position: 1, + end_position: 6 }, Digest { decoy: false, @@ -338,6 +355,8 @@ mod test { missed_cleavages: 0, position: Position::Nterm, protein: Arc::new(String::default()), + start_position: 1, + end_position: 6 }, ]; @@ -353,6 +372,8 @@ mod test { missed_cleavages: 0, position: Position::Nterm, protein: Arc::new(String::default()), + start_position: 1, + end_position: 6 }, Digest { decoy: false, @@ -361,6 +382,8 @@ mod test { missed_cleavages: 0, position: Position::Internal, protein: Arc::new(String::default()), + start_position: 7, + end_position: 12 }, ]; @@ -373,11 +396,11 @@ mod test { fn trypsin() { let sequence = "MADEEKLPPGWEKRMSRSSGRVYYFNHITNASQWERPSGN"; let expected = vec![ - ("MADEEK".into(), Position::Nterm), - ("LPPGWEK".into(), Position::Internal), - ("MSR".into(), Position::Internal), - ("SSGR".into(), Position::Internal), - ("VYYFNHITNASQWERPSGN".into(), Position::Cterm), + ("MADEEK".into(), Position::Nterm, 1, 6), + ("LPPGWEK".into(), Position::Internal, 7, 13), + ("MSR".into(), Position::Internal, 15, 17), + ("SSGR".into(), Position::Internal, 18, 21), + ("VYYFNHITNASQWERPSGN".into(), Position::Cterm, 22, 40), ]; let tryp = EnzymeParameters { @@ -391,7 +414,7 @@ mod test { expected, 
tryp.digest(sequence, Arc::default()) .into_iter() - .map(|d| (d.sequence, d.position)) + .map(|d| (d.sequence, d.position, d.start_position, d.end_position)) .collect::<Vec<_>>() ); } diff --git a/crates/sage/src/peptide.rs b/crates/sage/src/peptide.rs index 7c731fa6..613974ff 100644 --- a/crates/sage/src/peptide.rs +++ b/crates/sage/src/peptide.rs @@ -28,6 +28,10 @@ pub struct Peptide { pub position: Position, pub proteins: Vec<Arc<String>>, + /// What residue does this peptide start at in the protein (1-based inclusive)? + pub start_position: Vec<u32>, + /// What residue does this peptide end at in the protein (1-based inclusive)? + pub end_position: Vec<u32>, } impl Peptide { @@ -66,6 +70,8 @@ impl Debug for Peptide { .field("monoisotopic", &self.monoisotopic) .field("missed_cleavages", &self.missed_cleavages) .field("position", &self.position) + .field("start_position", &self.start_position) + .field("end_position", &self.end_position) .finish() } } @@ -313,6 +319,8 @@ impl Peptide { s[1..n].reverse(); pep.sequence = Arc::from(s.into_boxed_slice()); pep.modifications[1..n].reverse(); + pep.start_position = pep.start_position; // TODO: calculate start/end in reversed protein sequences? + pep.end_position = pep.end_position; } pep } @@ -373,6 +381,8 @@ impl TryFrom<Digest> for Peptide { missed_cleavages: value.missed_cleavages, semi_enzymatic: value.semi_enzymatic, proteins: vec![value.protein], + start_position: vec![value.start_position], + end_position: vec![value.end_position], }) } }