forked from compomics/ThermoRawFileParser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCentroidedMgfExtractor.cs
286 lines (242 loc) · 11.9 KB
/
CentroidedMgfExtractor.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Mono.Options;
using ThermoFisher.CommonCore.Data.Business;
using ThermoFisher.CommonCore.Data.Interfaces;
using ThermoFisher.CommonCore.RawFileReader;
namespace ThermoRawFileParser
{
internal class CentroidedMgfExtractor
{
// Path of the RAW file to convert, as passed to the constructor.
private readonly string rawFilePath;
// Directory that receives the metadata file and the MGF file.
private readonly string outputDirectory;
// Optional collection identifier; when non-null it becomes a segment of the spectrum title.
private readonly string collection;
// When true, Extract() writes the metadata file in addition to the MGF file.
private readonly bool outputMetadata;
// Optional MS run name; when non-null it is used in the spectrum title instead of the RAW file name.
private readonly string msRun;
// Optional sub folder; when non-null it becomes a segment of the spectrum title.
private readonly string subFolder;
// NOTE(review): the two fields below are static yet are assigned on every Extract() call, so they
// are shared by all instances of this class — confirm that extractors never run concurrently.
// File name (with extension) of the RAW file, derived from rawFilePath in Extract().
private static string rawFileName;
// RAW file name without its extension; used to name the output files.
private static string rawFileNameWithoutExtension;
/// <summary>
/// Create an extractor for a single RAW file. Nothing is read or written until
/// <see cref="Extract"/> is called; the arguments are only stored.
/// </summary>
/// <param name="rawFilePath">the path of the RAW file to convert</param>
/// <param name="outputDirectory">the directory that receives the output files</param>
/// <param name="outputMetadata">whether a metadata file is written as well</param>
/// <param name="collection">optional collection identifier used in the spectrum titles</param>
/// <param name="msRun">optional MS run name used in the spectrum titles</param>
/// <param name="subFolder">optional sub folder used in the spectrum titles</param>
public CentroidedMgfExtractor(string rawFilePath, string outputDirectory, Boolean outputMetadata,
    string collection, string msRun,
    string subFolder)
{
    // Input and output locations
    this.rawFilePath = rawFilePath;
    this.outputDirectory = outputDirectory;

    // Spectrum title components
    this.collection = collection;
    this.subFolder = subFolder;
    this.msRun = msRun;

    // Output options
    this.outputMetadata = outputMetadata;
}
/// <summary>
/// Extract the RAW file metadata and spectra in MGF format.
/// </summary>
/// <remarks>
/// When no path was supplied, the file does not exist, the file cannot be opened, the reader
/// reports an error, or the file is still being acquired, a message is written to the console
/// and the method returns without producing output.
/// </remarks>
public void Extract()
{
    // Check to see if the RAW file name was supplied as an argument to the program
    if (string.IsNullOrEmpty(rawFilePath))
    {
        Console.WriteLine("No RAW file specified!");
        return;
    }

    // Check to see if the specified RAW file exists
    if (!File.Exists(rawFilePath))
    {
        Console.WriteLine(@"The file doesn't exist in the specified location - " + rawFilePath);
        return;
    }

    // Path.GetFileName honours every directory separator of the current platform, whereas
    // splitting on '/' returned the complete path for Windows style paths such as C:\data\run.raw.
    rawFileName = Path.GetFileName(rawFilePath);
    rawFileNameWithoutExtension = Path.GetFileNameWithoutExtension(rawFileName);

    Console.WriteLine("Started parsing " + rawFilePath);

    // Create the IRawDataPlus object for accessing the RAW file; it is disposed when the block ends
    using (IRawDataPlus rawFile = RawFileReaderFactory.ReadFile(rawFilePath))
    {
        if (!rawFile.IsOpen)
        {
            Console.WriteLine("Unable to access the RAW file using the RawFileReader class!");
            return;
        }

        // Check for any errors in the RAW file
        if (rawFile.IsError)
        {
            Console.WriteLine($"Error opening ({rawFile.FileError}) - {rawFilePath}");
            return;
        }

        // Check if the RAW file is being acquired
        if (rawFile.InAcquisition)
        {
            Console.WriteLine("RAW file still being acquired - " + rawFilePath);
            return;
        }

        // Select the first MS instrument (controller) present in the RAW file
        rawFile.SelectInstrument(Device.MS, 1);

        // Get the first and last scan from the RAW file
        int firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum;
        int lastScanNumber = rawFile.RunHeaderEx.LastSpectrum;

        if (outputMetadata)
        {
            WriteMetada(rawFile, firstScanNumber, lastScanNumber);
        }

        // NOTE(review): the scan range above was read with MS instrument 1 selected, but the
        // spectra are read after switching to MS instrument 2 — confirm that this is intended.
        rawFile.SelectInstrument(Device.MS, 2);

        WriteSpectraToMgf(rawFile, firstScanNumber, lastScanNumber);

        Console.WriteLine("Finished parsing " + rawFilePath);
    }
}
/// <summary>
/// Write the RAW file metadata to the file "&lt;RAW file name without extension&gt;_metadata"
/// in the output directory, one "key=value" entry per line.
/// </summary>
/// <param name="rawFile">the RAW file object</param>
/// <param name="firstScanNumber">the first scan number</param>
/// <param name="lastScanNumber">the last scan number</param>
private void WriteMetada(IRawDataPlus rawFile, int firstScanNumber, int lastScanNumber)
{
    // Numbers are formatted with the invariant culture so that the decimal separator does not
    // depend on the locale of the machine running the conversion.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Fetch each header object once instead of once per line
    var fileHeader = rawFile.FileHeader;
    var runHeader = rawFile.RunHeaderEx;
    var instrumentData = rawFile.GetInstrumentData();

    // Get the start and end time from the RAW file
    double startTime = runHeader.StartTime;
    double endTime = runHeader.EndTime;

    // Collect the metadata
    List<string> output = new List<string>
    {
        "RAW file=" + rawFile.FileName,
        "RAWfileversion=" + fileHeader.Revision,
        "Creationdate=" + fileHeader.CreationDate,
        "Operator=" + fileHeader.WhoCreatedId,
        "Numberofinstruments=" + rawFile.InstrumentCount,
        "Description=" + fileHeader.FileDescription,
        "Instrumentmode=" + instrumentData.Model,
        "Instrumentname=" + instrumentData.Name,
        "Serialnumber=" + instrumentData.SerialNumber,
        "Softwareversion=" + instrumentData.SoftwareVersion,
        // NOTE(review): the label says firmware but the value is HardwareVersion — confirm
        "Firmwareversion=" + instrumentData.HardwareVersion,
        "Units=" + instrumentData.Units,
        string.Format(invariant, "Massresolution={0:F3}", runHeader.MassResolution),
        string.Format(invariant, "Numberofscans={0}", runHeader.SpectraCount),
        string.Format(invariant, "Scan range={0},{1}", firstScanNumber, lastScanNumber),
        string.Format(invariant, "Time range={0:F2},{1:F2}", startTime, endTime),
        string.Format(invariant, "Mass range={0:F4},{1:F4}", runHeader.LowMass, runHeader.HighMass)
    };

    // Write the meta data to file; Path.Combine inserts the separator only when it is needed
    string metadataPath = Path.Combine(outputDirectory, rawFileNameWithoutExtension + "_metadata");
    File.WriteAllLines(metadataPath, output);
}
/// <summary>
/// Write the RAW files' spectra to a MGF file named "&lt;RAW file name without extension&gt;.mgf"
/// in the output directory. Only scans that have a centroid stream and whose scan filter
/// reports MS order Ms2 are written.
/// </summary>
/// <param name="rawFile">the RAW file interface</param>
/// <param name="firstScanNumber">the first scan number</param>
/// <param name="lastScanNumber">the last scan number</param>
private void WriteSpectraToMgf(IRawDataPlus rawFile, int firstScanNumber, int lastScanNumber)
{
    // MGF is a machine readable format: always format numbers with the invariant culture so the
    // decimal separator is '.' whatever the locale of the machine running the conversion.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    string mgfPath = Path.Combine(outputDirectory, rawFileNameWithoutExtension + ".mgf");

    using (var mgfFile = File.CreateText(mgfPath))
    {
        for (int scanNumber = firstScanNumber; scanNumber <= lastScanNumber; scanNumber++)
        {
            // Get each scan from the RAW file; scans without centroid data are skipped
            var scan = Scan.FromFile(rawFile, scanNumber);
            if (!scan.HasCentroidStream)
            {
                continue;
            }

            // Get the scan filter for this scan number; only MS2 scans are exported
            var scanFilter = rawFile.GetFilterForScanNumber(scanNumber);
            if (scanFilter.MSOrder != ThermoFisher.CommonCore.Data.FilterEnums.MSOrderType.Ms2)
            {
                continue;
            }

            // The retention time and scan event are only needed for scans that are written
            double time = rawFile.RetentionTimeFromScanNumber(scanNumber);
            var scanEvent = rawFile.GetScanEventForScanNumber(scanNumber);

            mgfFile.WriteLine("BEGIN IONS");
            mgfFile.WriteLine($"TITLE={ConstructSpectrumTitle(scanNumber)}");
            mgfFile.WriteLine(string.Format(invariant, "SCAN={0}", scanNumber));
            // The value is multiplied by 60 before being written as RTINSECONDS
            mgfFile.WriteLine(string.Format(invariant, "RTINSECONDS={0}", time * 60));

            // The charge is looked up in the trailer extra data list
            var trailerData = rawFile.GetTrailerExtraInformation(scanNumber);
            for (int i = 0; i < trailerData.Length; i++)
            {
                if (trailerData.Labels[i] != "Charge State:")
                {
                    continue;
                }

                // TryParse tolerates a missing or malformed value instead of aborting the
                // conversion; writing the parsed number keeps stray whitespace out of the file.
                int charge;
                if (int.TryParse(trailerData.Values[i], System.Globalization.NumberStyles.Integer,
                        invariant, out charge) && charge > 0)
                {
                    mgfFile.WriteLine(string.Format(invariant, "CHARGE={0}+", charge));
                }
            }

            // Get the reaction information for the first precursor
            var reaction = scanEvent.GetReaction(0);
            double precursorMass = reaction.PrecursorMass;
            mgfFile.WriteLine(string.Format(invariant, "PEPMASS={0:F4}", precursorMass));

            double collisionEnergy = reaction.CollisionEnergy;
            mgfFile.WriteLine(string.Format(invariant, "COLLISIONENERGY={0}", collisionEnergy));

            var ionizationMode = scanFilter.IonizationMode;
            mgfFile.WriteLine($"IONMODE={ionizationMode}");

            // Write one "m/z intensity" line per centroid
            var centroidStream = rawFile.GetCentroidStream(scanNumber, false);
            if (scan.CentroidScan.Length > 0)
            {
                for (int i = 0; i < centroidStream.Length; i++)
                {
                    mgfFile.WriteLine(string.Format(invariant, "{0:F7} {1:F10}",
                        centroidStream.Masses[i], centroidStream.Intensities[i]));
                }
            }

            mgfFile.WriteLine("END IONS");
        }
    }
}
/// <summary>
/// Construct the spectrum title in the form
/// "mzspec:[collection:][subFolder:]run:scan:scanNumber", where the bracketed segments are
/// only present when the corresponding field is non-null and "run" is the MS run name or,
/// when that is null, the RAW file name.
/// </summary>
/// <param name="scanNumber">the spectrum scan number</param>
/// <returns>the spectrum title</returns>
private String ConstructSpectrumTitle(int scanNumber)
{
    string title = "mzspec:";

    // Optional leading segments
    if (collection != null)
    {
        title += collection + ":";
    }

    if (subFolder != null)
    {
        title += subFolder + ":";
    }

    // The run segment falls back to the RAW file name when no MS run name was given
    string runSegment = msRun ?? rawFileName;

    return title + runSegment + ":" + "scan:" + scanNumber;
}
/// <summary>
/// Request a base peak chromatogram with both range arguments set to scanNumber - 1 and
/// return the first intensity of the first resulting trace.
/// </summary>
/// <param name="rawFile">the RAW file object</param>
/// <param name="scanNumber">the scan number</param>
/// <returns>the first intensity value of the first chromatogram trace</returns>
private double GetPrecursorIntensity(IRawDataPlus rawFile, int scanNumber)
{
    // Both ends of the requested range are the same value
    int rangeBound = scanNumber - 1;

    // A single trace is requested: the base peak chromatogram
    var requestedTraces = new IChromatogramSettings[]
    {
        new ChromatogramTraceSettings(TraceType.BasePeak)
    };

    // Read the chromatogram data and split it into its traces
    var chromatogramData = rawFile.GetChromatogramData(requestedTraces, rangeBound, rangeBound);
    var traces = ChromatogramSignal.FromChromatogramData(chromatogramData);

    return traces[0].Intensities[0];
}
}
}