libpappsomspp
Library for mass spectrometry
mzxmloutput.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/output/mzxmloutput.cpp
3 * \date 23/11/2019
4 * \author Olivier Langella
5 * \brief write msrun peaks into mzxml output stream
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include "mzxmloutput.h"
32#include <QDebug>
33#include <QStringList>
34#include <algorithm>
35#include <cstdio>
36#include "../../config.h"
37
38using namespace pappso;
39
40
/**
 * \brief return a copy of \p in with the order of its bytes reversed
 *
 * Used to convert between little-endian and big-endian representations of a
 * fixed-size value. A one-byte value is returned unchanged.
 *
 * \tparam T type of the value; its object representation is reversed byte by
 * byte, so it is only meaningful for plain arithmetic types
 * \param in value to convert (taken by copy, the caller's object is untouched)
 * \return the value whose bytes are those of \p in in reverse order
 */
template <class T>
T
change_endian(T in)
{
  // Accessing the object representation through char* is allowed by the
  // aliasing rules; std::reverse replaces the hand-written swap loop.
  char *const p_first_byte = reinterpret_cast<char *>(&in);
  std::reverse(p_first_byte, p_first_byte + sizeof(T));
  return in;
}
50
51
52MzxmlOutput::Translater::Translater(MzxmlOutput *p_mzxml_output)
53{
54 mp_output = p_mzxml_output;
55}
57{
58}
59void
61 const QualifiedMassSpectrum &spectrum)
62{
63 qDebug();
64 mp_output->m_monitor.count();
65 mp_output->writeQualifiedMassSpectrum(spectrum);
66 qDebug();
67}
68bool
70{
71 return true;
72}
73
74
76 QIODevice *p_output_device)
77 : m_monitor(monitor)
78{
79
80 mpa_outputStream = new QXmlStreamWriter(p_output_device);
81 mpa_outputStream->setAutoFormatting(true);
82
83 mpa_outputStream->writeStartDocument("1.0");
84}
85
87{
88 close();
89 delete mpa_outputStream;
90}
91
92void
94{
95 m_isReadAhead = isReadAhead;
96}
97void
99{
100 qDebug();
101 m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
102 writeHeader(p_msrunreader);
103
104 Translater translater(this);
105
106 translater.setReadAhead(m_isReadAhead);
107
109 // translater.setNeedMsLevelPeakList(1, false);
110 // translater.setNeedMsLevelPeakList(2, false);
111 p_msrunreader->readSpectrumCollection(translater);
112
114 qDebug();
115}
116
117void
119{
120
121 mpa_outputStream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance",
122 "xsi");
123 // xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0"
124 // xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0
125 // http://sashimi.sourceforge.net/schema_revision/mzXML_2.0/mzXML_idx_2.0.xsd"
126 /*
127114 writer.setPrefix("xsi", xmlnsxsi);
128115 writer.setDefaultNamespace(namespaceURI);
129mpa_outputStream->writeStartElement("mzXML");
130117 writer.writeNamespace("xsi", xmlnsxsi);
131118 writer.writeDefaultNamespace(namespaceURI);
132119
133120 writer.writeAttribute(xmlnsxsi, "schemaLocation",
134xsischemaLocation); 121 */
135 mpa_outputStream->writeStartElement("mzXML");
136 mpa_outputStream->writeAttribute(
137 "xmlns", "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2");
138 mpa_outputStream->writeAttribute(
139 "xsi:schemaLocation",
140 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 "
141 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/"
142 "mzXML_idx_3.2.xsd");
143
144 mpa_outputStream->writeStartElement("msRun");
145 mpa_outputStream->writeAttribute(
146 "scanCount", QString("%1").arg(p_msrunreader->spectrumListSize()));
147 //<msRun scanCount="16576" startTime="PT0.292553S" endTime="PT3000.34S">
148 // writer.writeAttribute("scanCount",
149 // ms_run.getSpectrumCount(this.controller).toString());
150
151 /*
152 * # < parentFile fileName = #
153 * "file://SEQUEST1/raw/vidal/20060411_VIDAL_JEAN_1_PEPCR1_42140.RAW" #
154 * fileType = "RAWData" fileSha1 = #
155 * "23c1620d4ad3f4f0103b0141b7caec1e8b7eebf5" / >
156 */
157 mpa_outputStream->writeStartElement("parentFile");
158 mpa_outputStream->writeAttribute("fileName",
159 p_msrunreader->getMsRunId()->getFileName());
160 mpa_outputStream->writeAttribute("fileType", "RAWData");
161 mpa_outputStream->writeEndElement();
162 /*
163144
164145 MsInstrumentList instrument_list =
165ms_run.getMsInstruments(controller); 146 for (MsInstrument
166instrument : instrument_list) { 147 this.write(instrument); 148 }
167*/
168
169 mpa_outputStream->writeStartElement("msInstrument");
170 mpa_outputStream->writeAttribute("msInstrumentID", "1");
171 //<msManufacturer category="msManufacturer" value="Thermo Scientific"/>
172 mpa_outputStream->writeStartElement("msManufacturer");
173 mpa_outputStream->writeAttribute("category", "msManufacturer");
174 mpa_outputStream->writeAttribute("value", "unknown");
175 mpa_outputStream->writeEndElement();
176 //<msModel category="msModel" value="Q Exactive"/>
177 // <msIonisation category="msIonisation" value="nanoelectrospray"/>
178 // <msMassAnalyzer category="msMassAnalyzer" value="quadrupole"/>
179 // <msDetector category="msDetector" value="inductive detector"/>
180 // <software type="acquisition" name="Xcalibur"
181 // version="2.1-152001/2.1.0.1520"/>
182 mpa_outputStream->writeEndElement();
183 /*
184149
185150 // #< dataProcessing centroided ="1" >
186151 // my $ref_data_processings =
187$ms_run_description->dataProcessing(); 152 MsDataProcessingList
188dataProcList = ms_run.getMsDataProcessings(controller); 153 for
189(MsDataProcessing msDataProc : dataProcList) { 154 this.write(msDataProc); 155 }
190*/
191 mpa_outputStream->writeStartElement("dataProcessing");
192 //<dataProcessing centroided="1">
193 mpa_outputStream->writeAttribute("centroided", "1");
194 // <software type="conversion" name="ProteoWizard" version="3.0.3706"/>
195 mpa_outputStream->writeStartElement("software");
196 mpa_outputStream->writeAttribute("type", "conversion");
197 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
198 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
199 mpa_outputStream->writeEndElement();
200 //<processingOperation name="Conversion to mzML"/>
201 mpa_outputStream->writeStartElement("processingOperation");
202 mpa_outputStream->writeAttribute("name", "Conversion to mzXML");
203 //<software type="processing" name="ProteoWizard" version="3.0.3706"/>
204 mpa_outputStream->writeStartElement("software");
205 mpa_outputStream->writeAttribute("type", "processing");
206 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
207 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
208 mpa_outputStream->writeEndElement();
209 //<comment>Thermo/Xcalibur peak picking</comment>
210 mpa_outputStream->writeStartElement("comment");
211 mpa_outputStream->writeCharacters("pappso::MzxmlOutput");
212 mpa_outputStream->writeEndElement();
213 //</dataProcessing>
214 mpa_outputStream->writeEndElement();
215 mpa_outputStream->writeEndElement();
216 // Peaks
217}
218
219
220void
222{
223 mpa_outputStream->writeEndDocument();
224}
225
226
227std::size_t
228MzxmlOutput::getScanNumberFromNativeId(const QString &native_id) const
229{
230 QStringList native_id_list = native_id.split("=");
231 if(native_id_list.size() < 2)
232 {
233 }
234 else
235 {
236 return native_id_list.back().toULong();
237 }
238 return std::numeric_limits<std::size_t>::max();
239}
240
241std::size_t
243{
244 std::size_t scan_number =
246 if(scan_number == std::numeric_limits<std::size_t>::max())
247 {
248 scan_number = spectrum.getMassSpectrumId().getSpectrumIndex() + 1;
249 }
250 return scan_number;
251}
252
253std::size_t
255{
256
257 std::size_t scan_number =
259 if(scan_number == std::numeric_limits<std::size_t>::max())
260 {
261 scan_number = spectrum.getPrecursorSpectrumIndex() + 1;
262 }
263 return scan_number;
264}
265
266void
268 const pappso::QualifiedMassSpectrum &spectrum)
269{
270 qDebug();
271 mpa_outputStream->writeStartElement("scan");
272 /*
273 <scan num="1"
274 scanType="Full"
275 centroided="1"
276 msLevel="1"
277 peaksCount="1552"
278 polarity="+"
279 retentionTime="PT0.292553S"
280 lowMz="400.153411865234"
281 highMz="1013.123352050781"
282 basePeakMz="445.12003"
283 basePeakIntensity="2.0422125e06"
284 totIonCurrent="1.737798e07">*/
285 mpa_outputStream->writeAttribute("num",
286 QString("%1").arg(getScanNumber(spectrum)));
287 mpa_outputStream->writeAttribute("centroided", QString("1"));
288 mpa_outputStream->writeAttribute("msLevel",
289 QString("%1").arg(spectrum.getMsLevel()));
290 if(spectrum.getMassSpectrumCstSPtr().get() == nullptr)
291 {
292 mpa_outputStream->writeAttribute("peaksCount", "0");
293 }
294 else
295 {
296 mpa_outputStream->writeAttribute("peaksCount",
297 QString("%1").arg(spectrum.size()));
298
299 if(spectrum.size() > 0)
300 {
301 mpa_outputStream->writeAttribute(
302 "lowMz",
303 QString::number(
304 spectrum.getMassSpectrumCstSPtr().get()->front().x, 'f', 12));
305
306 mpa_outputStream->writeAttribute(
307 "highMz",
308 QString::number(
309 spectrum.getMassSpectrumCstSPtr().get()->back().x, 'f', 12));
310 // mpa_outputStream->writeAttribute("highMz",
311 // QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x,
312 // 'f', 10)); basePeakMz="245.1271988"
313 // basePeakIntensity="5810.7739"
314 // totIonCurrent="57803.815999999999">
315 }
316 }
317 mpa_outputStream->writeAttribute("polarity", "+");
318 mpa_outputStream->writeAttribute(
319 "retentionTime",
320 QString("PT%1S").arg(QString::number(spectrum.getRtInSeconds(), 'f', 2)));
321
322 if(spectrum.getMsLevel() > 1)
323 {
324
325 //<precursorMz precursorScanNum="16574"
326 // precursorIntensity="58403.04296875" precursorCharge="2"
327 ////activationMethod="HCD">994.690619901808</precursorMz>
328 mpa_outputStream->writeStartElement("precursorMz");
329 mpa_outputStream->writeAttribute(
330 "precursorScanNum",
331 QString("%1").arg(getPrecursorScanNumber(spectrum)));
332 mpa_outputStream->writeAttribute(
333 "precursorIntensity",
334 QString::number(spectrum.getPrecursorIntensity(), 'f', 4));
335 mpa_outputStream->writeAttribute(
336 "precursorCharge", QString("%1").arg(spectrum.getPrecursorCharge()));
337 mpa_outputStream->writeCharacters(
338 QString::number(spectrum.getPrecursorMz(), 'f', 12));
339 mpa_outputStream->writeEndElement();
340 }
341
342 /*<peaks compressionType="none"
343 compressedLen="0"
344 precision="64"
345 byteOrder="network"
346 contentType="m/z-int"></peaks>*/
347
348 mpa_outputStream->writeStartElement("peaks");
349 mpa_outputStream->writeAttribute("compressionType", "none");
350 mpa_outputStream->writeAttribute("compressedLen", "0");
351 mpa_outputStream->writeAttribute("precision", "64");
352 mpa_outputStream->writeAttribute("byteOrder", "network");
353 mpa_outputStream->writeAttribute("contentType", "m/z-int");
354
355 if(spectrum.getMassSpectrumCstSPtr().get() != nullptr)
356 {
357 QByteArray byte_array;
358 if(QSysInfo::ByteOrder == QSysInfo::LittleEndian)
359 {
360 for(const DataPoint &peak :
361 *(spectrum.getMassSpectrumCstSPtr().get()))
362 {
363 double swap = change_endian(peak.x);
364 byte_array.append((char *)&swap, 8);
365 swap = change_endian(peak.y);
366 byte_array.append((char *)&swap, 8);
367 }
368 }
369 else
370 {
371 for(const DataPoint &peak :
372 *(spectrum.getMassSpectrumCstSPtr().get()))
373 {
374 byte_array.append((char *)&peak.x, 8);
375 byte_array.append((char *)&peak.y, 8);
376 }
377 }
378 mpa_outputStream->writeCharacters(byte_array.toBase64());
379 }
380 mpa_outputStream->writeEndElement();
381
382 // scan
383 mpa_outputStream->writeEndElement();
384 qDebug();
385}
386
387void
389{
390 m_ms1IsMasked = mask_ms1;
391}
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
Base class to read an MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:191
virtual std::size_t spectrumListSize() const =0
get the total number of spectra contained in the MSrun data file
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
const MsRunIdCstSPtr & getMsRunId() const
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: mzxmloutput.cpp:69
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
Definition: mzxmloutput.cpp:60
void setReadAhead(bool read_ahead)
Definition: mzxmloutput.cpp:93
std::size_t getScanNumberFromNativeId(const QString &native_id) const
MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device)
Definition: mzxmloutput.cpp:75
void write(MsRunReader *p_msrunreader)
Definition: mzxmloutput.cpp:98
UiMonitorInterface & m_monitor
Definition: mzxmloutput.h:90
std::size_t getScanNumber(const QualifiedMassSpectrum &spectrum) const
QXmlStreamWriter * mpa_outputStream
Definition: mzxmloutput.h:91
void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)
void maskMs1(bool mask_ms1)
std::size_t getPrecursorScanNumber(const QualifiedMassSpectrum &spectrum) const
void writeHeader(MsRunReader *p_msrunreader)
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
uint getPrecursorCharge(bool *ok=nullptr) const
Get the precursor charge.
const QString & getPrecursorNativeId() const
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
Get the intensity of the precursor ion.
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
pappso_double getPrecursorMz(bool *ok=nullptr) const
Get the precursor m/z ratio.
std::size_t getPrecursorSpectrumIndex() const
Get the scan number of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:57
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:87
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm; the total number of steps is useful to report...
#define PAPPSOMSPP_VERSION
Definition: config.h:4
#define PAPPSOMSPP_NAME
Definition: config.h:3
T change_endian(T in)
Definition: mzxmloutput.cpp:43
write msrun peaks into mzxml output stream
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39