libpappsomspp
Library for mass spectrometry
msrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/msrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief base interface to read MSrun files
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  ******************************************************************************/
27 
28 #include <QDebug>
29 
30 #include "msrunreader.h"
31 #include "../../pappsomspp/exception/exceptionnotfound.h"
32 
33 
35  qRegisterMetaType<pappso::MsRunReaderSPtr>("pappso::MsRunReaderSPtr");
36 
37 
38 namespace pappso
39 {
40 
41 
42 bool
44 {
45  return false;
46 }
47 void
49 {
50 }
51 void
53  [[maybe_unused]] std::size_t size)
54 {
55 }
56 void
58 {
59  m_isReadAhead = is_read_ahead;
60 }
61 
62 bool
64 {
65  return m_isReadAhead;
66 }
67 
68 bool
70  unsigned int ms_level) const
71 {
72  if(needPeakList() == true)
73  {
74  if(ms_level < m_needPeakListByMsLevel.size())
75  {
76  return m_needPeakListByMsLevel[ms_level];
77  }
78  else
79  return true;
80  }
81  else
82  {
83  return false;
84  }
85 }
86 void
88  unsigned int ms_level, bool want_peak_list)
89 {
90  if(ms_level < m_needPeakListByMsLevel.size())
91  {
92  m_needPeakListByMsLevel[ms_level] = want_peak_list;
93  }
94 }
95 
96 bool
98 {
99  return false;
100 }
101 
102 
103 void
105 {
106  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
107  //<< "The data loading process ended.";
108 }
109 
110 
111 void
113  const QualifiedMassSpectrum &qspectrum)
114 {
115  // The vector[0] contains the number of spectra at MS
116  // The vector[1] contains the number of spectra at MS^2
117  // The vector[2] contains the number of spectra at MS^3
118  // ...
119 
120  unsigned int ms_level = qspectrum.getMsLevel();
121  if(ms_level == 0)
122  return;
123  if(ms_level > m_countMsLevelSpectrum.size())
124  {
125  m_countMsLevelSpectrum.resize(ms_level);
126  }
127  m_countMsLevelSpectrum[ms_level - 1]++;
128 }
129 
130 
131 unsigned long
132 MsRunSimpleStatistics::getMsLevelCount(unsigned int ms_level) const
133 {
134  if(ms_level == 0)
135  return 0;
136  if(ms_level > m_countMsLevelSpectrum.size())
137  return 0;
138  return (m_countMsLevelSpectrum[ms_level - 1]);
139 }
140 
141 
142 unsigned long
144 {
145  unsigned long total = 0;
146  for(unsigned long count : m_countMsLevelSpectrum)
147  {
148  total += count;
149  }
150  return total;
151 }
152 
153 
155 {
156  // qDebug();
157 }
158 
159 
161 {
162  // qDebug();
163 }
164 
165 
166 bool
168 {
169  return false;
170 }
171 
172 void
174  const QualifiedMassSpectrum &qspectrum)
175 {
176  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
177 
178  QStringList native_id_list =
179  qspectrum.getMassSpectrumId().getNativeId().split("=");
180  if(native_id_list.size() < 2)
181  {
182  return;
183  }
184  else
185  {
186  std::size_t scan_number = native_id_list.back().toULong();
187  m_mmap_scan2index.insert(std::pair<std::size_t, std::size_t>(
188  scan_number, qspectrum.getMassSpectrumId().getSpectrumIndex()));
189 
190  qDebug() << "scan number " << scan_number << "=>"
191  << qspectrum.getMassSpectrumId().getSpectrumIndex();
192  }
193 }
194 
195 std::size_t
197  std::size_t scan_number) const
198 {
199 
200  qDebug() << m_mmap_scan2index.size();
201 
202  auto it = m_mmap_scan2index.find(scan_number);
203 
204  if(it == m_mmap_scan2index.end())
205  {
206  throw ExceptionNotFound(
207  QObject::tr("scan number %1 not found").arg(scan_number));
208  }
209 
210  std::size_t index = it->second;
211 
212  it++;
213  if((it != m_mmap_scan2index.end()) && (it->first == scan_number))
214  {
215  throw PappsoException(
216  QObject::tr("scan number %1 found multiple times").arg(scan_number));
217  }
218  return index;
219 }
220 
221 
223 {
224  // qDebug();
225 }
226 
227 
229 {
230  // qDebug();
231 }
232 
233 
234 bool
236 {
237  return false;
238 }
239 
240 
241 void
243  const QualifiedMassSpectrum &qspectrum)
244 {
245  qDebug() << " " << qspectrum.getMassSpectrumId().getNativeId();
246 
247  m_retention_time_list.push_back(qspectrum.getRtInSeconds());
248 }
249 
250 const std::vector<double> &
252 {
253  return m_retention_time_list;
254 }
255 
256 
257 MsRunReader::MsRunReader(MsRunIdCstSPtr &ms_run_id) : mcsp_msRunId(ms_run_id)
258 {
259 }
260 
262  : mcsp_msRunId(other.mcsp_msRunId)
263 {
264  mpa_multiMapScanNumber = nullptr;
266 }
267 
268 
269 const MsRunIdCstSPtr &
271 {
272  return mcsp_msRunId;
273 }
274 
275 
277 {
278  if(mpa_multiMapScanNumber == nullptr)
279  delete mpa_multiMapScanNumber;
280 }
281 
282 void
283 MsRunReader::setMonoThread(bool is_mono_thread)
284 {
285  m_isMonoThread = is_mono_thread;
286 }
287 
288 bool
290 {
291  return m_isMonoThread;
292 }
293 
294 
295 std::size_t
297 {
298  qDebug() << " " << mpa_multiMapScanNumber;
299 
300  if(mpa_multiMapScanNumber == nullptr)
301  {
304  }
305  try
306  {
308  scan_number);
309  }
310 
311  catch(ExceptionNotFound &error)
312  {
313  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
314  .arg(mcsp_msRunId.get()->getFileName())
315  .arg(error.qwhat()));
316  }
317  catch(PappsoException &error)
318  {
319  throw PappsoException(QObject::tr("error reading file %1 : %2")
320  .arg(mcsp_msRunId.get()->getFileName())
321  .arg(error.qwhat()));
322  }
323 }
324 
325 
326 bool
328 {
329  return false;
330 }
331 
332 std::vector<double>
334 {
335  qDebug();
336 
337  try
338  {
339 
340  MsRunReaderRetentionTimeLine reader_timeline;
341 
342  readSpectrumCollectionByMsLevel(reader_timeline, 1);
343 
344  return reader_timeline.getRetentionTimeLine();
345  }
346 
347  catch(ExceptionNotFound &error)
348  {
349  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
350  .arg(mcsp_msRunId.get()->getFileName())
351  .arg(error.qwhat()));
352  }
353  catch(PappsoException &error)
354  {
355  throw PappsoException(QObject::tr("error reading file %1 : %2")
356  .arg(mcsp_msRunId.get()->getFileName())
357  .arg(error.qwhat()));
358  }
359 }
360 
361 
362 Trace
364 {
365  qDebug();
366 
367  try
368  {
369  MsRunReaderTicChromatogram ms_run_reader;
370 
371  readSpectrumCollection(ms_run_reader);
372 
373  return ms_run_reader.getTicChromatogram();
374  }
375 
376  catch(ExceptionNotFound &error)
377  {
378  throw ExceptionNotFound(QObject::tr("error reading file %1 : %2")
379  .arg(mcsp_msRunId.get()->getFileName())
380  .arg(error.qwhat()));
381  }
382  catch(PappsoException &error)
383  {
384  throw PappsoException(QObject::tr("error reading file %1 : %2")
385  .arg(mcsp_msRunId.get()->getFileName())
386  .arg(error.qwhat()));
387  }
388 }
389 
390 
392 {
393 }
394 
395 
397 {
398 }
399 
400 
401 bool
403 {
404  return true;
405 }
406 
407 
408 void
410  const QualifiedMassSpectrum &qualified_mass_spectrum)
411 {
412  // In this specialized reader we want to compute the total ion current
413  // chromatogram that plot the sum of all the ion intensities in the spectra as
414  // a function of the retention time.
415 
416  uint spectrum_ms_level = qualified_mass_spectrum.getMsLevel();
417 
418  if(spectrum_ms_level != 1)
419  return;
420 
421  double sumY = qualified_mass_spectrum.getMassSpectrumSPtr()->sumY();
422 
423  if(!sumY)
424  return;
425 
426  double rt = qualified_mass_spectrum.getRtInMinutes();
427 
428  using Pair = std::pair<double, double>;
429  using Map = std::map<double, double>;
430  using Iterator = Map::iterator;
431 
432  std::pair<Iterator, bool> res = m_ticChromMapTrace.insert(Pair(rt, sumY));
433 
434  if(!res.second)
435  {
436  // One other same rt value was seen already (like in ion mobility mass
437  // spectrometry, for example). Only increment the y value.
438 
439  res.first->second += sumY;
440  }
441 }
442 
443 
444 Trace
446 {
447  return m_ticChromMapTrace.toTrace();
448 }
449 
450 
451 } // namespace pappso
Trace toTrace() const
Definition: maptrace.cpp:218
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
collect retention times along MS run
Definition: msrunreader.h:150
const std::vector< double > & getRetentionTimeLine() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
std::vector< double > m_retention_time_list
Definition: msrunreader.h:152
provides a multimap to find quickly spectrum index from scan number
Definition: msrunreader.h:132
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
std::size_t getSpectrumIndexFromScanNumber(std::size_t scan_number) const
std::multimap< std::size_t, std::size_t > m_mmap_scan2index
Definition: msrunreader.h:134
calculate a TIC chromatogram
Definition: msrunreader.h:168
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &qualified_mass_spectrum) override
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
base class to read MSrun. The only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition: msrunreader.h:191
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:291
MsRunReaderScanNumberMultiMap * mpa_multiMapScanNumber
Definition: msrunreader.h:292
virtual bool hasScanNumbers() const
tells if spectra can be accessed using scan numbers. By default, it returns false. Only overridden func...
void setMonoThread(bool is_mono_thread)
set only one is_mono_thread to true
virtual std::vector< double > getRetentionTimeLine()
retention timeline: get retention times along the MSrun in seconds
virtual std::size_t scanNumber2SpectrumIndex(std::size_t scan_number)
if possible, converts a scan number into a spectrum index. This is a convenient function to help trans...
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
bool isMonoThread() const
virtual Trace getTicChromatogram()
MsRunReader(MsRunIdCstSPtr &ms_run_id)
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
const MsRunIdCstSPtr & getMsRunId() const
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
unsigned long getTotalCount() const
virtual void loadingEnded() override
std::vector< unsigned long > m_countMsLevelSpectrum
Definition: msrunreader.h:115
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
Definition: msrunreader.cpp:97
unsigned long getMsLevelCount(unsigned int ms_level) const
virtual const QString & qwhat() const
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
pappso_double getRtInMinutes() const
Get the retention time in minutes.
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual bool isReadAhead() const
tells if we want to read ahead spectrum
Definition: msrunreader.cpp:63
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual bool needMsLevelPeakList(unsigned int ms_level) const final
tells if we need the peak list (if we want the binary data) for each spectrum, given an MS level
Definition: msrunreader.cpp:69
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
Definition: msrunreader.cpp:57
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
Definition: msrunreader.cpp:87
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:52
A simple container of DataPoint instances.
Definition: trace.h:148
int msRunReaderSPtrMetaTypeId
Definition: msrunreader.cpp:34
base interface to read MSrun files
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
@ rt
Retention time.
unsigned int uint
Definition: types.h:55