libpappsomspp
Library for mass spectrometry
pwizmsrunreader.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3  * \date 29/05/2018
4  * \author Olivier Langella
5  * \brief MSrun file reader based on the ProteoWizard library
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 
32 #include <QDebug>
33 
34 #include "pwizmsrunreader.h"
35 
36 #include <pwiz/data/msdata/DefaultReaderList.hpp>
37 
38 
39 #include "../../utils.h"
40 #include "../../pappsoexception.h"
41 #include "../../exception/exceptionnotfound.h"
42 #include "../../exception/exceptionnotpossible.h"
43 
44 
45 // int pwizMsRunReaderMetaTypeId =
46 // qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47 
48 
49 namespace pappso
50 {
51 
52 
54  : MsRunReader(msrun_id_csp)
55 {
56  // The initialization needs to be done immediately so that we get the pwiz
57  // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58  // pointer will be set to msp_msData.
59 
60  initialize();
61 }
62 
63 
64 void
66 {
67  std::string file_name_std =
69 
70  // Make a backup of the current locale
71  std::string env_backup = setlocale(LC_ALL, "");
72  // struct lconv *lc = localeconv();
73 
74  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75  //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76  //<< lc->decimal_point;
77 
78  // Now actually search the useful MSDataPtr to the member variable.
79 
80  pwiz::msdata::DefaultReaderList defaultReaderList;
81 
82  std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83 
84  try
85  {
86  defaultReaderList.read(file_name_std, msDataPtrVector);
87  }
88  catch(std::exception &error)
89  {
90  qDebug() << QString("Failed to read the data from file %1")
91  .arg(QString::fromStdString(file_name_std));
92 
93  throw(PappsoException(
94  QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95  .arg(mcsp_msRunId->getFileName())
96  .arg(mcsp_msRunId.get()->toString())
97  .arg(error.what())));
98  }
99 
100  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101  //<< "The number of runs is:" << msDataPtrVector.size()
102  //<< "The number of spectra in first run is:"
103  //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104 
105  // Single-run file handling here.
106 
107  // Specific case of the MGF data format: we do not have a run id for that kind
108  // of data. In this case there must be a single run!
109 
110  if(mcsp_msRunId->getRunId().isEmpty())
111  {
112  if(msDataPtrVector.size() != 1)
113  throw(
114  ExceptionNotPossible("For the kind of file at hand there can only be "
115  "one run in the file."));
116 
117  // At this point we know the single msDataPtr is the one we are looking
118  // for.
119 
120  msp_msData = msDataPtrVector.front();
121  }
122 
123  else
124  {
125  // Multi-run file handling here.
126  for(auto &msDataPtr : msDataPtrVector)
127  {
128  if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
129  {
130  msp_msData = msDataPtr;
131 
132  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
133  //<< "Found the right MSDataPtr for run id.";
134 
135  break;
136  }
137  }
138  }
139 
140  if(msp_msData == nullptr)
141  {
142  throw(ExceptionNotPossible(
143  QString("Could not find a MSDataPtr matching the requested run id : %1")
144  .arg(mcsp_msRunId.get()->toString())));
145  }
146 
147 
148  // check if this MS run can be used with scan numbers
149  // MS:1000490 Agilent instrument model
150  pwiz::cv::CVID native_id_format =
151  pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
152 
153  // msp_msData.get()->getDefaultNativeIDFormat();
154 
155  if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
156  {
157  m_hasScanNumbers = true;
158  }
159  else
160  {
161  m_hasScanNumbers = false;
162  }
163 
164  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
165  {
166  m_hasScanNumbers = true;
167  }
168 }
169 
170 
172 {
173 }
174 
175 
176 pwiz::msdata::SpectrumPtr
177 PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
178  std::size_t spectrum_index,
179  bool want_binary_data) const
180 {
181  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
182 
183  try
184  {
185  native_pwiz_spectrum_sp =
186  p_spectrum_list->spectrum(spectrum_index, want_binary_data);
187  }
188  catch(std::runtime_error &error)
189  {
190  qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
191  << typeid(error).name();
192 
193  throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
194  "MS file std::runtime_error :\n%2")
195  .arg(spectrum_index)
196  .arg(error.what()));
197  }
198  catch(std::exception &error)
199  {
200  qDebug() << "getPwizSpectrumPtr error " << error.what()
201  << typeid(error).name();
202 
203  throw ExceptionNotFound(
204  QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
205  .arg(spectrum_index)
206  .arg(error.what()));
207  }
208 
209  if(native_pwiz_spectrum_sp.get() == nullptr)
210  {
211  throw ExceptionNotFound(
212  QObject::tr(
213  "Pwiz spectrum index %1 not found in MS file : null pointer")
214  .arg(spectrum_index));
215  }
216 
217  return native_pwiz_spectrum_sp;
218 }
219 
220 
221 bool
223  pwiz::msdata::Spectrum *spectrum_p,
224  QualifiedMassSpectrum &qualified_mass_spectrum) const
225 {
226 
227  // We now have to set the retention time at which this mass spectrum
228  // was acquired. This is the scan start time.
229 
230  if(!spectrum_p->scanList.scans[0].hasCVParam(
231  pwiz::msdata::MS_scan_start_time))
232  {
233  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
234  { // MGF could not have scan start time
235  qualified_mass_spectrum.setRtInSeconds(-1);
236  }
237  else
238  {
239  throw(ExceptionNotPossible(
240  "The spectrum has no scan start time value set."));
241  }
242  }
243  else
244  {
245  pwiz::data::CVParam retention_time_cv_param =
246  spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
247 
248  // Try to get the units of the retention time value.
249 
250  std::string unit_name = retention_time_cv_param.unitsName();
251  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
252  //<< "Unit name for the retention time:"
253  //<< QString::fromStdString(unit_name);
254 
255  if(unit_name == "second")
256  {
257  qualified_mass_spectrum.setRtInSeconds(
258  retention_time_cv_param.valueAs<double>());
259  }
260  else if(unit_name == "minute")
261  {
262  qualified_mass_spectrum.setRtInSeconds(
263  retention_time_cv_param.valueAs<double>() * 60);
264  }
265  else
266  throw(
267  ExceptionNotPossible("Could not determine the unit for the "
268  "scan start time value."));
269  }
270 
271  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
272  //<< "Retention time for spectrum is:"
273  //<< qualified_mass_spectrum.getRtInSeconds();
274 
275  // Old version not checking unit (by default unit is minutes for RT,
276  // not seconds)
277  //
278  // pappso_double retentionTime =
279  // QString(spectrum_p->scanList.scans[0]
280  //.cvParam(pwiz::msdata::MS_scan_start_time)
281  //.value.c_str())
282  //.toDouble();
283  // qualified_mass_spectrum.setRtInSeconds(retentionTime);
284 
285  return true;
286 }
287 
288 
289 bool
291  pwiz::msdata::Spectrum *spectrum_p,
292  QualifiedMassSpectrum &qualified_mass_spectrum) const
293 {
294  // Not all the acquisitions have ion mobility data. We need to test
295  // that:
296 
297  if(spectrum_p->scanList.scans[0].hasCVParam(
298  pwiz::msdata::MS_ion_mobility_drift_time))
299  {
300 
301  // qDebug() << "as strings:"
302  //<< QString::fromStdString(
303  // spectrum_p->scanList.scans[0]
304  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
305  //.valueAs<std::string>());
306 
307  pappso_double driftTime =
308  spectrum_p->scanList.scans[0]
309  .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
310  .valueAs<double>();
311 
312  // qDebug() << "driftTime:" << driftTime;
313 
314  // Old version requiring use of QString.
315  // pappso_double driftTime =
316  // QString(spectrum_p->scanList.scans[0]
317  //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
318  //.value.c_str())
319  //.toDouble();
320 
321  // Now make positively sure that the obtained value is correct.
322  // Note that I suffered a lot with Waters Synapt data that
323  // contained apparently correct drift time XML element that in
324  // fact contained either NaN or inf. When such mass spectra were
325  // encountered, the mz,i data were bogus and crashed the data
326  // loading functions. We just want to skip this kind of bogus mass
327  // spectrum by letting the caller know that the drift time was
328  // bogus ("I" is Filippo Rusconi).
329 
330  if(std::isnan(driftTime) || std::isinf(driftTime))
331  {
332  // qDebug() << "detected as nan or inf.";
333 
334  return false;
335  }
336  else
337  {
338  // The mzML standard stipulates that drift times are in
339  // milliseconds.
340  qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
341  }
342  }
343  // End of
344  // if(spectrum_p->scanList.scans[0].hasCVParam(
345  // pwiz::msdata::MS_ion_mobility_drift_time))
346  else
347  {
348  // Not a bogus mass spectrum but also not a drift spectrum, set -1
349  // as the drift time value.
350  qualified_mass_spectrum.setDtInMilliSeconds(-1);
351  }
352 
353  return true;
354 }
355 
356 
359  const MassSpectrumId &massSpectrumId,
360  pwiz::msdata::Spectrum *spectrum_p,
361  bool want_binary_data,
362  bool &ok) const
363 {
364  // qDebug();
365 
366  std::string env;
367  env = setlocale(LC_ALL, "");
368  setlocale(LC_ALL, "C");
369 
370  QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
371 
372  try
373  {
374 
375  // We want to store the ms level for this spectrum
376 
377  int msLevel =
378  (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
379 
380  qualified_mass_spectrum.setMsLevel(msLevel);
381 
382  // We want to know if this spectrum is a fragmentation spectrum obtained
383  // from a selected precursor ion.
384 
385  std::size_t precursor_list_size = spectrum_p->precursors.size();
386 
387  // qDebug() << "For spectrum at index:" <<
388  // massSpectrumId.getSpectrumIndex()
389  //<< "msLevel:" << msLevel
390  //<< "with number of precursors:" << precursor_list_size;
391 
392  if(precursor_list_size > 0)
393  {
394 
395  // Sanity check
396  if(msLevel < 2)
397  {
398  qDebug() << "Going to throw: msLevel cannot be less than two for "
399  "a spectrum that has items in its Precursor list.";
400 
401  throw(ExceptionNotPossible(
402  "msLevel cannot be less than two for "
403  "a spectrum that has items in its Precursor list."));
404  }
405 
406  // See what is the first precursor in the list.
407 
408  for(auto &precursor : spectrum_p->precursors)
409  {
410 
411  // Set this variable ready as we need that default value in
412  // certain circumstances.
413 
414  std::size_t precursor_spectrum_index =
415  std::numeric_limits<std::size_t>::max();
416 
417  // The spectrum ID of the precursor might be empty.
418 
419  if(precursor.spectrumID.empty())
420  {
421  // qDebug() << "The precursor's spectrum ID is empty.";
422 
423  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
424  {
425  // qDebug()
426  //<< "Format is MGF, precursor's spectrum ID can be
427  // empty.";
428  }
429  else
430  {
431  // When performing Lumos Fusion fragmentation experiments
432  // in Tune mode and with recording, the first spectrum of
433  // the list is a fragmentation spectrum (ms level 2) that
434  // has no identity for the precursor spectrum because
435  // there is no full scan accquisition.
436  }
437  }
438  // End of
439  // if(precursor.spectrumID.empty())
440  else
441  {
442  // We could get a native precursor spectrum id, so convert
443  // that native id to a spectrum index.
444 
445  qualified_mass_spectrum.setPrecursorNativeId(
446  QString::fromStdString(precursor.spectrumID));
447 
448  if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
449  {
450  // qDebug() << "The native id of the precursor spectrum is
451  // empty.";
452  }
453 
454  // Get the spectrum index of the spectrum that contained the
455  // precursor ion.
456 
457  precursor_spectrum_index =
458  msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
459 
460  // Note that the Mascot MGF format has a peculiar handling of
461  // the precursor ion stuff so we cannot throw.
462  if(precursor_spectrum_index ==
463  msp_msData->run.spectrumListPtr->size())
464  {
465  if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
466  {
467  throw(ExceptionNotPossible(
468  "Failed to find the index of the "
469  "precursor ion's spectrum."));
470  }
471  }
472 
473  qualified_mass_spectrum.setPrecursorSpectrumIndex(
474  precursor_spectrum_index);
475 
476  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
477  // "()"
478  //<< "Set the precursor spectrum index to:"
479  //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
480  //<< "for qualified mass spectrum:"
481  //<< &qualified_mass_spectrum;
482  }
483 
484  if(!precursor.selectedIons.size())
485  {
486  qDebug()
487  << "Going to throw The spectrum has msLevel > 1 but the "
488  "precursor ions's selected ions list is empty..";
489 
490  throw(
491  ExceptionNotPossible("The spectrum has msLevel > 1 but the "
492  "precursor ions's selected ions "
493  "list is empty."));
494  }
495 
496  pwiz::msdata::SelectedIon &ion =
497  *(precursor.selectedIons.begin());
498 
499  // selected ion m/z
500 
501  pappso_double selected_ion_mz =
502  QString(
503  ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
504  .toDouble();
505 
506  // selected ion peak intensity
507 
508  pappso_double selected_ion_peak_intensity =
509  QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
510  .toDouble();
511 
512  // charge state
513 
514  unsigned int selected_ion_charge_state =
515  QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
516  .toUInt();
517 
518  // At this point we can craft a new PrecursorIonData instance and
519  // push it back to the vector.
520 
521  PrecursorIonData precursor_ion_data(selected_ion_mz,
522  selected_ion_charge_state,
523  selected_ion_peak_intensity);
524 
525  qualified_mass_spectrum.appendPrecursorIonData(
526  precursor_ion_data);
527 
528  // General sum-up
529 
530  // qDebug()
531  //<< "Appended new PrecursorIonData:"
532  //<< "mz:"
533  //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
534  //<< "charge:"
535  //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
536  //<< "intensity:"
537  //<< qualified_mass_spectrum.getPrecursorIonData()
538  //.back()
539  //.intensity;
540  }
541  // End of
542  // for(auto &precursor : spectrum_p->precursors)
543  }
544  // End of
545  // if(precursor_list_size > 0)
546  else
547  {
548  // Sanity check
549 
550  // Unfortunately, logic here is defeated by some vendors that have
551  // files with MS2 spectra without <precursorList>. Thus we have
552  // spectrum_p->precursors.size() == 0 and msLevel > 1.
553 
554  // if(msLevel != 1)
555  //{
556  // throw(
557  // ExceptionNotPossible("msLevel cannot be different than 1 if "
558  //"there is not a single precursor ion."));
559  //}
560  }
561 
562  // Sanity check.
563 
564  if(precursor_list_size !=
565  qualified_mass_spectrum.getPrecursorIonData().size())
566  {
567  qDebug() << "Going to throw The number of precursors in the file is "
568  "different from the number of precursors in memory.";
569 
571  QObject::tr("The number of precursors in the file is different "
572  "from the number of precursors in memory."));
573  }
574 
575  // if(precursor_list_size == 1)
576  //{
577  // qDebug() << "Trying to get the mz value of the unique precursor ion:"
578  //<< qualified_mass_spectrum.getPrecursorMz();
579  //}
580 
581  processRetentionTime(spectrum_p, qualified_mass_spectrum);
582 
583  processDriftTime(spectrum_p, qualified_mass_spectrum);
584 
585  // for(pwiz::data::CVParam cv_param : ion.cvParams)
586  //{
587  // pwiz::msdata::CVID param_id = cv_param.cvid;
588  // qDebug() << param_id;
589  // qDebug() << cv_param.cvid.c_str();
590  // qDebug() << cv_param.name().c_str();
591  // qDebug() << cv_param.value.c_str();
592  //}
593 
594  if(want_binary_data)
595  {
596 
597  // Fill-in MZIntensityPair vector for convenient access to binary
598  // data
599 
600  std::vector<pwiz::msdata::MZIntensityPair> pairs;
601  spectrum_p->getMZIntensityPairs(pairs);
602 
603  MassSpectrum spectrum;
604  double tic = 0;
605  // std::size_t iterCount = 0;
606 
607  // Iterate through the m/z-intensity pairs
608  for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
609  it = pairs.begin(),
610  end = pairs.end();
611  it != end;
612  ++it)
613  {
614  //++iterCount;
615 
616  // qDebug() << "it->mz " << it->mz << " it->intensity" <<
617  // it->intensity;
618  if(it->intensity)
619  {
620  spectrum.push_back(DataPoint(it->mz, it->intensity));
621  tic += it->intensity;
622  }
623  }
624 
625  if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
626  {
627  // Sort peaks by mz
628  spectrum.sortMz();
629  }
630 
631  // lc = localeconv ();
632  // qDebug() << " env=" << localeconv () << " lc->decimal_point "
633  // << lc->decimal_point;
634  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
635  // "<< spectrum.size();
636  MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
637  qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
638 
639  // double sumY =
640  // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
641  // <<
642  // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
643  //<< "iterCount:" << iterCount << "Spectrum size "
644  //<< spectrum.size() << "with tic:" << tic
645  //<< "and sumY:" << sumY;
646  }
647  else
648  qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
649  }
650  catch(PappsoException &errorp)
651  {
652  qDebug() << "Going to throw";
653 
655  QObject::tr("Error reading data using the proteowizard library: %1")
656  .arg(errorp.qwhat()));
657  }
658  catch(std::exception &error)
659  {
660  qDebug() << "Going to throw";
661 
663  QObject::tr("Error reading data using the proteowizard library: %1")
664  .arg(error.what()));
665  }
666 
667  // setlocale(LC_ALL, env.c_str());
668 
669  ok = true;
670 
671  // qDebug() << "QualifiedMassSpectrum: " <<
672  // qualified_mass_spectrum.toString();
673  return qualified_mass_spectrum;
674 }
675 
676 
679  bool want_binary_data,
680  bool &ok) const
681 {
682 
683  std::string env;
684  env = setlocale(LC_ALL, "");
685  // struct lconv *lc = localeconv();
686 
687  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
688  //<< "env=" << env.c_str()
689  //<< "lc->decimal_point:" << lc->decimal_point;
690 
691  setlocale(LC_ALL, "C");
692 
693  MassSpectrumId massSpectrumId(mcsp_msRunId);
694 
695  if(msp_msData == nullptr)
696  {
697  setlocale(LC_ALL, env.c_str());
698  return (QualifiedMassSpectrum(massSpectrumId));
699  }
700 
701  // const bool want_binary_data = true;
702 
703  pwiz::msdata::SpectrumListPtr spectrum_list_p =
704  msp_msData->run.spectrumListPtr;
705 
706  if(spectrum_index == spectrum_list_p.get()->size())
707  {
708  setlocale(LC_ALL, env.c_str());
709  throw ExceptionNotFound(
710  QObject::tr("The spectrum index cannot be equal to the size of the "
711  "spectrum list."));
712  }
713 
714  // At this point we know the spectrum index might be sane, so store it in
715  // the mass spec id object.
716  massSpectrumId.setSpectrumIndex(spectrum_index);
717 
718  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
719  getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
720 
721  setlocale(LC_ALL, env.c_str());
722 
723  massSpectrumId.setNativeId(
724  QString::fromStdString(native_pwiz_spectrum_sp->id));
725 
727  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
728 }
729 
730 
731 bool
732 PwizMsRunReader::accept(const QString &file_name) const
733 {
734  // We want to know if we can handle the file_name.
735  pwiz::msdata::ReaderList reader_list;
736 
737  std::string reader_type = reader_list.identify(file_name.toStdString());
738 
739  if(!reader_type.empty())
740  return true;
741 
742  return false;
743 }
744 
745 
747 PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
748 {
749  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
750  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
751 }
752 
754 PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
755 {
756  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
757  return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
758 }
759 
761 PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
762  bool want_binary_data) const
763 {
764 
765  QualifiedMassSpectrum spectrum;
766  bool ok = false;
767 
768  spectrum =
769  qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
770 
771  if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
772  {
773  if(spectrum.getRtInSeconds() == 0)
774  {
775  // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
776  }
777  }
778 
779  // if(!ok)
780  // qDebug() << "Encountered a mass spectrum for which the status is bad.";
781 
782  return spectrum;
783 }
784 
785 
786 void
789 {
791 }
792 
793 
794 void
796  SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
797 {
798 
799  acquireDevice();
800  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
801 
802  // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
803  // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
804  // spectrum has been fully qualified (that is, the member data have been
805  // set), it is transferred to the handler passed as parameter to this
806  // function for the consumer to do what it wants with it.
807 
808  // Does the handler consuming the mass spectra read from file want these
809  // mass spectra to hold the binary data arrays (mz/i vectors)?
810 
811  const bool want_binary_data = handler.needPeakList();
812 
813 
814  std::string env;
815  env = setlocale(LC_ALL, "");
816  setlocale(LC_ALL, "C");
817 
818 
819  // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
820  // run member of msp_msData.
821 
822  pwiz::msdata::SpectrumListPtr spectrum_list_p =
823  msp_msData->run.spectrumListPtr;
824 
825  // We'll need it to perform the looping in the spectrum list.
826  std::size_t spectrum_list_size = spectrum_list_p.get()->size();
827 
828  // qDebug() << "The spectrum list has size:" << spectrum_list_size;
829 
830  // Inform the handler of the spectrum list so that it can handle feedback to
831  // the user.
832  handler.spectrumListHasSize(spectrum_list_size);
833 
834  // Iterate in the full list of spectra.
835 
836  for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
837  {
838 
839  // If the user of this reader instance wants to stop reading the
840  // spectra, then break this loop.
841  if(handler.shouldStop())
842  {
843  qDebug() << "The operation was cancelled. Breaking the loop.";
844  break;
845  }
846 
847  // Get the native pwiz-spectrum from the spectrum list.
848  // Note that this pointer is a shared pointer from pwiz.
849 
850  pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
851  getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
852 
853  /*
854  * we want to load metadata of the spectrum even if it does not contain
855  peaks
856 
857  * if(!native_pwiz_spectrum_sp->hasBinaryData())
858  {
859  // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
860  "
861  ()"
862  //<< "native pwiz spectrum is empty, continuing.";
863  continue;
864  }
865  */
866 
867  // Instantiate the mass spectrum id that will hold critical information
868  // like the the native id string and the spectrum index.
869 
870  MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
871 
872  // Get the spectrum native id as a QString to store it in the mass
873  // spectrum id class. This is will allow later to refer to the same
874  // spectrum starting back from the file.
875 
876  QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
877  massSpectrumId.setNativeId(native_id);
878 
879  // Finally, instantiate the qualified mass spectrum with its id. This
880  // function will continue performing pappso-spectrum detailed
881  // qualification.
882 
883  bool ok = false;
884 
885  QualifiedMassSpectrum qualified_mass_spectrum =
887  massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
888 
889  if(!ok)
890  {
891  // qDebug() << "Encountered a mass spectrum for which the returned "
892  //"status is bad.";
893  continue;
894  }
895 
896  // Before handing the mass spectrum out to the handler, see if the
897  // native mass spectrum was empty or not.
898 
899  // if(!native_pwiz_spectrum_sp->defaultArrayLength)
900  // qDebug() << "The mass spectrum has not defaultArrayLength";
901 
902  qualified_mass_spectrum.setEmptyMassSpectrum(
903  !native_pwiz_spectrum_sp->defaultArrayLength);
904 
905  // The handler will receive the index of the mass spectrum in the
906  // current run via the mass spectrum id member datum.
907  if(ms_level == 0)
908  {
909  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
910  }
911  else
912  {
913  if(qualified_mass_spectrum.getMsLevel() == ms_level)
914  {
915  handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
916  }
917  }
918  }
919 
920  setlocale(LC_ALL, env.c_str());
921  // End of
922  // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
923 
924  // Now let the loading handler know that the loading of the data has ended.
925  // The handler might need this "signal" to perform additional tasks or to
926  // cleanup cruft.
927 
928  // qDebug() << "Loading ended";
929  handler.loadingEnded();
930 }
931 
932 std::size_t
934 {
935  return msp_msData->run.spectrumListPtr.get()->size();
936 }
937 
938 bool
940 {
941  return m_hasScanNumbers;
942 }
943 
944 bool
946 {
947  msp_msData = nullptr;
948  return true;
949 }
950 
951 bool
953 {
954  if(msp_msData == nullptr)
955  {
956  initialize();
957  }
958  return true;
959 }
960 
961 
964  std::size_t spectrum_index, pappso::PrecisionPtr precision) const
965 {
966 
967  QualifiedMassSpectrum mass_spectrum =
968  qualifiedMassSpectrum(spectrum_index, false);
969 
970  return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
971 }
972 
975  const pappso::QualifiedMassSpectrum &mass_spectrum,
976  pappso::PrecisionPtr precision) const
977 {
978  XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
979 
980  xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
981 
982  xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
983 
984  return xic_coord;
985 }
986 
987 } // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
Definition: massspectrum.h:71
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
Base class to read an MSrun. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition: msrunreader.h:173
MsRunIdCstSPtr mcsp_msRunId
Definition: msrunreader.h:263
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file; must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
Get the precursor m/z ratio.
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectra from the MsRunReader class
Definition: msrunreader.h:56
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void spectrumListHasSize(std::size_t size)
Definition: msrunreader.cpp:52
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition: utils.cpp:127
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:44
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
std::shared_ptr< XicCoord > XicCoordSPtr
Definition: xiccoord.h:41
MSrun file reader based on the ProteoWizard library.