libpappsomspp
Library for mass spectrometry
msrunxicextractordisk.cpp
Go to the documentation of this file.
1 /**
2  * \file pappsomspp/xicextractor/private/msrunxicextractordisk.cpp
3  * \date 12/05/2018
4  * \author Olivier Langella
5  * \brief proteowizard based XIC extractor featuring disk cache
6  */
7 
8 /*******************************************************************************
9  * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10  *
11  * This file is part of the PAPPSOms++ library.
12  *
13  * PAPPSOms++ is free software: you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation, either version 3 of the License, or
16  * (at your option) any later version.
17  *
18  * PAPPSOms++ is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License
24  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25  *
26  * Contributors:
27  * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28  *implementation
29  ******************************************************************************/
30 
31 #include "msrunxicextractordisk.h"
32 #include <QDebug>
33 #include "../../pappsoexception.h"
34 #include "../../massspectrum/massspectrum.h"
35 
36 namespace pappso
37 {
38 
40  const QDir &temporary_dir)
41  : pappso::MsRunXicExtractor(msrun_reader)
42 {
43  mpa_temporaryDirectory = nullptr;
44  m_temporaryDirectory = temporary_dir.absolutePath();
45 }
46 
48  : pappso::MsRunXicExtractor(other)
49 {
50 
52  mpa_temporaryDirectory = new QTemporaryDir(
53  QString("%1/msrun_%2_")
55  .arg(msp_msrun_reader.get()->getMsRunId().get()->getXmlId()));
56 }
57 
59 {
60  if(mpa_temporaryDirectory != nullptr)
61  {
63  }
64 }
65 
66 void
68 {
69  qDebug();
70  try
71  {
73  // msp_msrun_reader = nullptr;
74  }
75  catch(pappso::PappsoException &errora)
76  {
77  qDebug();
79  QObject::tr("Error reading file (%1) : %2")
80  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
81  .arg(errora.qwhat()));
82  }
83  catch(std::exception &error)
84  {
85  qDebug();
87  QObject::tr("Error reading file (%1) using : %2")
88  .arg(msp_msrun_reader.get()->getMsRunId().get()->toString())
89  .arg(error.what()));
90  }
91 }
92 
93 
94 void
96  UiMonitorInterface &monitor, std::vector<XicCoordSPtr> &xic_coord_list)
97 {
98  monitor.setStatus(
99  QObject::tr("extracting %1 XICs").arg(xic_coord_list.size()));
100  monitor.setTotalSteps(xic_coord_list.size());
101  // sort xic by mz:
102  std::sort(xic_coord_list.begin(),
103  xic_coord_list.end(),
104  [](XicCoordSPtr &a, XicCoordSPtr &b) {
105  return a.get()->mzRange.getMz() < b.get()->mzRange.getMz();
106  });
107 
108  for(XicCoordSPtr &p_xic_coord : xic_coord_list)
109  {
110  extractOneXicCoord(*p_xic_coord.get());
111  monitor.count();
112  }
113 }
114 
115 void
117 {
118  std::shared_ptr<Xic> msrunxic_sp = xic_coord.xicSptr;
119 
120  double rt_begin = xic_coord.rtTarget - m_retentionTimeAroundTarget;
121  double rt_end = xic_coord.rtTarget + m_retentionTimeAroundTarget;
122 
123 
124  std::vector<MsRunSliceSPtr> slice_list;
125  slice_list = acquireSlices(xic_coord.mzRange);
126 
127  if(slice_list.size() == 0)
128  {
130  QObject::tr("Error getMsRunXicSp slice_list.size() == 0"));
131  }
132 
133  for(std::size_t i = 0; i < m_retentionTimeList.size(); i++)
134  {
135 
136  DataPoint xic_element;
137  xic_element.x = m_retentionTimeList[i];
138  xic_element.y = 0;
139  if((xic_element.x < rt_begin) || (xic_element.x > rt_end))
140  continue;
141 
142  for(auto &&msrun_slice : slice_list)
143  {
144  const MassSpectrum &spectrum = msrun_slice.get()->getSpectrum(i);
145  for(auto &&peak : spectrum)
146  {
147  if(xic_coord.mzRange.contains(peak.x))
148  {
150  {
151  xic_element.y += peak.y;
152  }
153  else
154  {
155  if(xic_element.y < peak.y)
156  {
157  xic_element.y = peak.y;
158  }
159  }
160  }
161  }
162  }
163  msrunxic_sp.get()->push_back(xic_element);
164  }
165 }
166 
167 void
169 {
170  qDebug();
171  m_minMz = 5000;
172  m_maxMz = 0;
173 
174  unsigned int slice_number;
175  std::map<unsigned int, MassSpectrum> spectrum_map;
176 
177  /*
178  const pwiz::msdata::SpectrumList *p_spectrum_list =
179  p_msdatafile->run.spectrumListPtr.get();
180 
181  std::size_t spectrum_list_size = p_spectrum_list->size();
182  pwiz::msdata::SpectrumPtr pwiz_spectrum;
183  */
184 
185  m_rtSize = m_msrun_points.size();
186 
187 
188  MassSpectrumCstSPtr spectrum;
189  for(auto &&msrun_point : m_msrun_points)
190  {
191 
192  spectrum_map.clear();
193 
194  m_retentionTimeList.push_back(msrun_point.rt);
195 
196  spectrum =
197  msp_msrun_reader.get()->massSpectrumCstSPtr(msrun_point.spectrum_index);
198 
199  const MassSpectrum *p_spectrum = spectrum.get();
200  if(p_spectrum->size() > 0)
201  {
202  if(p_spectrum->begin()->x < m_minMz)
203  {
204  m_minMz = p_spectrum->begin()->x;
205  }
206  // iterate through the m/z-intensity pairs
207 
208  if(p_spectrum->back().x > m_maxMz)
209  {
210  m_maxMz = p_spectrum->back().x;
211  }
212 
213  for(auto &peak : *p_spectrum)
214  {
215 
216  slice_number = peak.x;
217 
218  std::pair<std::map<unsigned int, MassSpectrum>::iterator, bool>
219  ret = spectrum_map.insert(std::pair<unsigned int, MassSpectrum>(
220  slice_number, MassSpectrum()));
221 
222  ret.first->second.push_back(peak);
223  // auto ret = spectrum_map.insert(std::pair<unsigned int,
224  // MassSpectrum>(slice_number,MassSpectrum()));
225  // ret.first->second.push_back(peak);
226  }
227 
228  // slices are ready for this retention time
229  storeSlices(spectrum_map, m_retentionTimeList.size() - 1);
230  }
231  }
232 
233  endPwizRead();
234  qDebug();
235 }
236 
237 
238 void
240  std::map<unsigned int, MassSpectrum> &spectrum_map, std::size_t ipos)
241 {
242  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
243 
244  for(auto &&spectrum_pair : spectrum_map)
245  {
246  appendSliceOnDisk(spectrum_pair.first, spectrum_pair.second, ipos);
247  }
248 
249  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
250 }
251 
252 void
254  MassSpectrum &spectrum,
255  std::size_t ipos)
256 {
257  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
258  QFile slice_file(
259  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
260  bool new_file = false;
261  if(!slice_file.exists())
262  {
263  new_file = true;
264  }
265  if(!slice_file.open(QIODevice::WriteOnly | QIODevice::Append))
266  {
268  QObject::tr("unable to open file %1").arg(slice_file.fileName()));
269  }
270  QDataStream stream(&slice_file);
271 
272  if(new_file)
273  {
274  stream << (quint32)slice_number;
275  stream << (quint32)m_rtSize;
276  }
277 
278  stream << (quint32)ipos;
279  stream << spectrum;
280 
281  slice_file.flush();
282  slice_file.close();
283  // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
284 }
285 
287 MsRunXicExtractorDisk::unserializeSlice(unsigned int slice_number)
288 {
289  qDebug();
290  try
291  {
292  std::shared_ptr<MsRunSlice> msrun_slice_sp =
293  std::make_shared<MsRunSlice>(MsRunSlice());
294 
295  QFile slice_file(
296  QString("%1/%2").arg(mpa_temporaryDirectory->path()).arg(slice_number));
297  if(!slice_file.exists())
298  {
299  msrun_slice_sp.get()->setSize(m_rtSize);
300  msrun_slice_sp.get()->setSliceNumber(slice_number);
301  return msrun_slice_sp;
302  }
303  if(!slice_file.open(QIODevice::ReadOnly))
304  {
306  QObject::tr("unable to open file %1 in readonly")
307  .arg(slice_file.fileName()));
308  }
309  QDataStream stream(&slice_file);
310 
311  stream >> *(msrun_slice_sp.get());
312 
313  slice_file.close();
314 
315  return msrun_slice_sp;
316  }
317  catch(pappso::PappsoException &error)
318  {
320  QObject::tr("error unserializing slice %1:\n%2")
321  .arg(slice_number)
322  .arg(error.qwhat()));
323  }
324  qDebug();
325 }
326 
327 std::vector<MsRunSliceSPtr>
329 {
330  QMutexLocker lock(&m_mutex);
331  std::vector<MsRunSliceSPtr> slice_list;
332  for(unsigned int i = mz_range.lower(); i <= mz_range.upper(); i++)
333  {
334  auto it = std::find_if(m_msRunSliceListCache.begin(),
335  m_msRunSliceListCache.end(),
336  [i](const MsRunSliceSPtr &slice_sp) {
337  return slice_sp.get()->getSliceNumber() == i;
338  });
339  if(it != m_msRunSliceListCache.end())
340  {
341  slice_list.push_back(*it);
342  m_msRunSliceListCache.push_back(*it);
343  }
344  else
345  {
346  MsRunSliceSPtr slice_sp = unserializeSlice(i);
347  slice_list.push_back(slice_sp);
348  m_msRunSliceListCache.push_back(slice_sp);
349  }
350  }
351 
352  if(m_msRunSliceListCache.size() > 20)
353  {
354  m_msRunSliceListCache.pop_front();
355  }
356  return slice_list;
357 }
358 
359 
360 void
362 {
363  msp_msrun_reader.get()->releaseDevice();
364 }
365 } // namespace pappso
Class to represent a mass spectrum.
Definition: massspectrum.h:71
std::vector< MsRunSliceSPtr > acquireSlices(const MzRange &mz_range)
retrieve all the slices corresponding to a given mz_range
std::vector< pappso::pappso_double > m_retentionTimeList
MsRunXicExtractorDisk(MsRunReaderSPtr &msrun_reader)
std::deque< MsRunSliceSPtr > m_msRunSliceListCache
virtual void storeSlices(std::map< unsigned int, MassSpectrum > &slice_vector, std::size_t ipos)
store MassSpectrum slices (by daltons) for a given retention time
virtual void extractXicCoordSPtrList(UiMonitorInterface &monitor, std::vector< XicCoordSPtr > &xic_coord_list) override
extract a list of XIC given a list of xic coordinates to extract
void extractOneXicCoord(XicCoord &xic_coord)
void appendSliceOnDisk(unsigned int slice_number, MassSpectrum &spectrum, std::size_t ipos)
append a slice on disk (in a file)
MsRunSliceSPtr unserializeSlice(unsigned int slice_number)
get one slice from disk by her slice number (dalton)
std::vector< MsRunXicExtractorPoints > m_msrun_points
pappso_double lower() const
Definition: mzrange.h:71
pappso_double upper() const
Definition: mzrange.h:77
bool contains(pappso_double) const
Definition: mzrange.cpp:120
virtual const QString & qwhat() const
virtual void setStatus(const QString &status)=0
current status of the process
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm the total number of steps is usefull to report...
virtual void count()=0
count steps report when a step is computed in an algorithm
MsRunReader based XIC extractor featuring disk cache.
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:166
std::shared_ptr< const MsRunSlice > MsRunSliceSPtr
Definition: msrunslice.h:39
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
Definition: massspectrum.h:55
@ sum
sum of intensities
std::shared_ptr< XicCoord > XicCoordSPtr
Definition: xiccoord.h:41
pappso_double x
Definition: datapoint.h:22
pappso_double y
Definition: datapoint.h:23
coordinates of the XIC to extract and the resulting XIC after extraction
Definition: xiccoord.h:54
XicSPtr xicSptr
extracted xic
Definition: xiccoord.h:113
double rtTarget
the targeted retention time to extract around intended in seconds, and related to one msrun....
Definition: xiccoord.h:109
MzRange mzRange
the mass to extract
Definition: xiccoord.h:103