libpappsomspp
Library for mass spectrometry
msrunretentiontime.cpp
Go to the documentation of this file.
1
2/*******************************************************************************
3 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
4 *
5 * This file is part of the PAPPSOms++ library.
6 *
7 * PAPPSOms++ is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * PAPPSOms++ is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
19 *
20 * Contributors:
21 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
22 *implementation
23 ******************************************************************************/
24
25#include "msrunretentiontime.h"
26#include "../../exception/exceptionnotpossible.h"
27#include <QDebug>
28//#include <odsstream/odsexception.h>
29//#include <odsstream/odsdocwriter.h>
30//#include <QFileInfo>
31
32using namespace pappso;
33
34template <class T>
36 : m_ms2MedianFilter(10), m_ms2MeanFilter(15), m_ms1MeanFilter(1)
37{
38 msp_msrunReader = msrun_reader_sp;
39 mcsp_msrunId = msp_msrunReader.get()->getMsRunId();
40 m_ms1RetentionTimeVector = msp_msrunReader.get()->getRetentionTimeLine();
41
42
43 std::sort(m_ms1RetentionTimeVector.begin(),
45 [](const double &a, const double &b) { return (a < b); });
46}
47
48template <class T>
50 : m_ms2MedianFilter(other.m_ms2MedianFilter),
51 m_ms2MeanFilter(other.m_ms2MeanFilter),
52 m_ms1MeanFilter(other.m_ms1MeanFilter)
53{
58
59 m_seamarks = other.m_seamarks;
61
63
65}
66
67template <class T>
69{
70}
71
72template <class T>
73const MsRunId &
75{
76 return *(mcsp_msrunId.get());
77}
78
79template <typename T>
82{
83 return m_ms2MedianFilter;
84}
85
86template <class T>
87void
89 const FilterMorphoMedian &ms2MedianFilter)
90{
91 m_ms2MedianFilter = ms2MedianFilter;
92}
93
94template <typename T>
97{
98 return m_ms2MeanFilter;
99}
100
101
102template <class T>
103void
105{
106 m_ms2MeanFilter = ms2MeanFilter;
107}
108
109template <typename T>
112{
113 return m_ms1MeanFilter;
114}
115
116template <class T>
117void
119{
120 m_ms1MeanFilter = ms1MeanFilter;
121}
122
123template <class T>
124const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &
126{
127 qDebug();
128 return m_seamarks;
129}
130
131template <class T>
132const std::vector<double> &
134{
135 return m_alignedRetentionTimeVector;
136}
137
138template <class T>
139std::size_t
141{
142 return m_valuesCorrected;
143}
144template <class T>
145const std::vector<double> &
147{
148 return m_ms1RetentionTimeVector;
149}
150
151template <class T>
152Trace
154 const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &other_seamarks) const
155{
156 Trace common_points;
157 getCommonDeltaRt(common_points, other_seamarks);
158 return common_points;
159}
160
161template <class T>
162void
164 std::size_t ms2_spectrum_index)
165{
166
167 qDebug();
168 msp_msrunReader.get()->acquireDevice();
169 PeptideMs2Point ms2point;
170 ms2point.entityHash = peptide_id;
171 QualifiedMassSpectrum spectrum =
172 msp_msrunReader.get()->qualifiedMassSpectrum(ms2_spectrum_index, false);
173 ms2point.precursorIntensity = spectrum.getPrecursorIntensity();
174 ms2point.retentionTime = spectrum.getRtInSeconds();
175
176 // addSeamark(m_hash_fn(peptide_str.toStdString()), retentionTime);
177
178 m_allMs2Points.push_back(ms2point);
179
180 qDebug();
181}
182
183template <class T>
184void
186 double retentionTime,
187 double precursorIntensity)
188{
189 PeptideMs2Point ms2point;
190 ms2point.entityHash = peptide_id;
191 ms2point.precursorIntensity = precursorIntensity;
192 ms2point.retentionTime = retentionTime;
193 m_allMs2Points.push_back(ms2point);
194}
195
196
197template <class T>
198void
200{
201
202 qDebug();
203 if(m_allMs2Points.size() == 0)
204 {
205 // already computed
206 return;
207 }
208 m_seamarks.clear();
209 if(m_retentionTimeReferenceMethod ==
210 ComputeRetentionTimeReference::maximum_intensity)
211 {
212
213
214 std::sort(m_allMs2Points.begin(),
215 m_allMs2Points.end(),
216 [](const PeptideMs2Point &a, const PeptideMs2Point &b) {
217 if(a.entityHash == b.entityHash)
218 {
219 return (a.precursorIntensity > b.precursorIntensity);
220 }
221 return (a.entityHash < b.entityHash);
222 });
223
224 auto itend =
225 std::unique(m_allMs2Points.begin(),
226 m_allMs2Points.end(),
227 [](const PeptideMs2Point &a, const PeptideMs2Point &b) {
228 return (a.entityHash == b.entityHash);
229 });
230
231 auto it = m_allMs2Points.begin();
232 while(it != itend)
233 {
234 m_seamarks.push_back(
235 {it->entityHash, it->retentionTime, it->precursorIntensity});
236 it++;
237 }
238 }
239 m_allMs2Points.clear();
240
241 std::sort(m_seamarks.begin(),
242 m_seamarks.end(),
245 return (a.entityHash < b.entityHash);
246 });
247 qDebug();
248}
249
250template <class T>
251void
253 Trace &delta_rt,
254 const std::vector<MsRunRetentionTimeSeamarkPoint<T>> &other_seamarks) const
255{
256
257 qDebug();
258 auto it = other_seamarks.begin();
259
260 for(const MsRunRetentionTimeSeamarkPoint<T> &seamark : m_seamarks)
261 {
262 while((it != other_seamarks.end()) &&
263 (it->entityHash < seamark.entityHash))
264 {
265 it++;
266 }
267 if(it == other_seamarks.end())
268 break;
269 if(it->entityHash == seamark.entityHash)
270 {
271 delta_rt.push_back(DataPoint(
272 seamark.retentionTime, seamark.retentionTime - it->retentionTime));
273 }
274 }
275
276 qDebug();
277 if((m_ms2MedianFilter.getHalfWindowSize() * 2 + 1) >= delta_rt.size())
278 {
280 QObject::tr("ERROR : MS2 alignment of MS run '%1' (%2)' not possible : "
281 "\ntoo few MS2 points (%3) in common")
282 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
283 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
284 .arg(delta_rt.size()));
285 }
286
287 qDebug();
288 if((m_ms2MeanFilter.getHalfWindowSize() * 2 + 1) >= delta_rt.size())
289 {
291 QObject::tr("ERROR : MS2 alignment of MS run '%1' (%2)' not possible : "
292 "\ntoo few MS2 points (%3) in common")
293 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
294 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
295 .arg(delta_rt.size()));
296 }
297 delta_rt.sortX();
298
299 // there can be multiple entities (peptides) at one retention time
300 // in this case, avoid retention time redundancy by applying unique on trace :
301 delta_rt.unique();
302
303 qDebug();
304}
305
306
307template <class T>
308double
310{
311 if(isAligned())
312 {
313 return m_alignedRetentionTimeVector.front();
314 }
315 return m_ms1RetentionTimeVector.front();
316}
317template <class T>
318double
320{
321 if(isAligned())
322 {
323 return m_alignedRetentionTimeVector.back();
324 }
325 return m_ms1RetentionTimeVector.back();
326}
327
328
329template <class T>
330double
332 double original_retention_time) const
333{
334 if(m_alignedRetentionTimeVector.size() < 3)
335 {
337 QObject::tr("ERROR : too few aligned points to compute aligned "
338 "retention time (%1)")
339 .arg(m_ms1RetentionTimeVector.size()));
340 }
341 if(m_alignedRetentionTimeVector.size() != m_ms1RetentionTimeVector.size())
342 {
344 QObject::tr("ERROR : m_alignedRetentionTimeVector.size() %1 != %2 "
345 "m_ms1RetentionTimeVector.size()")
346 .arg(m_alignedRetentionTimeVector.size())
347 .arg(m_ms1RetentionTimeVector.size()));
348 }
349 auto it_plus =
350 std::find_if(m_ms1RetentionTimeVector.begin(),
351 m_ms1RetentionTimeVector.end(),
352 [original_retention_time](const double &rt_point) {
353 return original_retention_time < rt_point;
354 });
355 double rt1_a, rt2_a, rt1_b, rt2_b;
356 if(it_plus == m_ms1RetentionTimeVector.end())
357 {
358 it_plus--;
359 }
360 if(it_plus == m_ms1RetentionTimeVector.begin())
361 {
362 it_plus++;
363 }
364 auto it_minus = it_plus - 1;
365
366 rt1_a = *it_minus;
367 rt2_a = *it_plus;
368
369 double ratio = (original_retention_time - rt1_a) / (rt2_a - rt1_a);
370
371 auto itref = m_alignedRetentionTimeVector.begin() +
372 std::distance(m_ms1RetentionTimeVector.begin(), it_minus);
373
374 rt1_b = *itref;
375 itref++;
376 rt2_b = *itref;
377
378 return (((rt2_b - rt1_b) * ratio) + rt1_b);
379}
380
381template <class T>
382double
384 double aligned_retention_time) const
385{
386 if(m_alignedRetentionTimeVector.size() < 3)
387 {
389 QObject::tr("ERROR : too few aligned points to compute aligned "
390 "retention time (%1)")
391 .arg(m_ms1RetentionTimeVector.size()));
392 }
393 if(m_alignedRetentionTimeVector.size() != m_ms1RetentionTimeVector.size())
394 {
396 QObject::tr("ERROR : m_alignedRetentionTimeVector.size() %1 != %2 "
397 "m_ms1RetentionTimeVector.size()")
398 .arg(m_alignedRetentionTimeVector.size())
399 .arg(m_ms1RetentionTimeVector.size()));
400 }
401 auto it_plus = std::find_if(m_alignedRetentionTimeVector.begin(),
402 m_alignedRetentionTimeVector.end(),
403 [aligned_retention_time](const double &rt_point) {
404 return aligned_retention_time < rt_point;
405 });
406 double rt1_a, rt2_a, rt1_b, rt2_b;
407 if(it_plus == m_alignedRetentionTimeVector.end())
408 {
409 it_plus--;
410 }
411 if(it_plus == m_alignedRetentionTimeVector.begin())
412 {
413 it_plus++;
414 }
415 auto it_minus = it_plus - 1;
416
417 rt1_a = *it_minus;
418 rt2_a = *it_plus;
419
420 double ratio = (aligned_retention_time - rt1_a) / (rt2_a - rt1_a);
421
422 auto itref = m_ms1RetentionTimeVector.begin() +
423 std::distance(m_alignedRetentionTimeVector.begin(), it_minus);
424
425 rt1_b = *itref;
426 itref++;
427 rt2_b = *itref;
428
429 return (((rt2_b - rt1_b) * ratio) + rt1_b);
430}
431
432template <class T>
433const std::vector<MsRunRetentionTimeSeamarkPoint<T>>
435{
436 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks = m_seamarks;
437 for(auto &seamark : other_seamarks)
438 {
439 seamark.retentionTime =
440 translateOriginal2AlignedRetentionTime(seamark.retentionTime);
441 }
442 return other_seamarks;
443}
444
445template <class T>
446bool
448{
449 return (m_alignedRetentionTimeVector.size() > 0);
450}
451
452template <class T>
453Trace
455 const MsRunRetentionTime<T> &msrun_retention_time_reference)
456{
457 computeSeamarks();
458 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks;
459 if(msrun_retention_time_reference.isAligned())
460 {
461 other_seamarks = msrun_retention_time_reference.getSeamarksReferences();
462 }
463 else
464 {
465 other_seamarks = msrun_retention_time_reference.getSeamarks();
466 }
467 qDebug();
468 if((m_ms1MeanFilter.getHalfWindowSize() * 2 + 1) >=
469 m_ms1RetentionTimeVector.size())
470 {
472 QObject::tr("ERROR : MS1 alignment of MS run '%1' (%2)' not possible : "
473 "\ntoo few MS1 points (%3)")
474 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
475 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
476 .arg(m_ms1RetentionTimeVector.size()));
477 }
478
479 qDebug() << m_seamarks[0].entityHash << " " << m_seamarks[0].retentionTime
480 << " " << other_seamarks[0].entityHash
481 << other_seamarks[0].retentionTime << " ";
482 // both seamarks has to be ordered
483 Trace common_points;
484 getCommonDeltaRt(common_points, other_seamarks);
485
486 // writeTrace("lib_ms2_delta_rt.ods", common_points);
487
488 qDebug() << common_points.front().x << " " << common_points.front().y;
489 m_ms2MedianFilter.filter(common_points);
490 // writeTrace("lib_ms2_delta_rt_median.ods", common_points);
491 m_ms2MeanFilter.filter(common_points);
492 // writeTrace("lib_ms2_delta_rt_mean.ods", common_points);
493 // convert common delta rt to real retention times (for convenience)
494 qDebug() << common_points.front().x << " " << common_points.front().y;
495
496
497 // add a first point to ensure coherence:
498 DataPoint first_point;
499 first_point.x = m_ms1RetentionTimeVector.front() - (double)1;
500 if(first_point.x < 0)
501 {
502 first_point.x = 0;
503 }
504 first_point.y =
505 m_ms1RetentionTimeVector.front() -
506 msrun_retention_time_reference.getFrontRetentionTimeReference();
507
508 common_points.push_back(first_point);
509 // add a last point to ensure coherence:
510 DataPoint last_point;
511 last_point.x = m_ms1RetentionTimeVector.back() + 1;
512 last_point.y = m_ms1RetentionTimeVector.back() -
513 msrun_retention_time_reference.getBackRetentionTimeReference();
514 common_points.push_back(last_point);
515 common_points.sortX();
516
517 // now, it is possible for each time range to give a new MS1 time using a
518 // linear regression on MS2 corrected times
519 m_alignedRetentionTimeVector.clear();
520
521 qDebug() << common_points.front().x << " " << common_points.front().y;
522
523 Trace ms1_aligned_points;
524
525 linearRegressionMs2toMs1(ms1_aligned_points, common_points);
526
527 // writeTrace("lib_ms1_map_rt.ods", ms1_aligned_points);
528 qDebug();
529 // smoothing on MS1 points
530 m_ms1MeanFilter.filter(ms1_aligned_points);
531
532 // writeTrace("lib_ms1_map_rt_mean.ods", ms1_aligned_points);
533 // final aligned retentionTime vector
534
535 for(DataPoint &data_point : ms1_aligned_points)
536 {
537 data_point.y = (data_point.x - data_point.y);
538 }
539
540 qDebug();
541 // Here, the correction parameter is the slope of old rt points curve
542 // (divided by 4 to get a finer correction).
543 double correction_parameter =
544 (m_ms1RetentionTimeVector.back() - m_ms1RetentionTimeVector.front()) /
545 (ms1_aligned_points.size());
546 // set_correction_parameter(correction_parameter / 4);
547 correction_parameter = correction_parameter / (double)4;
548 correctNewTimeValues(ms1_aligned_points, correction_parameter);
549
550 m_alignedRetentionTimeVector = ms1_aligned_points.yValues();
551
552 qDebug();
553 return ms1_aligned_points;
554}
555
556
557template <class T>
558void
560 const Trace &common_points)
561{
562
563 // first slope :
564 std::vector<DataPoint>::const_iterator itms2 = common_points.begin();
565 std::vector<DataPoint>::const_iterator itms2next = itms2 + 1;
566 if(itms2next == common_points.end())
567 {
568 // error
570 QObject::tr("ERROR : MS1 alignment of MS run '%1' (%2)' not possible : "
571 "\ntoo few common points (%3)")
572 .arg(msp_msrunReader.get()->getMsRunId().get()->getXmlId())
573 .arg(msp_msrunReader.get()->getMsRunId().get()->getFileName())
574 .arg(common_points.size()));
575 }
576 qDebug() << "() itms2->x=" << itms2->x << " itms2->y=" << itms2->y;
577
578 for(double &original_rt_point : m_ms1RetentionTimeVector)
579 {
580 DataPoint ms1_point;
581 ms1_point.x = original_rt_point;
582
583 while(ms1_point.x > itms2next->x)
584 {
585 itms2++;
586 itms2next++;
587 }
588
589 double ratio = (itms2next->x - itms2->x);
590 if(ratio != 0)
591 {
592 ratio = (ms1_point.x - itms2->x) / ratio;
593 }
594 else
595 {
596 // avoid division by zero
597 ratio = 1;
598 }
599 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "() " <<
600 // ratio;
601
602 ms1_point.y = itms2->y + ((itms2next->y - itms2->y) * ratio);
603
604 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "() "
605 // << ms1_point.y;
606 ms1_aligned_points.push_back(ms1_point);
607 }
608}
609
610template <class T>
611void
613 double correction_parameter)
614{
615
616 m_valuesCorrected = 0;
617 auto new_it(ms1_aligned_points.begin());
618 auto new_nextit(ms1_aligned_points.begin());
619 new_nextit++;
620 for(; new_nextit != ms1_aligned_points.end(); ++new_nextit, ++new_it)
621 {
622 if(new_nextit->y < new_it->y)
623 {
624 ++m_valuesCorrected;
625 new_nextit->y = new_it->y + correction_parameter;
626 }
627 }
628}
629
630
631template <class T>
634{
635 return msp_msrunReader;
636}
637
638template <class T>
639void
641 const std::vector<double> &aligned_times)
642{
643
644 if(aligned_times.size() == m_ms1RetentionTimeVector.size())
645 {
646 m_alignedRetentionTimeVector = aligned_times;
647 }
648 else
649 {
650 if(aligned_times.size() == m_ms1RetentionTimeVector.size() * 2)
651 {
652 m_alignedRetentionTimeVector = m_ms1RetentionTimeVector;
653 for(std::size_t i = 0; i < m_ms1RetentionTimeVector.size(); i++)
654 {
655 if(aligned_times[2 * i] != m_ms1RetentionTimeVector[i])
656 {
658 QObject::tr(
659 "ERROR : aligned_times (size=%1) vector does not have "
660 "required size (size=%2)")
661 .arg(aligned_times.size())
662 .arg(m_ms1RetentionTimeVector.size()));
663 }
664 m_alignedRetentionTimeVector[i] = aligned_times[(2 * i) + 1];
665 }
666 }
667 else
668 {
670 QObject::tr("ERROR : aligned_times (size=%1) vector does not have "
671 "required size (size=%2)")
672 .arg(aligned_times.size())
673 .arg(m_ms1RetentionTimeVector.size()));
674 }
675 }
676}
677
678
679template <class T>
680Trace
682 const MsRunRetentionTime<T> &msrun_retention_time_reference) const
683{
684 // computeSeamarks();
685 std::vector<MsRunRetentionTimeSeamarkPoint<T>> other_seamarks;
686 if(msrun_retention_time_reference.isAligned())
687 {
688 other_seamarks = msrun_retention_time_reference.getSeamarksReferences();
689 }
690 else
691 {
692 other_seamarks = msrun_retention_time_reference.getSeamarks();
693 }
694 qDebug();
695
696 qDebug() << m_seamarks[0].entityHash << " " << m_seamarks[0].retentionTime
697 << " " << other_seamarks[0].entityHash
698 << other_seamarks[0].retentionTime << " ";
699 // both seamarks has to be ordered
700 Trace common_points;
701 getCommonDeltaRt(common_points, other_seamarks);
702 return common_points;
703}
mean filter apply mean of y values inside the window : this results in a kind of smoothing
Definition: filtermorpho.h:210
median filter apply median of y values inside the window
Definition: filtermorpho.h:191
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition: msrunid.h:53
void setMs2MedianFilter(const FilterMorphoMedian &ms2MedianFilter)
std::vector< double > m_alignedRetentionTimeVector
std::vector< PeptideMs2Point > m_allMs2Points
const FilterMorphoMean & getMs1MeanFilter() const
const FilterMorphoMedian & getMs2MedianFilter() const
void computeSeamarks()
convert PeptideMs2Point into Peptide seamarks this is required before computing alignment
Trace getCommonSeamarksDeltaRt(const MsRunRetentionTime< T > &msrun_retention_time_reference) const
get common seamarks between msrunretentiontime objects and their deltart
void setMs1MeanFilter(const FilterMorphoMean &ms1MeanFilter)
void linearRegressionMs2toMs1(Trace &ms1_aligned_points, const Trace &common_points)
void setMs2MeanFilter(const FilterMorphoMean &ms2MeanFilter)
pappso::MsRunReaderSPtr msp_msrunReader
double getFrontRetentionTimeReference() const
MsRunRetentionTime(MsRunReaderSPtr msrun_reader_sp)
const std::vector< double > & getAlignedRetentionTimeVector() const
get aligned retention time vector
void setAlignedRetentionTimeVector(const std::vector< double > &aligned_times)
std::vector< MsRunRetentionTimeSeamarkPoint< T > > m_seamarks
Trace align(const MsRunRetentionTime< T > &msrun_retention_time_reference)
align the current msrunretentiontime object using the given reference
double translateAligned2OriginalRetentionTime(double aligned_retention_time) const
double translateOriginal2AlignedRetentionTime(double original_retention_time) const
const std::vector< double > & getMs1RetentionTimeVector() const
get orginal retention time vector (not aligned)
void correctNewTimeValues(Trace &ms1_aligned_points, double correction_parameter)
const std::vector< MsRunRetentionTimeSeamarkPoint< T > > & getSeamarks() const
Trace getCommonDeltaRt(const std::vector< MsRunRetentionTimeSeamarkPoint< T > > &other_seamarks) const
ComputeRetentionTimeReference m_retentionTimeReferenceMethod
const std::vector< MsRunRetentionTimeSeamarkPoint< T > > getSeamarksReferences() const
std::vector< double > m_ms1RetentionTimeVector
const FilterMorphoMean & getMs2MeanFilter() const
pappso::MsRunReaderSPtr getMsRunReaderSPtr() const
const MsRunId & getMsRunId() const
double getBackRetentionTimeReference() const
std::size_t getNumberOfCorrectedValues() const
void addPeptideAsSeamark(const T &peptide_id, std::size_t ms2_spectrum_index)
collects all peptide evidences of a given MSrun seamarks has to be converted to peptide retention tim...
pappso::MsRunIdCstSPtr mcsp_msrunId
Class representing a fully specified mass spectrum.
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
Get the intensity of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
A simple container of DataPoint instances.
Definition: trace.h:147
void unique()
Definition: trace.cpp:952
void sortX()
Definition: trace.cpp:936
virtual Trace & filter(const FilterInterface &filter) final
apply a filter on this trace
Definition: trace.cpp:981
std::vector< pappso_double > yValues() const
Definition: trace.cpp:617
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:184
pappso_double x
Definition: datapoint.h:22
pappso_double y
Definition: datapoint.h:23