libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aamodification.h
3 * \date 7/3/2015
4 * \author Olivier Langella
5 * \brief amino acid modification model
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include <QRegularExpression>
32#include <QDebug>
33#include <cmath>
34
35#include "aamodification.h"
36#include "aa.h"
37#include "../pappsoexception.h"
38#include "../mzrange.h"
39#include "../peptide/peptide.h"
40#include "../obo/filterobopsimodsink.h"
41#include "../obo/filterobopsimodtermaccession.h"
42#include "../exception/exceptionnotfound.h"
43
44/*
45
46inline void initMyResource() {
47 Q_INIT_RESOURCE(resources);
48}
49*/
50
51namespace pappso
52{
53
55
56AaModification::AaModification(const QString &accession, pappso_double mass)
57 : m_accession(accession), m_mass(mass)
58{
64
66 {Isotope::H2, 0},
67 {Isotope::N15, 0},
68 {Isotope::O17, 0},
69 {Isotope::O18, 0},
70 {Isotope::S33, 0},
71 {Isotope::S34, 0},
72 {Isotope::S36, 0}};
73}
74
75
77 : m_accession(toCopy.m_accession),
78 m_name(toCopy.m_name),
79 m_mass(toCopy.m_mass),
80 m_atomCount(std::move(toCopy.m_atomCount)),
81 m_mapIsotope(toCopy.m_mapIsotope)
82{
83 m_origin = toCopy.m_origin;
84}
85
87{
88}
89
90const QString &
92{
93
94 qDebug();
95 return m_accession;
96}
97const QString &
99{
100 return m_name;
101}
104 MapAccessionModifications ret;
105
106 return ret;
107 }();
108
111{
112 AaModification *new_mod;
113 // qDebug() << " AaModification::createInstance begin";
114 new_mod = new AaModification(term.m_accession, term.m_diffMono);
115 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
116 new_mod->setDiffFormula(term.m_diffFormula);
117 new_mod->setXrefOrigin(term.m_origin);
118 new_mod->m_name = term.m_name;
119 return new_mod;
120}
121
123AaModification::createInstance(const QString &accession)
124{
125 if(accession == "internal:Nter_hydrolytic_cleavage_H")
126 {
127 OboPsiModTerm term;
128 term.m_accession = accession;
129 term.m_diffFormula = "H 1";
130 term.m_diffMono = MPROTIUM;
131 term.m_name = "Nter hydrolytic cleavage H+";
132 return (AaModification::createInstance(term));
133 }
134 if(accession == "internal:Cter_hydrolytic_cleavage_HO")
135 {
136 OboPsiModTerm term;
137 term.m_accession = accession;
138 term.m_diffFormula = "H 1 O 1";
140 term.m_name = "Cter hydrolytic cleavage HO";
141 return (AaModification::createInstance(term));
142 }
143 if(accession.startsWith("MUTATION:"))
144 {
145 QRegularExpression regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
146 QRegularExpressionMatch match = regexp_mutation.match(accession);
147 if(match.hasMatch())
148 {
149 qDebug() << match.captured(1).at(0) << " " << match.captured(2).at(0);
150
151 Aa aa_from(match.captured(1).toStdString().c_str()[0]);
152 Aa aa_to(match.captured(2).toStdString().c_str()[0]);
153 AaModificationP instance_mutation =
154 createInstanceMutation(aa_from, aa_to);
155 return instance_mutation;
156 // m_psiModLabel<<"|";
157 }
158 }
159 // initMyResource();
160 FilterOboPsiModSink term_list;
161 FilterOboPsiModTermAccession filterm_accession(term_list, accession);
162
163 OboPsiMod psimod(filterm_accession);
164
165 try
166 {
167 return (AaModification::createInstance(term_list.getOne()));
168 }
169 catch(ExceptionNotFound &e)
170 {
171 throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
172 .arg(accession)
173 .arg(e.qwhat()));
174 }
175}
176
177void
178AaModification::setXrefOrigin(const QString &origin)
179{
180 // xref: Origin: "N"
181 // xref: Origin: "X"
182 m_origin = origin;
183}
184void
185AaModification::setDiffFormula(const QString &diff_formula)
186{
187 QRegularExpression rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
188
189 QRegularExpressionMatchIterator match_it = rx.globalMatch(diff_formula);
190
191 while(match_it.hasNext())
192 {
193 QRegularExpressionMatch match_rx = match_it.next();
194 qDebug() << match_rx.captured(2) << " " << match_rx.captured(3);
195 if(match_rx.captured(2) == "C")
196 {
197 m_atomCount[AtomIsotopeSurvey::C] = match_rx.captured(3).toInt();
198 }
199 else if(match_rx.captured(2) == "H")
200 {
201 m_atomCount[AtomIsotopeSurvey::H] = match_rx.captured(3).toInt();
202 }
203 else if(match_rx.captured(2) == "N")
204 {
205 m_atomCount[AtomIsotopeSurvey::N] = match_rx.captured(3).toInt();
206 }
207 else if(match_rx.captured(2) == "O")
208 {
209 m_atomCount[AtomIsotopeSurvey::O] = match_rx.captured(3).toInt();
210 }
211 else if(match_rx.captured(2) == "S")
212 {
213 m_atomCount[AtomIsotopeSurvey::S] = match_rx.captured(3).toInt();
214 }
215 }
216
217 // look for isotopes :
218 rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
219
220 match_it = rx.globalMatch(diff_formula);
221
222 while(match_it.hasNext())
223 {
224 QRegularExpressionMatch match_rx = match_it.next();
225 qDebug() << match_rx.captured(1) << " " << match_rx.captured(2) << " "
226 << match_rx.captured(3);
227 int number_of_isotopes = match_rx.captured(3).toInt();
228 if(match_rx.captured(2) == "C")
229 {
230 if(match_rx.captured(1) == "13")
231 {
232 m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
233 }
234 m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
235 }
236 else if(match_rx.captured(2) == "H")
237 {
238 if(match_rx.captured(1) == "2")
239 {
240 m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
241 }
242 m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
243 }
244 else if(match_rx.captured(2) == "N")
245 {
246 if(match_rx.captured(1) == "15")
247 {
248 m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
249 }
250 m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
251 }
252 else if(match_rx.captured(2) == "O")
253 {
254 if(match_rx.captured(1) == "17")
255 {
256 m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
257 }
258 else if(match_rx.captured(1) == "18")
259 {
260 m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
261 }
262 m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
263 }
264 else if(match_rx.captured(2) == "S")
265 {
266 if(match_rx.captured(1) == "33")
267 {
268 m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
269 }
270 else if(match_rx.captured(1) == "34")
271 {
272 m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
273 }
274 else if(match_rx.captured(1) == "36")
275 {
276 m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
277 }
278 m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
279 }
280 }
281
282
284}
285
286
287void
289{
290 pappso_double theoreticalm_mass = 0;
291 std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
293 if(it_atom != m_atomCount.end())
294 {
295 theoreticalm_mass += MASSCARBON * (it_atom->second);
296 }
297 it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
298 if(it_atom != m_atomCount.end())
299 {
300 theoreticalm_mass += MPROTIUM * (it_atom->second);
301 }
302
303 it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
304 if(it_atom != m_atomCount.end())
305 {
306 theoreticalm_mass += MASSOXYGEN * (it_atom->second);
307 }
308
309 it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
310 if(it_atom != m_atomCount.end())
311 {
312 theoreticalm_mass += MASSNITROGEN * (it_atom->second);
313 }
314 it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
315 if(it_atom != m_atomCount.end())
316 {
317 theoreticalm_mass += MASSSULFUR * (it_atom->second);
318 }
319
320 qDebug() << theoreticalm_mass;
321
322 theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
323 theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
324 theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
325 theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
326 theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
327 theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
328 theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
329 theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
330
331
332 pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
333 if(diff < 0.001)
334 {
335 m_mass = theoreticalm_mass;
336 qDebug() << diff;
337 }
338 else
339 {
340 qDebug()
341 << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
342 << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
343 << " accession=" << m_accession;
344 }
345}
346
349{
350 QString accession = QString("%1").arg(modificationMass);
351 qDebug() << accession;
352 QMutexLocker locker(&m_mutex);
353 if(m_mapAccessionModifications.find(accession) ==
355 {
356 // not found
357 m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
358 accession, new AaModification(accession, modificationMass)));
359 }
360 else
361 {
362 // found
363 }
364 return m_mapAccessionModifications.at(accession);
365}
366
368AaModification::getInstance(const QString &accession)
369{
370 try
371 {
372 QMutexLocker locker(&m_mutex);
373 MapAccessionModifications::iterator it =
374 m_mapAccessionModifications.find(accession);
375 if(it == m_mapAccessionModifications.end())
376 {
377
378 // not found
379 std::pair<MapAccessionModifications::iterator, bool> insert_res =
381 std::pair<QString, AaModificationP>(
382 accession, AaModification::createInstance(accession)));
383 it = insert_res.first;
384 }
385 else
386 {
387 // found
388 }
389 return it->second;
390 }
391 catch(ExceptionNotFound &e)
392 {
393 throw ExceptionNotFound(
394 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
395 .arg(accession)
396 .arg(e.qwhat()));
397 }
398 catch(PappsoException &e)
399 {
400 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
401 .arg(accession)
402 .arg(e.qwhat()));
403 }
404 catch(std::exception &e)
405 {
406 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
407 .arg(accession)
408 .arg(e.what()));
409 }
410}
411
414{
415
416 QMutexLocker locker(&m_mutex);
417 MapAccessionModifications::iterator it =
419 if(it == m_mapAccessionModifications.end())
420 {
421 // not found
422 std::pair<MapAccessionModifications::iterator, bool> insert_res =
423 m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
425 it = insert_res.first;
426 }
427 else
428 {
429 // found
430 }
431 return it->second;
432}
433
434
437 pappso_double mass,
438 const PeptideSp &peptide_sp,
439 unsigned int position)
440{
442 if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
443 {
444 if(type == "M")
445 {
446 return getInstance("MOD:00719");
447 }
448 if(type == "K")
449 {
450 return getInstance("MOD:01047");
451 }
452 }
453 // accession== "MOD:00057"
454 if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
455 {
456 // id: MOD:00394
457 // name: acetylated residue
458 // potential N-terminus modifications
459 if(position == 0)
460 {
461 return getInstance("MOD:00408");
462 }
463 }
464 if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
465 {
466 //-17.02655
467 // loss of ammonia [MOD:01160] -17.026549
468 return getInstance("MOD:01160");
469 }
470
471 if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
472 {
473 //// iodoacetamide [MOD:00397] 57.021464
474 if(type == "C")
475 {
476 return getInstance("MOD:01060");
477 }
478 else
479 {
480 return getInstance("MOD:00397");
481 }
482 }
483 if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
484 {
485 // loss of water
486 /*
487 if (position == 0) {
488 if (peptide_sp.get()->getSequence().startsWith("EG")) {
489 return getInstance("MOD:00365");
490 }
491 if (peptide_sp.get()->getSequence().startsWith("ES")) {
492 return getInstance("MOD:00953");
493 }
494 if (type == "E") {
495 return getInstance("MOD:00420");
496 }
497 }
498 */
499 // dehydrated residue [MOD:00704] -18.010565
500 return getInstance("MOD:00704");
501 }
502 if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
503 {
504 // phosphorylated residue [MOD:00696] 79.966330
505 return getInstance("MOD:00696");
506 }
507 bool isCter = false;
508 if(peptide_sp.get()->size() == (position + 1))
509 {
510 isCter = true;
511 }
512 if((position == 0) || isCter)
513 {
514 if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
515 {
516 // dimethyl
517 return getInstance("MOD:00429");
518 }
519 if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
520 {
521 // 4x(2)H labeled dimethyl residue
522 return getInstance("MOD:00552");
523 }
524 if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
525 {
526 // 2x(13)C,6x(2)H-dimethylated arginine
527 return getInstance("MOD:00638");
528 }
529 }
530 throw PappsoException(
531 QObject::tr("tandem modification not found : %1 %2 %3 %4")
532 .arg(type)
533 .arg(mass)
534 .arg(peptide_sp.get()->getSequence())
535 .arg(position));
536}
537
540{
541 return m_mass;
542}
543
544
545int
547{
548 // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
549 // IMPLEMENTED";
550 return m_atomCount.at(atom);
551}
552
553
554int
556{
557 try
558 {
559 return m_mapIsotope.at(isotope);
560 }
561 catch(std::exception &e)
562 {
563 throw PappsoException(
564 QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
565 .arg(e.what()));
566 }
567}
568
569
570bool
572{
573 if(m_accession.startsWith("internal:"))
574 {
575 return true;
576 }
577 return false;
578}
579
581AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
582{
583 QString accession(
584 QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
585 double diffMono = aa_to.getMass() - aa_from.getMass();
586 // not found
587 AaModification *instance_mutation;
588 // qDebug() << " AaModification::createInstance begin";
589 instance_mutation = new AaModification(accession, diffMono);
590 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
591
592 for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
593 atomInt != (std::int8_t)AtomIsotopeSurvey::last;
594 atomInt++)
595 {
596 AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
597 instance_mutation->m_atomCount[atom] =
598 aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
599 }
600 instance_mutation->m_name = QString("mutation from %1 to %2")
601 .arg(aa_from.getLetter())
602 .arg(aa_to.getLetter());
603 return instance_mutation;
604}
605
606
608AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
609{
610 QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
611 try
612 {
613 QMutexLocker locker(&m_mutex);
614 MapAccessionModifications::iterator it =
615 m_mapAccessionModifications.find(accession);
616 if(it == m_mapAccessionModifications.end())
617 {
618 Aa aa_from(mut_from.toLatin1());
619 Aa aa_to(mut_to.toLatin1());
620 AaModificationP instance_mutation =
621 createInstanceMutation(aa_from, aa_to);
622
623 std::pair<MapAccessionModifications::iterator, bool> insert_res =
625 std::pair<QString, AaModificationP>(accession,
626 instance_mutation));
627 it = insert_res.first;
628 }
629 else
630 {
631 // found
632 }
633 return it->second;
634 }
635 catch(ExceptionNotFound &e)
636 {
637 throw ExceptionNotFound(
638 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
639 .arg(accession)
640 .arg(e.qwhat()));
641 }
642 catch(PappsoException &e)
643 {
644 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
645 .arg(accession)
646 .arg(e.qwhat()));
647 }
648 catch(std::exception &e)
649 {
650 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
651 .arg(accession)
652 .arg(e.what()));
653 }
654} // namespace pappso
655
656} // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:432
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
const char * what() const noexcept override
virtual const QString & qwhat() const
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:76
double pappso_double
A type definition for doubles.
Definition: types.h:48
Isotope
Definition: types.h:91
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)