libpappsomspp
Library for mass spectrometry
aamodification.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/amino_acid/aamodification.h
3 * \date 7/3/2015
4 * \author Olivier Langella
5 * \brief amino acid modification model
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include <QRegularExpression>
32#include <QDebug>
33#include <cmath>
34
35#include "aamodification.h"
36#include "aa.h"
37#include "../pappsoexception.h"
38#include "../mzrange.h"
39#include "../peptide/peptide.h"
40#include "../obo/filterobopsimodsink.h"
41#include "../obo/filterobopsimodtermaccession.h"
42#include "../exception/exceptionnotfound.h"
43
44/*
45
46inline void initMyResource() {
47 Q_INIT_RESOURCE(resources);
48}
49*/
50
51namespace pappso
52{
53
55
56AaModification::AaModification(const QString &accession, pappso_double mass)
57 : m_accession(accession), m_mass(mass)
58{
64
66 {Isotope::H2, 0},
67 {Isotope::N15, 0},
68 {Isotope::O17, 0},
69 {Isotope::O18, 0},
70 {Isotope::S33, 0},
71 {Isotope::S34, 0},
72 {Isotope::S36, 0}};
73}
74
75
77 : m_accession(toCopy.m_accession),
78 m_name(toCopy.m_name),
79 m_mass(toCopy.m_mass),
80 m_atomCount(std::move(toCopy.m_atomCount)),
81 m_mapIsotope(toCopy.m_mapIsotope)
82{
83 m_origin = toCopy.m_origin;
84}
85
87{
88}
89
90const QString &
92{
93
94 // qDebug();
95 return m_accession;
96}
97
98const QString &
100{
101 return m_name;
102}
103
106 MapAccessionModifications ret;
107
108 return ret;
109 }();
110
113{
114 AaModification *new_mod;
115 // qDebug() << " AaModification::createInstance begin";
116 new_mod = new AaModification(term.m_accession, term.m_diffMono);
117 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
118 new_mod->setDiffFormula(term.m_diffFormula);
119 new_mod->setXrefOrigin(term.m_origin);
120 new_mod->m_name = term.m_name;
121 return new_mod;
122}
123
125AaModification::createInstance(const QString &accession)
126{
127 if(accession == "internal:Nter_hydrolytic_cleavage_H")
128 {
129 OboPsiModTerm term;
130 term.m_accession = accession;
131 term.m_diffFormula = "H 1";
132 term.m_diffMono = MPROTIUM;
133 term.m_name = "Nter hydrolytic cleavage H+";
134 return (AaModification::createInstance(term));
135 }
136 if(accession == "internal:Cter_hydrolytic_cleavage_HO")
137 {
138 OboPsiModTerm term;
139 term.m_accession = accession;
140 term.m_diffFormula = "H 1 O 1";
142 term.m_name = "Cter hydrolytic cleavage HO";
143 return (AaModification::createInstance(term));
144 }
145 if(accession.startsWith("MUTATION:"))
146 {
147 QRegularExpression regexp_mutation("^MUTATION:([A-Z])=>([A-Z])$");
148 QRegularExpressionMatch match = regexp_mutation.match(accession);
149 if(match.hasMatch())
150 {
151 qDebug() << match.capturedTexts()[1].at(0) << " "
152 << match.capturedTexts()[2].at(0);
153
154 Aa aa_from(match.capturedTexts()[1].toStdString().c_str()[0]);
155 Aa aa_to(match.capturedTexts()[2].toStdString().c_str()[0]);
156 AaModificationP instance_mutation =
157 createInstanceMutation(aa_from, aa_to);
158 return instance_mutation;
159 // m_psiModLabel<<"|";
160 }
161 }
162 // initMyResource();
163 FilterOboPsiModSink term_list;
164 FilterOboPsiModTermAccession filterm_accession(term_list, accession);
165
166 OboPsiMod psimod(filterm_accession);
167
168 try
169 {
170 return (AaModification::createInstance(term_list.getOne()));
171 }
172 catch(ExceptionNotFound &e)
173 {
174 throw ExceptionNotFound(QObject::tr("modification not found : [%1]\n%2")
175 .arg(accession)
176 .arg(e.qwhat()));
177 }
178}
179
180void
181AaModification::setXrefOrigin(const QString &origin)
182{
183 // xref: Origin: "N"
184 // xref: Origin: "X"
185 m_origin = origin;
186}
187void
188AaModification::setDiffFormula(const QString &diff_formula)
189{
190 QRegularExpression rx("(^|\\s)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
191 QRegularExpressionMatchIterator i = rx.globalMatch(diff_formula);
192
193 while(i.hasNext())
194 {
195 QRegularExpressionMatch match = i.next();
196
197 qDebug() << match.captured(2) << " " << match.captured(2) << " "
198 << match.captured(3);
199
200 if(match.captured(2) == "C")
201 {
202 m_atomCount[AtomIsotopeSurvey::C] = match.captured(3).toInt();
203 }
204 else if(match.captured(2) == "H")
205 {
206 m_atomCount[AtomIsotopeSurvey::H] = match.captured(3).toInt();
207 }
208 else if(match.captured(2) == "N")
209 {
210 m_atomCount[AtomIsotopeSurvey::N] = match.captured(3).toInt();
211 }
212 else if(match.captured(2) == "O")
213 {
214 m_atomCount[AtomIsotopeSurvey::O] = match.captured(3).toInt();
215 }
216 else if(match.captured(2) == "S")
217 {
218 m_atomCount[AtomIsotopeSurvey::S] = match.captured(3).toInt();
219 }
220 }
221
222 // look for isotopes :
223 rx.setPattern("\\(([-]{0,1}\\d+)\\)([C,H,O,N,H,S])\\s([-]{0,1}\\d+)");
224
225 i = rx.globalMatch(diff_formula);
226
227 while(i.hasNext())
228 {
229 QRegularExpressionMatch match = i.next();
230
231 qDebug() << match.captured(1) << " " << match.captured(2) << " "
232 << match.captured(3);
233
234 int number_of_isotopes = match.captured(3).toInt();
235
236 if(match.captured(2) == "C")
237 {
238 if(match.captured(1) == "13")
239 {
240 m_mapIsotope.at(Isotope::C13) = number_of_isotopes;
241 }
242 m_atomCount[AtomIsotopeSurvey::C] += number_of_isotopes;
243 }
244 else if(match.captured(2) == "H")
245 {
246 if(match.captured(1) == "2")
247 {
248 m_mapIsotope.at(Isotope::H2) = number_of_isotopes;
249 }
250 m_atomCount[AtomIsotopeSurvey::H] += number_of_isotopes;
251 }
252 else if(match.captured(2) == "N")
253 {
254 if(match.captured(1) == "15")
255 {
256 m_mapIsotope.at(Isotope::N15) = number_of_isotopes;
257 }
258 m_atomCount[AtomIsotopeSurvey::N] += number_of_isotopes;
259 }
260 else if(match.captured(2) == "O")
261 {
262 if(match.captured(1) == "17")
263 {
264 m_mapIsotope.at(Isotope::O17) = number_of_isotopes;
265 }
266 else if(match.captured(1) == "18")
267 {
268 m_mapIsotope.at(Isotope::O18) = number_of_isotopes;
269 }
270 m_atomCount[AtomIsotopeSurvey::O] += number_of_isotopes;
271 }
272 else if(match.captured(2) == "S")
273 {
274 if(match.captured(1) == "33")
275 {
276 m_mapIsotope.at(Isotope::S33) = number_of_isotopes;
277 }
278 else if(match.captured(1) == "34")
279 {
280 m_mapIsotope.at(Isotope::S34) = number_of_isotopes;
281 }
282 else if(match.captured(1) == "36")
283 {
284 m_mapIsotope.at(Isotope::S36) = number_of_isotopes;
285 }
286 m_atomCount[AtomIsotopeSurvey::S] += number_of_isotopes;
287 }
288 }
289
291}
292
293
294void
296{
297 pappso_double theoreticalm_mass = 0;
298 std::map<AtomIsotopeSurvey, int>::const_iterator it_atom =
300 if(it_atom != m_atomCount.end())
301 {
302 theoreticalm_mass += MASSCARBON * (it_atom->second);
303 }
304 it_atom = m_atomCount.find(AtomIsotopeSurvey::H);
305 if(it_atom != m_atomCount.end())
306 {
307 theoreticalm_mass += MPROTIUM * (it_atom->second);
308 }
309
310 it_atom = m_atomCount.find(AtomIsotopeSurvey::O);
311 if(it_atom != m_atomCount.end())
312 {
313 theoreticalm_mass += MASSOXYGEN * (it_atom->second);
314 }
315
316 it_atom = m_atomCount.find(AtomIsotopeSurvey::N);
317 if(it_atom != m_atomCount.end())
318 {
319 theoreticalm_mass += MASSNITROGEN * (it_atom->second);
320 }
321 it_atom = m_atomCount.find(AtomIsotopeSurvey::S);
322 if(it_atom != m_atomCount.end())
323 {
324 theoreticalm_mass += MASSSULFUR * (it_atom->second);
325 }
326
327 qDebug() << theoreticalm_mass;
328
329 theoreticalm_mass += DIFFC12C13 * m_mapIsotope.at(Isotope::C13);
330 theoreticalm_mass += DIFFH1H2 * m_mapIsotope.at(Isotope::H2);
331 theoreticalm_mass += DIFFN14N15 * m_mapIsotope.at(Isotope::N15);
332 theoreticalm_mass += DIFFO16O17 * m_mapIsotope.at(Isotope::O17);
333 theoreticalm_mass += DIFFO16O18 * m_mapIsotope.at(Isotope::O18);
334 theoreticalm_mass += DIFFS32S33 * m_mapIsotope.at(Isotope::S33);
335 theoreticalm_mass += DIFFS32S34 * m_mapIsotope.at(Isotope::S34);
336 theoreticalm_mass += DIFFS32S36 * m_mapIsotope.at(Isotope::S36);
337
338
339 pappso_double diff = std::fabs((pappso_double)m_mass - theoreticalm_mass);
340 if(diff < 0.001)
341 {
342 m_mass = theoreticalm_mass;
343 qDebug() << diff;
344 }
345 else
346 {
347 qDebug()
348 << "ERROR in AaModification::calculateMassFromChemicalComponents theo="
349 << theoreticalm_mass << " m=" << m_mass << " diff=" << diff
350 << " accession=" << m_accession;
351 }
352}
353
356{
357 QString accession = QString("%1").arg(modificationMass);
358 qDebug() << accession;
359 QMutexLocker locker(&m_mutex);
360 if(m_mapAccessionModifications.find(accession) ==
362 {
363 // not found
364 m_mapAccessionModifications.insert(std::pair<QString, AaModification *>(
365 accession, new AaModification(accession, modificationMass)));
366 }
367 else
368 {
369 // found
370 }
371 return m_mapAccessionModifications.at(accession);
372}
373
375AaModification::getInstance(const QString &accession)
376{
377 try
378 {
379 QMutexLocker locker(&m_mutex);
380 MapAccessionModifications::iterator it =
381 m_mapAccessionModifications.find(accession);
382 if(it == m_mapAccessionModifications.end())
383 {
384
385 // not found
386 std::pair<MapAccessionModifications::iterator, bool> insert_res =
388 std::pair<QString, AaModificationP>(
389 accession, AaModification::createInstance(accession)));
390 it = insert_res.first;
391 }
392 else
393 {
394 // found
395 }
396 return it->second;
397 }
398 catch(ExceptionNotFound &e)
399 {
400 throw ExceptionNotFound(
401 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
402 .arg(accession)
403 .arg(e.qwhat()));
404 }
405 catch(PappsoException &e)
406 {
407 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
408 .arg(accession)
409 .arg(e.qwhat()));
410 }
411 catch(std::exception &e)
412 {
413 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
414 .arg(accession)
415 .arg(e.what()));
416 }
417}
418
421{
422
423 QMutexLocker locker(&m_mutex);
424 MapAccessionModifications::iterator it =
426 if(it == m_mapAccessionModifications.end())
427 {
428 // not found
429 std::pair<MapAccessionModifications::iterator, bool> insert_res =
430 m_mapAccessionModifications.insert(std::pair<QString, AaModificationP>(
432 it = insert_res.first;
433 }
434 else
435 {
436 // found
437 }
438 return it->second;
439}
440
441
444 pappso_double mass,
445 const PeptideSp &peptide_sp,
446 unsigned int position)
447{
449 if(MzRange(mass, precision).contains(getInstance("MOD:00719")->getMass()))
450 {
451 if(type == "M")
452 {
453 return getInstance("MOD:00719");
454 }
455 if(type == "K")
456 {
457 return getInstance("MOD:01047");
458 }
459 }
460 // accession== "MOD:00057"
461 if(MzRange(mass, precision).contains(getInstance("MOD:00408")->getMass()))
462 {
463 // id: MOD:00394
464 // name: acetylated residue
465 // potential N-terminus modifications
466 if(position == 0)
467 {
468 return getInstance("MOD:00408");
469 }
470 }
471 if(MzRange(mass, precision).contains(getInstance("MOD:01160")->getMass()))
472 {
473 //-17.02655
474 // loss of ammonia [MOD:01160] -17.026549
475 return getInstance("MOD:01160");
476 }
477
478 if(MzRange(mass, precision).contains(getInstance("MOD:01060")->getMass()))
479 {
480 //// iodoacetamide [MOD:00397] 57.021464
481 if(type == "C")
482 {
483 return getInstance("MOD:01060");
484 }
485 else
486 {
487 return getInstance("MOD:00397");
488 }
489 }
490 if(MzRange(mass, precision).contains(getInstance("MOD:00704")->getMass()))
491 {
492 // loss of water
493 /*
494 if (position == 0) {
495 if (peptide_sp.get()->getSequence().startsWith("EG")) {
496 return getInstance("MOD:00365");
497 }
498 if (peptide_sp.get()->getSequence().startsWith("ES")) {
499 return getInstance("MOD:00953");
500 }
501 if (type == "E") {
502 return getInstance("MOD:00420");
503 }
504 }
505 */
506 // dehydrated residue [MOD:00704] -18.010565
507 return getInstance("MOD:00704");
508 }
509 if(MzRange(mass, precision).contains(getInstance("MOD:00696")->getMass()))
510 {
511 // phosphorylated residue [MOD:00696] 79.966330
512 return getInstance("MOD:00696");
513 }
514 bool isCter = false;
515 if(peptide_sp.get()->size() == (position + 1))
516 {
517 isCter = true;
518 }
519 if((position == 0) || isCter)
520 {
521 if(MzRange(mass, precision).contains(getInstance("MOD:00429")->getMass()))
522 {
523 // dimethyl
524 return getInstance("MOD:00429");
525 }
526 if(MzRange(mass, precision).contains(getInstance("MOD:00552")->getMass()))
527 {
528 // 4x(2)H labeled dimethyl residue
529 return getInstance("MOD:00552");
530 }
531 if(MzRange(mass, precision).contains(getInstance("MOD:00638")->getMass()))
532 {
533 // 2x(13)C,6x(2)H-dimethylated arginine
534 return getInstance("MOD:00638");
535 }
536 }
537 throw PappsoException(
538 QObject::tr("tandem modification not found : %1 %2 %3 %4")
539 .arg(type)
540 .arg(mass)
541 .arg(peptide_sp.get()->getSequence())
542 .arg(position));
543}
544
547{
548 return m_mass;
549}
550
551
552int
554{
555 // qDebug() << "AaModification::getNumberOfAtom(AtomIsotopeSurvey atom) NOT
556 // IMPLEMENTED";
557 return m_atomCount.at(atom);
558}
559
560
561int
563{
564 try
565 {
566 return m_mapIsotope.at(isotope);
567 }
568 catch(std::exception &e)
569 {
570 throw PappsoException(
571 QObject::tr("ERROR in AaModification::getNumberOfIsotope %2")
572 .arg(e.what()));
573 }
574}
575
576
577bool
579{
580 if(m_accession.startsWith("internal:"))
581 {
582 return true;
583 }
584 return false;
585}
586
588AaModification::createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
589{
590 QString accession(
591 QString("MUTATION:%1=>%2").arg(aa_from.getLetter()).arg(aa_to.getLetter()));
592 double diffMono = aa_to.getMass() - aa_from.getMass();
593 // not found
594 AaModification *instance_mutation;
595 // qDebug() << " AaModification::createInstance begin";
596 instance_mutation = new AaModification(accession, diffMono);
597 // xref: DiffFormula: "C 0 H 0 N 0 O 1 S 0"
598
599 for(std::int8_t atomInt = (std::int8_t)AtomIsotopeSurvey::C;
600 atomInt != (std::int8_t)AtomIsotopeSurvey::last;
601 atomInt++)
602 {
603 AtomIsotopeSurvey atom = static_cast<AtomIsotopeSurvey>(atomInt);
604 instance_mutation->m_atomCount[atom] =
605 aa_to.getNumberOfAtom(atom) - aa_from.getNumberOfAtom(atom);
606 }
607 instance_mutation->m_name = QString("mutation from %1 to %2")
608 .arg(aa_from.getLetter())
609 .arg(aa_to.getLetter());
610 return instance_mutation;
611}
612
613
615AaModification::getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
616{
617 QString accession(QString("MUTATION:%1=>%2").arg(mut_from).arg(mut_to));
618 try
619 {
620 QMutexLocker locker(&m_mutex);
621 MapAccessionModifications::iterator it =
622 m_mapAccessionModifications.find(accession);
623 if(it == m_mapAccessionModifications.end())
624 {
625 Aa aa_from(mut_from.toLatin1());
626 Aa aa_to(mut_to.toLatin1());
627 AaModificationP instance_mutation =
628 createInstanceMutation(aa_from, aa_to);
629
630 std::pair<MapAccessionModifications::iterator, bool> insert_res =
632 std::pair<QString, AaModificationP>(accession,
633 instance_mutation));
634 it = insert_res.first;
635 }
636 else
637 {
638 // found
639 }
640 return it->second;
641 }
642 catch(ExceptionNotFound &e)
643 {
644 throw ExceptionNotFound(
645 QObject::tr("ERROR getting instance of : %1 NOT FOUND\n%2")
646 .arg(accession)
647 .arg(e.qwhat()));
648 }
649 catch(PappsoException &e)
650 {
651 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
652 .arg(accession)
653 .arg(e.qwhat()));
654 }
655 catch(std::exception &e)
656 {
657 throw PappsoException(QObject::tr("ERROR getting instance of %1\n%2")
658 .arg(accession)
659 .arg(e.what()));
660 }
661} // namespace pappso
662
663} // namespace pappso
amino acid modification model
virtual const char & getLetter() const
Definition: aabase.cpp:434
const QString & getName() const
static AaModificationP getInstanceMutation(const QChar &mut_from, const QChar &mut_to)
get a fake modification coding a mutation from an amino acid to an other
static AaModificationP createInstance(const QString &saccession)
std::map< Isotope, int > m_mapIsotope
const QString & getAccession() const
static AaModificationP getInstanceXtandemMod(const QString &type, pappso_double mass, const PeptideSp &peptide_sp, unsigned int position)
AaModification(AaModification &&toCopy)
std::map< AtomIsotopeSurvey, int > m_atomCount
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
pappso_double getMass() const
void setXrefOrigin(const QString &origin)
set list of amino acid on which this modification takes place
std::map< QString, AaModificationP > MapAccessionModifications
static AaModificationP getInstance(const QString &accession)
static AaModificationP getInstanceCustomizedMod(pappso_double modificationMass)
const QString m_accession
void setDiffFormula(const QString &diff_formula)
static AaModificationP createInstanceMutation(const Aa &aa_from, const Aa &aa_to)
void calculateMassFromChemicalComponents()
static MapAccessionModifications m_mapAccessionModifications
int getNumberOfIsotope(Isotope isotope) const override final
get the number of isotopes C13, H2, O17, O18, N15, S33, S34, S36 in the molecule
Definition: aa.h:45
int getNumberOfAtom(AtomIsotopeSurvey atom) const override final
get the number of atom C, O, N, H in the molecule
Definition: aa.cpp:166
pappso_double getMass() const override
Definition: aa.cpp:79
const OboPsiModTerm & getOne()
const char * what() const noexcept override
virtual const QString & qwhat() const
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
Definition: precision.cpp:130
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double DIFFS32S33(32.9714589101 - MASSSULFUR)
const pappso_double DIFFS32S34(33.9678670300 - MASSSULFUR)
const pappso_double DIFFO16O17(16.99913150 - MASSOXYGEN)
const pappso_double MASSCARBON(12)
const pappso_double MASSSULFUR(31.9720711741)
std::shared_ptr< const Peptide > PeptideSp
const pappso_double DIFFS32S36(35.9670812000 - MASSSULFUR)
const AaModification * AaModificationP
AtomIsotopeSurvey
Definition: types.h:77
double pappso_double
A type definition for doubles.
Definition: types.h:49
Isotope
Definition: types.h:92
const pappso_double MPROTIUM(1.007825032241)
const pappso_double MASSNITROGEN(14.0030740048)
const pappso_double MASSOXYGEN(15.99491461956)
const pappso_double DIFFO16O18(17.9991610 - MASSOXYGEN)
const pappso_double DIFFN14N15(15.0001088982 - MASSNITROGEN)
const pappso_double DIFFC12C13(1.0033548378)
const pappso_double DIFFH1H2(2.0141017778 - MPROTIUM)