libpappsomspp
Library for mass spectrometry
spomsspectrum.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/specpeptidoms/spomsspectrum.cpp
3 * \date 24/03/2025
4 * \author Aurélien Berthier
5 * \brief SpecPeptidOMS Spectrum
6 *
7 * C++ implementation of the SpecPeptidOMS algorithm described in :
8 * (1) Benoist, É.; Jean, G.; Rogniaux, H.; Fertin, G.; Tessier, D. SpecPeptidOMS Directly and
9 * Rapidly Aligns Mass Spectra on Whole Proteomes and Identifies Peptides That Are Not Necessarily
10 * Tryptic: Implications for Peptidomics. J. Proteome Res. 2025.
11 * https://doi.org/10.1021/acs.jproteome.4c00870.
12 */
13
14/*
15 * Copyright (c) 2025 Aurélien Berthier
16 * <aurelien.berthier@ls2n.fr>
17 *
18 * This program is free software: you can redistribute it and/or modify
19 * it under the terms of the GNU General Public License as published by
20 * the Free Software Foundation, either version 3 of the License, or
21 * (at your option) any later version.
22 *
23 * This program is distributed in the hope that it will be useful,
24 * but WITHOUT ANY WARRANTY; without even the implied warranty of
25 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26 * GNU General Public License for more details.
27 *
28 * You should have received a copy of the GNU General Public License
29 * along with this program. If not, see <http://www.gnu.org/licenses/>.
30 */
31
32#include <algorithm>
33#include <unordered_set>
34#include "spomsspectrum.h"
35#include "types.h"
36#include "../../pappsoexception.h"
37#include "../../amino_acid/aacode.h"
38
39// SpOMSSpectrum::SpOMSSpectrum(const specglob::ExperimentalSpectrum &exp_spectrum)
41 pappso::PrecisionPtr precision_ptr,
42 const pappso::AaCode &aaCode)
43 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
44 specglob::ExperimentalSpectrum(qmass_spectrum, precision_ptr)),
45 m_aaCode(aaCode),
46 m_qualifiedMassSpectrum(qmass_spectrum),
47 m_precision_ptr(precision_ptr),
48 m_precursor_mass_error(0)
49{
51 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
52 {
53 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
54 m_aapositions.back()->reserve(this->size() - 1);
55 }
56 m_supported_peaks.reserve(this->size());
57 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
58 m_reindexed_peaks.push_back(0);
59 for(std::size_t iter = 1; iter < this->size(); iter++)
60 {
61 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
62 m_reindexed_peaks.push_back(-1);
63 }
64 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
67}
68
70 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
71 pappso::specglob::ExperimentalSpectrum(other->m_qualifiedMassSpectrum,
72 other->m_precision_ptr)),
73 m_qualifiedMassSpectrum(other->m_qualifiedMassSpectrum),
74 m_aapositions(other->m_aapositions),
75 m_precision_ptr(other->m_precision_ptr),
76 m_supported_peaks(other->m_supported_peaks),
77 m_reindexed_peaks(other->m_reindexed_peaks),
78 m_aaCode(other->m_aaCode),
79 m_complementary_peak_indexes(other->m_complementary_peak_indexes),
80 m_precursor_mass_error(other->m_precursor_mass_error)
81{
82}
83
85 double precursor_mass_error)
86 : std::vector<pappso::specglob::ExperimentalSpectrumDataPoint>(
87 pappso::specglob::ExperimentalSpectrum(
88 other->m_qualifiedMassSpectrum, other->m_precision_ptr, precursor_mass_error)),
89 m_qualifiedMassSpectrum(other->m_qualifiedMassSpectrum),
90 m_precision_ptr(other->m_precision_ptr),
91 m_aaCode(other->m_aaCode),
92 m_precursor_mass_error(precursor_mass_error)
93{
95 for(std::size_t iter = 0; iter < m_aaCode.getSize(); iter++)
96 {
97 m_aapositions.push_back(std::make_shared<std::vector<AaPosition>>());
98 m_aapositions.back()->reserve(this->size() - 1);
99 }
100 m_supported_peaks.reserve(this->size());
101 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
102 m_reindexed_peaks.push_back(0);
103 for(std::size_t iter = 1; iter < this->size(); iter++)
104 {
105 m_supported_peaks.push_back(std::make_shared<std::vector<uint8_t>>());
106 m_reindexed_peaks.push_back(-1);
107 }
108 this->at(0).peak_mz = pappso::MHPLUS + 2 * pappso::MPROTIUM + pappso::MASSOXYGEN;
109 this->back().peak_mz =
112}
113
115{
116}
117
118// Add comments !!
119void
121{
122 bool found;
123 uint8_t aa;
124 std::vector<double>::iterator iter1, iter2;
125 std::size_t peak1, peak2, next_l_peak;
126 std::vector<double> mass_list = getMassList();
127
128 peak1 = -1;
129 for(iter1 = mass_list.begin(); iter1 != mass_list.end(); iter1++)
130 {
131 peak1++;
132 peak2 = peak1;
133 for(iter2 = iter1 + 1; iter2 != mass_list.end(); iter2++)
134 {
135 peak2++;
136 aa = m_aaCode.getAaCodeByMass(*(iter2) - *(iter1), m_precision_ptr);
137 if(aa != 0)
138 {
139 next_l_peak = 0;
140 for(std::size_t iter = 1; iter < peak1;
141 iter++) // Search of the closer supported left peak.
142 // Possible optimization => search from the right
143 {
144 if(m_reindexed_peaks.at(iter) >= 0)
145 {
146 next_l_peak = iter;
147 }
148 }
149 if(m_reindexed_peaks.at(peak2) == -1)
150 {
151 addSupportedPeak(peak2);
152 m_supported_peaks.at(peak2)->push_back(aa);
153 }
154 if(m_reindexed_peaks.at(peak1) >= 0)
155 {
156 addAaPosition(aa, peak2, peak1, next_l_peak, true);
157 }
158 else
159 {
160 addAaPosition(aa, peak2, next_l_peak, next_l_peak, false);
161 }
162 }
163 }
164 }
165
166 removeUnsupportedMasses();
167 correctPeakIndexes();
168
169 // std::size_t i = 0;
170 // for(auto &data_point : *this)
171 // {
172 // data_point.indice = i;
173 // i++;
174 // }
175
176 fillComplementaryPeakIndexes();
177}
178
179// pappso::Aa const *
180// SpOMSSpectrum::findAAMass(double mass, bool *found) const
181// {
182// bool ok;
183// // auto charge = m_qualifiedMassSpectrum.getPrecursorCharge(&ok);
184
185// if(!ok)
186// {
187// throw pappso::PappsoException(
188// QObject::tr("precursor charge is not defined in spectrum %1")
189// .arg(m_qualifiedMassSpectrum.getMassSpectrumId().getNativeId()));
190// }
191// pappso::MzRange mz_range(mass / m_qualifiedMassSpectrum.getPrecursorCharge(),
192// m_precision_ptr);
193
194// for(std::unordered_map<const Aa, double>::const_iterator aa = aaMasses.begin();
195// aa != aaMasses.end();
196// aa++)
197// {
198// if(mz_range.contains(aa->second))
199// {
200// if(found != nullptr)
201// {
202// *found = true;
203// }
204// return &(aa->first);
205// }
206// }
207// if(found != nullptr)
208// {
209// *found = false;
210// }
211// return nullptr;
212// }
213
214// Not sure if optimal
215void
217{
218 std::vector<specglob::ExperimentalSpectrumDataPoint> kept_peaks;
219 for(std::vector<specglob::ExperimentalSpectrumDataPoint>::iterator iter = this->begin();
220 iter != this->end();
221 iter++)
222 {
223 if(m_reindexed_peaks.at(iter->indice) >= 0)
224 {
225 kept_peaks.push_back(*iter);
226 }
227 }
228 this->clear();
229 this->assign(kept_peaks.begin(), kept_peaks.end());
230}
231
232void
234 const std::size_t r_peak,
235 const std::size_t l_peak,
236 const std::size_t next_l_peak,
237 bool l_support)
238{
239 // aa=0 corresponds to no amino acid identified, thus aa is always >=1. We substract 1 to aa to
240 // avoid keeping an empty, useless vector.
241 if(l_support)
242 {
243 m_aapositions.at(aa - 1)->push_back(
244 {r_peak, l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
245 }
246 else
247 {
248 m_aapositions.at(aa - 1)->push_back(
249 {r_peak, next_l_peak, next_l_peak, computeCondition(l_peak, l_support), l_support});
250 }
251}
252
253uint32_t
255 bool l_support) const
256{
257 uint32_t condition;
258 if(l_peak == 0)
259 {
260 condition = 2;
261 }
262 else if(!l_support)
263 {
264 condition = 1;
265 }
266 else
267 {
268 condition = 0;
269 for(std::vector<uint8_t>::iterator aa = m_supported_peaks.at(l_peak)->begin();
270 aa != m_supported_peaks.at(l_peak)->end();
271 aa++)
272 {
273 condition += 2 << *(aa);
274 }
275 }
276 return condition;
277}
278
279std::vector<pappso::specpeptidoms::AaPosition> &
281{
282 return *m_aapositions.at(m_aaCode.getAaCode(aa.getLetter()) - 1);
283}
284
285std::vector<pappso::specpeptidoms::AaPosition>
287 std::vector<std::size_t> peaks_to_remove) const
288{
289 std::vector<AaPosition> aa_positions;
290 for(auto aap : *m_aapositions.at(m_aaCode.getAaCode(aa.getLetter()) - 1))
291 {
292 if(std::find(peaks_to_remove.begin(), peaks_to_remove.end(), aap.r_peak) ==
293 peaks_to_remove.end())
294 {
295 aa_positions.push_back(aap);
296 }
297 }
298 return aa_positions;
299}
300
301std::vector<double>
303{
304 std::vector<double> mass_list;
305 for(const specglob::ExperimentalSpectrumDataPoint &n : *this)
306 {
307 mass_list.push_back(n.peak_mz);
308 }
309 return mass_list;
310}
311
314{
315 return this->at(indice).type;
316}
317
318uint
320{
321 return m_qualifiedMassSpectrum.getPrecursorCharge();
322}
323
324double
325pappso::specpeptidoms::SpOMSSpectrum::getMZShift(std::size_t l_peak, std::size_t r_peak) const
326{
327 return this->at(r_peak).peak_mz - this->at(l_peak).peak_mz;
328}
329
330double
332{
333 return this->m_qualifiedMassSpectrum.getPrecursorMass() - m_precursor_mass_error -
334 this->at(peak).peak_mz + MHPLUS;
335}
336
337void
339{
340 std::size_t counter = 0;
341 for(std::size_t iter = 0; iter < peak; iter++)
342 {
343 if(m_reindexed_peaks.at(iter) >= 0)
344 {
345 counter++;
346 }
347 }
348 m_reindexed_peaks.at(peak) = counter;
349 for(std::size_t iter = peak + 1; iter < m_reindexed_peaks.size(); iter++)
350 {
351 if(m_reindexed_peaks.at(iter) >= 0)
352 {
353 m_reindexed_peaks.at(iter)++;
354 }
355 }
356}
357
358void
360{
361 for(auto aa = m_aapositions.begin(); aa != m_aapositions.end(); aa++)
362 {
363 for(auto aap = aa->get()->begin(); aap != aa->get()->end(); aap++)
364 {
365 aap->l_peak = m_reindexed_peaks.at(aap->l_peak);
366 aap->r_peak = m_reindexed_peaks.at(aap->r_peak);
367 aap->next_l_peak = m_reindexed_peaks.at(aap->next_l_peak);
368 }
369 }
370}
371
372void
374{
375 std::size_t left_index, right_index;
376
377 m_complementary_peak_indexes.reserve(this->size());
378 while(m_complementary_peak_indexes.size() < this->size())
379 {
380 m_complementary_peak_indexes.push_back(0);
381 }
382 left_index = 0;
383 right_index = this->size() - 1;
384 double comp_mass = m_qualifiedMassSpectrum.getPrecursorMass() + 2 * MHPLUS;
385
386 while(left_index < right_index)
387 {
388 pappso::MzRange mz_range(comp_mass - this->at(left_index).peak_mz, m_precision_ptr);
389 if(mz_range.contains(this->at(right_index).peak_mz))
390 {
391 m_complementary_peak_indexes.at(left_index) = right_index;
392 m_complementary_peak_indexes.at(right_index) = left_index;
393 qDebug() << left_index << right_index;
394 }
395 if(comp_mass - this->at(left_index).peak_mz - this->at(right_index).peak_mz >= 0)
396 {
397 left_index++;
398 }
399 else
400 {
401 right_index--;
402 }
403 }
404}
405
406std::size_t
408{
409 return m_complementary_peak_indexes.at(peak);
410}
collection of integer code for each amino acid 0 => null 1 to 20 => amino acid sorted by there mass (...
Definition: aacode.h:44
std::size_t getSize() const
Definition: aacode.cpp:74
Definition: aa.h:45
bool contains(pappso_double) const
Definition: mzrange.cpp:120
Class representing a fully specified mass spectrum.
double getPrecursorMass(bool *ok_p=nullptr) const
get precursor mass given the charge stats and precursor mz
void preprocessSpectrum()
Preprocess the spectrum.
double getMZShift(std::size_t l_peak, std::size_t r_peak) const
Returns the mz difference between two peaks.
uint getPrecursorCharge() const
Returns the spectrum's precursor's charge.
SpOMSSpectrum(pappso::QualifiedMassSpectrum &qmass_spectrum, pappso::PrecisionPtr precision_ptr, const pappso::AaCode &aaCode)
double getMissingMass(std::size_t peak) const
Returns the missing mass between a peak and the precursor's mass (shift at the end).
std::vector< std::shared_ptr< std::vector< uint8_t > > > m_supported_peaks
uint32_t computeCondition(const std::size_t l_peak, bool l_support) const
Computes the "condition" integer, used to apply the three peaks rule.
void addAaPosition(uint8_t aa, const std::size_t r_peak, const std::size_t l_peak, const std::size_t next_l_peak, bool l_support)
Adds an amino acid position to the data structure.
void removeUnsupportedMasses()
Removes the unsupported peaks (without an amino acid to the left) from the spectrum.
pappso::QualifiedMassSpectrum m_qualifiedMassSpectrum
std::vector< std::shared_ptr< std::vector< AaPosition > > > m_aapositions
void correctPeakIndexes()
Reindexes the peaks after removal of the unsupported peaks.
void addSupportedPeak(std::size_t peak)
Add a peak to the supported peaks list.
void fillComplementaryPeakIndexes()
For each point of the spectrum, indicate the index of its complementary peak;.
std::size_t getComplementaryPeak(std::size_t peak) const
specglob::ExperimentalSpectrumDataPointType peakType(std::size_t indice) const
Returns the type of one of the spectrum's peaks.
std::vector< double > getMassList() const
Returns the spectrum's list of masses.
std::vector< AaPosition > & getAaPositions(const Aa &aa) const
Returns the list of aa_positions for a given amino acid.
ExperimentalSpectrumDataPointType
Definition: types.h:78
std::shared_ptr< const SpOMSSpectrum > SpOMSSpectrumCsp
Definition: spomsspectrum.h:65
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
const pappso_double MHPLUS(1.007276466879)
const pappso_double MPROTIUM(1.007825032241)
unsigned int uint
Definition: types.h:59
const pappso_double MASSOXYGEN(15.99491461956)
SpecPeptidOMS Spectrum.
This header contains all the type re-definitions and all the global variables definitions used in the...