libpappsomspp
Library for mass spectrometry
psmfeaturesscan.cpp
Go to the documentation of this file.
/**
 * \file pappsomspp/processing/cbor/psm/evalscan/psmfeaturesscan.cpp
 * \date 15/07/2025
 * \author Olivier Langella
 * \brief compute features on a scan's PSMs
 */
7
/*******************************************************************************
 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
 *
 * This file is part of PAPPSOms-tools.
 *
 * PAPPSOms-tools is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PAPPSOms-tools is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/
27
28#include "psmfeaturesscan.h"
29#include <QCborArray>
30#include <QCborMap>
31#include "../../../../peptide/peptideproformaparser.h"
32#include "../../../../psm/xtandem/xtandemhyperscore.h"
33
34
36 const pappso::cbor::psm::PsmFileScanProcess &psm_file_scan_process,
37 pappso::XtandemSpectrumProcess &tandem_spectrum_process,
38 std::list<pappso::PeptideIon> &ion_list,
39 pappso::PsmFeatures &psm_features,
40 pappso::PrecisionPtr fragment_tolerance)
41 : CborScanMapBase(psm_file_scan_process),
42 m_tandemSpectrumProcess(tandem_spectrum_process),
43 m_ionList(ion_list),
44 m_psmFeatures(psm_features)
45{
46 m_fragmentTolerance = fragment_tolerance;
47}
48
50{
51}
52
53double
55{
56 if(input < 0)
57 return 0;
58 return input;
59}
60
61void
63{
64
65 if(keys().contains("psm_list"))
66 {
67 QualifiedMassSpectrumSPtr qualified_mass_spectrum = getCurrentQualifiedMassSpectrumSPtr();
68
69 pappso::MassSpectrum spectrum =
70 m_tandemSpectrumProcess.process(*qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
71 qualified_mass_spectrum.get()->getPrecursorMz(),
72 qualified_mass_spectrum.get()->getPrecursorCharge());
73
74 QCborArray new_psm_arr;
75 for(QCborValue cbor_psm : value("psm_list").toArray())
76 {
77 QCborMap cbor_psm_map = cbor_psm.toMap();
78 QCborMap cbor_psm_features;
79 pappso::PeptideSp peptide_sp =
80 pappso::PeptideProFormaParser::parseString(cbor_psm_map.value("proforma").toString());
81
82
83 std::size_t peptide_size = peptide_sp.get()->size();
84 cbor_psm_features.insert(QString("peptide_size"), (unsigned int)peptide_size);
85
86 pappso::XtandemHyperscore hyperscore(spectrum,
87 peptide_sp,
88 qualified_mass_spectrum.get()->getPrecursorCharge(),
89 m_fragmentTolerance,
90 m_ionList,
91 true);
92 cbor_psm_features.insert(QString("hyperscore"), QCborValue(hyperscore.getHyperscore()));
93
94
95 m_psmFeatures.setPeptideSpectrumCharge(
96 peptide_sp,
97 qualified_mass_spectrum.get()->getMassSpectrumSPtr().get(),
98 qualified_mass_spectrum.get()->getPrecursorCharge(),
99 2);
100 // TIC
101 cbor_psm_features.insert(QString("total_intensity"),
102 std::log(m_psmFeatures.getTotalIntensity()));
103 // MaxIntALL
104 cbor_psm_features.insert(QString("max_intensity"),
105 checkInf(std::log(qualified_mass_spectrum.get()
106 ->getMassSpectrumSPtr()
107 .get()
108 ->maxIntensityDataPoint()
109 .y)));
110
111 // MaxYionInt
112 cbor_psm_features.insert(
113 QString("MaxYionInt"),
114 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(pappso::PeptideIon::y))));
115
116 // MaxBionInt
117 cbor_psm_features.insert(
118 QString("MaxBionInt"),
119 checkInf(std::log(m_psmFeatures.getMaxIntensityPeakIonMatch(pappso::PeptideIon::b))));
120
121 // SumYmatchInt
122 cbor_psm_features.insert(
123 QString("SumYmatchInt"),
124 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(pappso::PeptideIon::y))));
125
126 // SumBmatchInt
127 cbor_psm_features.insert(
128 QString("SumBmatchInt"),
129 checkInf(std::log(m_psmFeatures.getIntensityOfMatchedIon(pappso::PeptideIon::b))));
130
131 // FracYmatchInt
132 cbor_psm_features.insert(
133 QString("FracYmatchInt"),
134 checkInf(m_psmFeatures.getIntensityOfMatchedIon(pappso::PeptideIon::y) /
135 m_psmFeatures.getTotalIntensity()));
136 // FracBmatchInt
137 cbor_psm_features.insert(
138 QString("FracBmatchInt"),
139 checkInf(m_psmFeatures.getIntensityOfMatchedIon(pappso::PeptideIon::b) /
140 m_psmFeatures.getTotalIntensity()));
141
142 // SeqCoverYion
143 cbor_psm_features.insert(
144 QString("SeqCoverYion"),
145 (double)m_psmFeatures.getAaSequenceCoverage(pappso::PeptideIon::y) /
146 (double)peptide_size);
147 // SeqCoverBion
148 cbor_psm_features.insert(
149 QString("SeqCoverBion"),
150 (double)m_psmFeatures.getAaSequenceCoverage(pappso::PeptideIon::b) /
151 (double)peptide_size);
152
153
154 // ConsecutiveYion
155 cbor_psm_features.insert(
156 QString("ConsecutiveYion"),
157 (qint64)m_psmFeatures.getMaxConsecutiveIon(pappso::PeptideIon::y));
158 // ConsecutiveBion
159 cbor_psm_features.insert(
160 QString("ConsecutiveBion"),
161 (qint64)m_psmFeatures.getMaxConsecutiveIon(pappso::PeptideIon::b));
162
163 // MassErrMean
164 cbor_psm_features.insert(QString("MassErrMean"), m_psmFeatures.getMatchedMzDiffMean());
165
166 // MassErrSD
167 cbor_psm_features.insert(QString("MassErrSD"), m_psmFeatures.getMatchedMzDiffSd());
168
169 // NumofAnnoPeaks
170 cbor_psm_features.insert(QString("NumofAnnoPeaks"),
171 (unsigned int)m_psmFeatures.getNumberOfMatchedIons());
172
173 // NumofComplementPeaks
174 std::size_t num_of_pairs = m_psmFeatures.countMatchedIonComplementPairs();
175 cbor_psm_features.insert(QString("NumofComplementPeaks"), (unsigned int)num_of_pairs);
176 if(num_of_pairs > 0)
177 {
178 // SumComplementPeaksInt
179 cbor_psm_features.insert(
180 QString("SumComplementPeaksInt"),
181 std::log(m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs()));
182
183 // FracComplementPeaksInt
184 cbor_psm_features.insert(
185 QString("FracComplementPeaksInt"),
186 m_psmFeatures.getTotalIntensityOfMatchedIonComplementPairs() /
187 m_psmFeatures.getTotalIntensity());
188 // SeqCoverComplementPeaks
189 cbor_psm_features.insert(
190 QString("SeqCoverComplementPeaks"),
191 (double)m_psmFeatures.getComplementPairsAaSequenceCoverage() /
192 (double)peptide_size);
193 }
194 pappso::LinearRegression lr = m_psmFeatures.getIonIsotopeLinearRegression();
195 cbor_psm_features.insert(QString("lrSize"), (unsigned int)lr.getSize());
196
197
198 double coeff_of_determination = lr.getCoefficientOfDetermination();
199 if(std::isnan(coeff_of_determination))
200 {
201 }
202 else
203 {
204 cbor_psm_features.insert(QString("lrCoeffDet"), coeff_of_determination);
205 }
206
207
208 QCborMap psm_eval = cbor_psm_map.value("eval").toMap();
209 psm_eval.remove(QString("features"));
210 psm_eval.insert(QString("features"), cbor_psm_features);
211 cbor_psm_map.remove(QString("eval"));
212 cbor_psm_map.insert(QString("eval"), psm_eval);
213
214 new_psm_arr.push_back(cbor_psm_map);
215 }
216
217 insert(QString("psm_list"), new_psm_arr);
218 }
219}
std::size_t getSize() const
get data size
double getCoefficientOfDetermination() const
get Coefficient of determination (R2)
Class to represent a mass spectrum.
Definition: massspectrum.h:73
static PeptideSp parseString(const QString &pepstr)
std::map< pappso_double, pappso_double > toMap() const
Definition: trace.cpp:716
pappso_double getHyperscore() const
PsmFeaturesScan(const PsmFileScanProcess &psm_file_scan_process, pappso::XtandemSpectrumProcess &tandem_spectrum_process, std::list< pappso::PeptideIon > &ion_list, pappso::PsmFeatures &psm_features, pappso::PrecisionPtr fragment_tolerance)
double checkInf(double input) const
pappso::PrecisionPtr m_fragmentTolerance
Basic PSM file reader to process scan (parallelized scan processing)
y: C-terminal amino ions.
b: N-terminal acylium ions.
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
compute features on scan's PSM