libpappsomspp
Library for mass spectrometry
sagetsvhandler.h
Go to the documentation of this file.
1/**
2 * \file input/sage/sagetsvhandler.h
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#pragma once
30
31#include <odsstream/odsdochandlerinterface.h>
32#include "../../../../processing/uimonitor/uimonitorinterface.h"
33#include "../../../../peptide/peptide.h"
34#include "sagereader.h"
35#include "../psmproteinmap.h"
36#include <QCborArray>
37
38namespace pappso::cbor::psm
39{
40/**
41 * @todo write docs
 *
 * Handler deriving from OdsDocHandlerInterface and overriding its
 * startSheet / endSheet / startLine / endLine / setCell / endDocument
 * callbacks.
 * NOTE(review): going by the class name and the file-level brief, this
 * presumably consumes the tabular (TSV) results written by Sage - confirm
 * against the implementation file.
42 */
43class SageTsvHandler : public OdsDocHandlerInterface
44{
45 public:
 /**
  * Identifiers for the table columns known to this handler, with
  * std::int8_t as underlying type. m_columnTypeList stores a sequence of
  * these values.
  * NOTE(review): the enumerator names presumably mirror the column headers
  * of the Sage results table - confirm against the parser.
  */
46 enum class Columns : std::int8_t
47 {
48 psm_id,
49 peptide,
50 proteins,
51 num_proteins,
52 filename,
53 scannr,
54 rank,
55 label,
56 expmass,
57 calcmass,
58 charge,
59 peptide_len,
60 missed_cleavages,
61 semi_enzymatic,
62 isotope_error,
63 precursor_ppm,
64 fragment_ppm,
65 hyperscore,
66 delta_next,
67 delta_best,
68 rt,
69 aligned_rt,
70 predicted_rt,
71 delta_rt_model,
72 ion_mobility,
73 predicted_mobility,
74 delta_mobility,
75 matched_peaks,
76 longest_b,
77 longest_y,
78 longest_y_pct,
79 matched_intensity_pct,
80 scored_candidates,
81 poisson,
82 sage_discriminant_score,
83 posterior_error,
84 spectrum_q,
85 peptide_q,
86 protein_q,
87 ms2_intensity,
88 };
89
 /**
  * Plain aggregate of numeric values (presumably those of one table line -
  * confirm against recordLine()).
  * NOTE(review): among the members visible here only rank, label, expmass,
  * calcmass and charge have default member initializers; the others are
  * indeterminate after default-initialization (e.g. `Line l;`) until they
  * are assigned.
  */
90 struct Line
91 {
92 int rank = 0;
93 int label = 0;
94 double expmass = 0.0;
95 double calcmass = 0.0;
96 int charge = 0;
97 std::size_t peptide_len;
106 double rt;
113 std::size_t matched_peaks;
114 std::size_t longest_b;
115 std::size_t longest_y;
118 std::size_t scored_candidates;
119 double poisson;
123 double peptide_q;
124 double protein_q;
126 };
127 /**
128 * Constructor (takes arguments, so this is not a default constructor)
 *
 * @param sage_reader const reference to a SageReader
 * @param is_fasta_pass boolean flag; NOTE(review): its exact effect is not
 * visible in this header - document it from the implementation
 * @param psm_protein_map non-const reference to a PsmProteinMap, so the
 * handler is able to modify it
129 */
131 const SageReader &sage_reader,
132 bool is_fasta_pass,
133 PsmProteinMap &psm_protein_map);
134
135 /**
136 * Destructor
137 */
138 virtual ~SageTsvHandler();
139 /**
140 * callback that indicates the beginning of a data sheet. Override it in
141 * order to retrieve information about the current data sheet.
142 *
 * @param sheet_name name of the data sheet that starts
143 */
144 virtual void startSheet(const QString &sheet_name) override;
145
146 /**
147 * callback that indicates the end of the current data sheet. Override it if
148 * needed
149 */
150 virtual void endSheet() override;
151
152 /**
153 * callback that indicates a new line start. Override it if needed.
154 */
155
156 virtual void startLine() override;
157
158 /**
159 * callback that indicates a line ending. Override it if needed.
160 */
161
162 virtual void endLine() override;
163
164 /**
165 * callback that reports the content of the current cell in a dedicated Cell
166 * object. Override it if you need to retrieve cell content.
 *
 * @param cell the current cell
167 */
168 virtual void setCell(const OdsCell &cell) override;
169
170 /**
171 * callback that reports the end of the ODS document. Override it if you need
172 * to know that reading is finished.
173 */
174 virtual void endDocument() override;
175
176 private:
 /**
  * One entry of Scan::psm_list: a string named proforma and a CBOR map
  * named cbor_eval.
  * NOTE(review): proforma presumably holds the peptide in ProForma
  * notation - confirm.
  */
177 struct Psm
178 {
179 QString proforma;
181 QCborMap cbor_eval;
182 };
 /**
  * One entry of Sample::scan_map: two CBOR maps (cbor_id, cbor_ms2) and the
  * list of Psm entries attached to it.
  */
183 struct Scan
184 {
185 QCborMap cbor_id;
187 QCborMap cbor_ms2;
188 std::vector<Psm> psm_list;
189 };
190
 /**
  * One entry of m_sampleMap: a name and its Scan entries indexed by a
  * QString key.
  */
191 struct Sample
192 {
193 QString name;
195 std::map<QString, Scan> scan_map;
196 };
197
198 private:
 // Parsing helpers, each taking the text to parse as a QString.
 // NOTE(review): what parseSpectrumStringId()'s bool result signals is not
 // visible in this header - document it from the implementation.
199 void parsePeptide(const QString &peptide_str);
200 void parseProteins(const QString &proteins_str);
201 bool parseSpectrumStringId(const QString &spectrum_string_id);
202 void parseMsRunFilename(const QString &msrun_filename);
203 void recordLine();
204
 // Output helpers, one per nesting level of the Sample / Scan / Psm
 // structures declared above.
205 void writeSample(const Sample &one_sample);
206 void writeScan(const Scan &one_scan);
207 void writePsm(const Psm &one_psm);
208
209 private:
 // Counters, all starting at 0.
213 std::size_t m_progressIndex = 0;
214 std::size_t m_lineNumber = 0;
215 std::size_t m_columnNumber = 0;
 // Sequence of Columns identifiers (presumably one per table column, in
 // file order - confirm).
216 std::vector<Columns> m_columnTypeList;
217
219 std::vector<SageReader::SageModification> m_staticModificationList;
220 std::vector<SageReader::SageModification> m_variableModificationList;
 // NOTE(review): unlike the counters above, m_spectrumIndex has no default
 // initializer here; make sure it is assigned before it is first read.
221 std::size_t m_spectrumIndex;
223 QString m_decoyTag;
225 QStringList m_proteinList;
227 std::map<QString, Sample> m_sampleMap;
229};
230} // namespace pappso::cbor::psm
std::vector< SageReader::SageModification > m_staticModificationList
void parseProteins(const QString &proteins_str)
virtual void endSheet() override
pappso::UiMonitorInterface * mp_monitor
virtual void startSheet(const QString &sheet_name) override
std::vector< SageReader::SageModification > m_variableModificationList
void parsePeptide(const QString &peptide_str)
bool parseSpectrumStringId(const QString &spectrum_string_id)
SageTsvHandler(pappso::UiMonitorInterface *p_monitor, const SageReader &sage_reader, bool is_fasta_pass, PsmProteinMap &psm_protein_map)
void writeSample(const Sample &one_sample)
virtual void setCell(const OdsCell &cell) override
void writePsm(const Psm &one_psm)
void writeScan(const Scan &one_scan)
std::map< QString, Sample > m_sampleMap
virtual void endDocument() override
virtual void startLine() override
void parseMsRunFilename(const QString &msrun_filename)
virtual void endLine() override
std::vector< Columns > m_columnTypeList
std::shared_ptr< const Peptide > PeptideSp
@ rt
Retention time.