libpappsomspp
Library for mass spectrometry
psmfilereaderbase.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/psm/psmfilereaderbase.cpp
3 * \date 05/07/2025
4 * \author Olivier Langella
5 * \brief Base class to read CBOR PSM file
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "psmfilereaderbase.h"
29#include "../../../pappsoexception.h"
30#include <QDebug>
31#include "../../../peptide/peptideproformaparser.h"
32#include <QCborArray>
33
35{
36}
37
39{
40 if(mpa_cborReader != nullptr)
41 delete mpa_cborReader;
42}
43
44void
46{
47 qDebug();
48 initCborReader(cborp);
49
50 qDebug();
51 if(mpa_cborReader->isMap())
52 {
53 readRoot(monitor);
54 }
55 qDebug();
56}
57
58void
60{
61}
62
63bool
65{
66 for(auto &it : m_currentPsmProteinRefList)
67 {
68 if(!m_proteinMap.getByAccession(it.accession).isTarget)
69 return true;
70 }
71 return false;
72}
73
74bool
76{
77 for(auto &it : m_currentPsmProteinRefList)
78 {
79 if(m_proteinMap.getByAccession(it.accession).isTarget)
80 return true;
81 }
82 return false;
83}
84
85
86void
88{
89 qDebug();
90 mpa_cborReader->enterContainer();
91
92 getExpectedString();
93 if(m_expectedString == "informations")
94 {
95 readInformations(monitor);
96 getExpectedString();
97
98 if(m_expectedString == "log")
99 {
100 readLog(monitor);
101 getExpectedString();
102 }
103
104 logReady(monitor);
105 }
106 else
107 {
108 throw pappso::PappsoException("ERROR: expecting informations element");
109 }
110
111
112 if(m_expectedString == "parameter_map")
113 {
114 readParameterMap(monitor);
115 }
116 else
117 {
118 throw pappso::PappsoException("ERROR: expecting parameter_map element");
119 }
120
121
122 getExpectedString();
123 m_targetFastaFiles.clear();
124 m_decoyFastaFiles.clear();
125 if(m_expectedString == "target_fasta_files")
126 {
127 mpa_cborReader->readArray(m_targetFastaFiles);
128 getExpectedString();
129 }
130
131 if(m_expectedString == "decoy_fasta_files")
132 {
133 mpa_cborReader->readArray(m_decoyFastaFiles);
134 getExpectedString();
135 }
136 fastaFilesReady(monitor);
137
138 if(m_expectedString == "protein_map")
139 {
140 readProteinMap(monitor);
141 getExpectedString();
142 }
143
144 if(m_expectedString == "sample_list")
145 {
146 sampleListStarted(monitor);
147 mpa_cborReader->enterContainer(); // array
148 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
149 {
150 readSample(monitor);
151 }
152 mpa_cborReader->leaveContainer(); // array
153 sampleListFinished(monitor);
154 }
155 else
156 {
158 QObject::tr("ERROR: expecting sample_list element not %1").arg(m_expectedString));
159 }
160 mpa_cborReader->leaveContainer(); // whole file
161 if(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
162 {
163 readRoot(monitor);
164 }
165}
166
167void
169 [[maybe_unused]])
170{
171 bool is_ok;
172 // m_cborInformations.clear();
173 is_ok = mpa_cborReader->readCborMap(m_cborInformations);
174
175 if(!is_ok)
176 {
177 throw pappso::PappsoException("ERROR: PSM cbor header informations not well formed");
178 }
179 qDebug() << m_cborInformations.keys();
180 if(m_cborInformations.value("type").toString() != "psm")
181 {
182 QStringList all_keys;
183 for(auto it_k : m_cborInformations.keys())
184 {
185 all_keys << it_k.toString();
186 }
188 QObject::tr("ERROR: this file does not contain PSM data but %1 and %2")
189 .arg(m_cborInformations.value("type").toString())
190 .arg(all_keys.join(" ")));
191 }
192 informationsReady(monitor);
193}
194
195void
197{
198 bool is_ok;
199 // m_cborInformations.clear();
200 is_ok = mpa_cborReader->readCborArray(m_cborLog);
201
202 if(!is_ok)
203 {
204 throw pappso::PappsoException("ERROR: PSM cbor header log not well formed");
205 }
206}
207
208
209void
211 [[maybe_unused]])
212{
213 bool is_ok;
214 m_cborParameterMap.clear();
215 is_ok = mpa_cborReader->readCborMap(m_cborParameterMap);
216
217 if(!is_ok)
218 {
219 throw pappso::PappsoException("ERROR: PSM cbor parameter_map not well formed");
220 }
221 parameterMapReady(monitor);
222}
223
224void
226{
227 m_proteinMap.readMap(*mpa_cborReader);
228 proteinMapReady(monitor);
229}
230
231
234{
235 PsmProteinRef protein_ref;
236 protein_ref.accession = "";
237 protein_ref.positions.clear();
238 mpa_cborReader->enterContainer();
239 getExpectedString();
240 qDebug() << m_expectedString;
241 if(m_expectedString == "accession")
242 {
243 is_ok = mpa_cborReader->decodeString(protein_ref.accession);
244 if(!is_ok)
245 {
246 throw pappso::PappsoException("ERROR: protein accession is not a string");
247 }
248 }
249 else
250 {
251 throw pappso::PappsoException("ERROR: expecting accession element in PSM protein_list");
252 }
253
254 getExpectedString();
255 qDebug() << m_expectedString;
256 if(m_expectedString == "positions")
257 {
258 mpa_cborReader->readArray(protein_ref.positions);
259
260 // mpa_cborReader->next();
261 }
262 mpa_cborReader->leaveContainer();
263
264 qDebug() << "end";
265 return protein_ref;
266}
267
268
271{
272 PsmFile file;
273 mpa_cborReader->enterContainer();
274 getExpectedString();
275 if(m_expectedString == "name")
276 {
277 if(!mpa_cborReader->decodeString(file.name))
278 {
279 throw pappso::PappsoException("file name is not a string");
280 }
281 }
282 else
283 {
284 throw pappso::PappsoException("ERROR: expecting name element in file");
285 }
286 mpa_cborReader->leaveContainer();
287 return file;
288}
289
290
291void
293 const pappso::cbor::psm::PsmFile &psm_file)
294{
295 writer.startMap();
296 writer.append("name");
297 writer.append(psm_file.name);
298 writer.endMap();
299}
300
301void
303 const std::vector<PsmFile> &file_list)
304{
305 writer.startArray();
306 for(auto &psm_file : file_list)
307 {
308 writePsmFile(writer, psm_file);
309 }
310 writer.endArray();
311}
312
313
314void
316{
317 //"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1",
318 qDebug();
319 mpa_cborReader->enterContainer();
320 getExpectedString();
321
322 qDebug() << m_expectedString;
323 if(m_expectedString == "name")
324 {
325 if(!mpa_cborReader->decodeString(m_currentSampleName))
326 {
327 throw pappso::PappsoException("sample name is not a string");
328 }
329 }
330 else
331 {
332 throw pappso::PappsoException("ERROR: expecting name element in file");
333 }
334 //"identification_file_list": [{ "name":
335 //"/home/langella/data1/tandem/tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.xml",
336 //}],
337
338 getExpectedString();
339
340 qDebug() << m_expectedString;
341 m_currentIdentificationFileList.clear();
342 if(m_expectedString == "identification_file_list")
343 {
344 bool is_ok;
345 mpa_cborReader->enterContainer();
346
347 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
348 {
349 m_currentIdentificationFileList.push_back(readPsmFile(is_ok));
350 }
351 mpa_cborReader->leaveContainer();
352
353 getExpectedString();
354 }
355 //"peaklist_file": {"name": "tandem2017_nopatch_20120906_balliau_extract_1_A01_urnb-1.mzml"
356 //},
357
358 if(m_expectedString == "peaklist_file")
359 {
360 bool is_ok;
361 m_currentPeaklistFile = readPsmFile(is_ok);
362 }
363 else
364 {
365 throw pappso::PappsoException("ERROR: expecting peaklist_file element in sample");
366 }
367 //"scan_list": [
368 sampleStarted(monitor);
369 getExpectedString();
370 if(m_expectedString == "scan_list")
371 {
372 mpa_cborReader->enterContainer();
373
374 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
375 {
376 readScan(monitor);
377 }
378 mpa_cborReader->leaveContainer();
379 }
380 else
381 {
382 throw pappso::PappsoException("ERROR: expecting scan_list element in sample");
383 }
384 mpa_cborReader->leaveContainer();
385
386 sampleFinished(monitor);
387}
388
389void
391{
392 qDebug();
393 m_cborScanId.clear();
394 mpa_cborReader->enterContainer();
395 //"id": {
396 //"index": 1976
397 //},
398 qDebug() << "scan begin";
399
400 getExpectedString();
401 qDebug() << m_expectedString;
402 if(m_expectedString == "id")
403 {
404 if(!mpa_cborReader->readCborMap(m_cborScanId))
405 {
406 throw pappso::PappsoException(QObject::tr("id element in scan is not a cbor map"));
407 }
408 }
409 else
410 {
412 QObject::tr("ERROR: expecting id element in scan not %1").arg(m_expectedString));
413 }
414 //"precursor": {
415 //"z": 2,
416 //"mz": 1120.529471
417 //},
418
419 getExpectedString();
420 m_cborScanPrecursor.clear();
421 qDebug() << m_expectedString;
422 if(m_expectedString == "precursor")
423 {
424 if(!mpa_cborReader->readCborMap(m_cborScanPrecursor))
425 {
426 throw pappso::PappsoException(QObject::tr("precursor element in scan is not a cbor map"));
427 }
428 }
429 //"ms2": {PSM CBOR format documentation
430 //"rt": 12648.87,
431 //"mz" :[1,2,3,4],
432 //"intensity" : [1,2,3,4]
433 //},
434
435 getExpectedString();
436 qDebug() << m_expectedString;
437 m_cborScanMs2.clear();
438 if(m_expectedString == "ms2")
439 {
440 if(!mpa_cborReader->readCborMap(m_cborScanMs2))
441 {
443 QObject::tr("ms2 element in scan is not a cbor map %1 %2:\n%3")
444 .arg(m_currentSampleName)
445 .arg(m_cborScanId.value("index").toInteger())
446 .arg(mpa_cborReader->lastError().toString()));
447 }
448 }
449 //"psm_list": [
450 scanStarted(monitor);
451
452 getExpectedString();
453 qDebug() << m_expectedString;
454 if(m_expectedString == "psm_list")
455 {
456 mpa_cborReader->enterContainer();
457 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
458 {
459 readPsm(monitor);
460 }
461 mpa_cborReader->leaveContainer();
462 }
463
464 mpa_cborReader->leaveContainer();
465 qDebug() << "scan end";
466 scanFinished(monitor);
467 qDebug();
468}
469
470void
472{
473 bool is_ok;
474 mpa_cborReader->enterContainer();
475 getExpectedString();
476 // "proforma": "AQEEM[+15.99491]AQVAK",
477 if(m_expectedString == "proforma")
478 {
479 if(!mpa_cborReader->decodeString(m_currentPsmProforma))
480 {
481 throw pappso::PappsoException("ERROR: proforma element in psm-scan is not a string");
482 }
483 }
484 else
485 {
486 throw pappso::PappsoException("ERROR: expecting proforma element in psm-scan");
487 }
488 //"protein_list" : [
489 //{
490 //"accession": "GRMZM2G083841_P01",
491 //"position": [15,236]
492 //}
493 //],
494
495 getExpectedString();
496 m_currentPsmProteinRefList.clear();
497 qDebug() << m_expectedString;
498 if(m_expectedString == "protein_list")
499 {
500 mpa_cborReader->enterContainer();
501 while(!mpa_cborReader->lastError() && mpa_cborReader->hasNext())
502 {
503 m_currentPsmProteinRefList.push_back(readPsmProteinRef(is_ok));
504 if(!is_ok)
505 {
507 QObject::tr("ERROR: reading protein_list element in psm-scan"));
508 }
509 }
510 mpa_cborReader->leaveContainer();
511 }
512 else
513 {
514 throw pappso::PappsoException("ERROR: expecting protein_list element in psm-scan");
515 }
516 //"eval": {
517 qDebug();
518 m_cborScanPsmEval.clear();
519 if(!getExpectedString())
520 {
522 QObject::tr("ERROR: expecting eval element in psm-scan %1").arg(m_currentPsmProforma));
523 }
524 if(m_expectedString == "eval")
525 {
526 is_ok = mpa_cborReader->readCborMap(m_cborScanPsmEval);
527 if(!is_ok)
528 {
529 throw pappso::PappsoException("ERROR: eval element in psm-scan is not well formed");
530 }
531 }
532
533 qDebug() << m_expectedString;
534
535
536 mpa_cborReader->leaveContainer();
537 qDebug();
538 psmReady(monitor);
539}
540
541void
543{
544 // PSM is ready, do what you want :)
545}
546
547void
549{
550}
551
552void
554{
555}
556
557void
559{
560}
561
562
563void
565{
566}
567
568void
570{
571}
572
573void
575{
576}
577
578void
580{
581}
582
583void
585{
586}
587
588void
590{
591}
592
595{
596 pappso::PeptideSp peptide_sp;
597 if(m_currentPsmProforma.isEmpty())
598 {
599 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPsmProforma is empty"));
600 }
601 else
602 {
603 peptide_sp = pappso::PeptideProFormaParser::parseString(m_currentPsmProforma);
604 }
605 return peptide_sp;
606}
607
610{
611 if(m_currentPeaklistFile.name.isEmpty())
612 {
613 throw pappso::PappsoException(QObject::tr("ERROR: m_currentPeaklistFile is empty"));
614 }
615 if(m_cborScanId.isEmpty())
616 {
617 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanId is empty"));
618 }
619 if(m_cborScanPrecursor.isEmpty())
620 {
621 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanPrecursor is empty"));
622 }
623 if(m_cborScanMs2.isEmpty())
624 {
625 throw pappso::PappsoException(QObject::tr("ERROR: m_cborScanMs2 is empty"));
626 }
627
628 if(!m_cborScanId.keys().contains("index"))
629 {
630 throw pappso::PappsoException("There is no scan index");
631 }
632 if(!m_cborScanMs2.keys().contains("mz"))
633 {
634 throw pappso::PappsoException("There is no ms2 mz values");
635 }
636 if(!m_cborScanMs2.keys().contains("intensity"))
637 {
638 throw pappso::PappsoException("There is no ms2 intensity values");
639 }
640 pappso::MsRunId msrun_id(m_currentPeaklistFile.name);
641 msrun_id.setSampleName(m_currentSampleName);
642 pappso::MsRunIdCstSPtr msrun_id_sp = std::make_shared<const pappso::MsRunId>(msrun_id);
643 pappso::MassSpectrumId ms_id(msrun_id_sp);
644 ms_id.setSpectrumIndex(m_cborScanId.value("index").toInteger());
645
646 // native_id
647 if(m_cborScanId.keys().contains("native_id"))
648 {
649 ms_id.setNativeId(m_cborScanId.value("native_id").toString());
650 }
651
652 std::vector<DataPoint> data_point_vector;
653 std::size_t i = 0;
654 for(auto cbor_mz_value : m_cborScanMs2.value("mz").toArray())
655 {
656 data_point_vector.push_back(
657 {cbor_mz_value.toDouble(), m_cborScanMs2.value("intensity").toArray().at(i).toDouble()});
658 i++;
659 }
660
661
662 MassSpectrum mass_spectrum(data_point_vector);
663 pappso::PrecursorIonData precursor_ion_data;
664
665 pappso::QualifiedMassSpectrum qualified_mass_spectrum(ms_id);
666 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum.makeMassSpectrumSPtr());
667 qualified_mass_spectrum.setMsLevel(2);
668
669 if(m_cborScanPrecursor.keys().contains("z"))
670 {
671 precursor_ion_data.charge = m_cborScanPrecursor.value("z").toInteger();
672 }
673 if(m_cborScanPrecursor.keys().contains("mz"))
674 {
675 precursor_ion_data.mz = m_cborScanPrecursor.value("mz").toDouble();
676 }
677 if(m_cborScanPrecursor.keys().contains("intensity"))
678 {
679 precursor_ion_data.intensity = m_cborScanPrecursor.value("intensity").toDouble();
680 }
681 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
682 if(m_cborScanMs2.keys().contains("rt"))
683 {
684 qualified_mass_spectrum.setRtInSeconds(m_cborScanMs2.value("rt").toDouble());
685 }
686
687
688 return qualified_mass_spectrum.makeQualifiedMassSpectrumSPtr();
689}
690
691void
693 [[maybe_unused]])
694{
695}
696
697void
699 [[maybe_unused]])
700{
701}
702
703
704double
706{
707 // compute precursor mass given the charge state
708 mz_prec = mz_prec * (double)charge;
709 mz_prec -= (MHPLUS * (double)charge);
710 return mz_prec;
711}
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
Definition: massspectrum.h:73
MassSpectrumSPtr makeMassSpectrumSPtr() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition: msrunid.h:54
void setSampleName(const QString &name)
set a sample name for this MsRunId
Definition: msrunid.cpp:79
static PeptideSp parseString(const QString &pepstr)
Class representing a fully specified mass spectrum.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
QualifiedMassSpectrumSPtr makeQualifiedMassSpectrumSPtr() const
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso::QualifiedMassSpectrumSPtr getCurrentQualifiedMassSpectrumSPtr() const
pappso::PeptideSp getCurrentPsmPeptideSp() const
virtual void sampleListStarted(pappso::UiMonitorInterface &monitor)
void writePsmFileList(CborStreamWriter &writer, const std::vector< PsmFile > &file_list)
double getPrecursorMass(double mz_prec, uint charge) const
convenience function to compute the precursor ion mass
PsmProteinRef readPsmProteinRef(bool &is_ok)
virtual void logReady(pappso::UiMonitorInterface &monitor)
virtual void scanStarted(pappso::UiMonitorInterface &monitor)
virtual void readPsm(pappso::UiMonitorInterface &monitor)
virtual void readLog(pappso::UiMonitorInterface &monitor)
virtual void proteinMapReady(pappso::UiMonitorInterface &monitor)
virtual void sampleStarted(pappso::UiMonitorInterface &monitor)
virtual void readParameterMap(pappso::UiMonitorInterface &monitor)
void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
virtual void readScan(pappso::UiMonitorInterface &monitor)
virtual void readInformations(pappso::UiMonitorInterface &monitor)
virtual void scanFinished(pappso::UiMonitorInterface &monitor)
virtual void sampleListFinished(pappso::UiMonitorInterface &monitor)
virtual void psmReady(pappso::UiMonitorInterface &monitor)
virtual void informationsReady(pappso::UiMonitorInterface &monitor)
void readRoot(pappso::UiMonitorInterface &monitor)
void writePsmFile(CborStreamWriter &writer, const PsmFile &psm_file)
virtual void fastaFilesReady(pappso::UiMonitorInterface &monitor)
virtual void parameterMapReady(pappso::UiMonitorInterface &monitor)
virtual void readProteinMap(pappso::UiMonitorInterface &monitor)
virtual void readSample(pappso::UiMonitorInterface &monitor)
virtual void sampleFinished(pappso::UiMonitorInterface &monitor)
std::shared_ptr< QualifiedMassSpectrum > QualifiedMassSpectrumSPtr
std::shared_ptr< const Peptide > PeptideSp
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:46
const pappso_double MHPLUS(1.007276466879)
unsigned int uint
Definition: types.h:59
Base class to read CBOR PSM file.