libpappsomspp
Library for mass spectrometry
sagetsvhandler.cpp
Go to the documentation of this file.
1/**
2 * \file sagetsvhandler.cpp
3 * \date 21/08/2024
4 * \author Olivier Langella
5 * \brief read data files from Sage output
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2024 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of i2MassChroQ.
13 *
14 * i2MassChroQ is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * i2MassChroQ is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with i2MassChroQ. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29
30#include "sagetsvhandler.h"
31#include "../../../../exception/exceptionnotimplemented.h"
32#include "../../../../exception/exceptionnotpossible.h"
33#include "../../../../exception/exceptionoutofrange.h"
34#include "../../../../exception/exceptionnotfound.h"
35#include "../../../../exception/exceptioninterrupted.h"
36#include "../../../../peptide/peptideproformaparser.h"
37#include "../../../../msrun/msrunreader.h"
38
39
41 const SageReader &sage_reader,
42 bool is_fasta_pass,
43 PsmProteinMap &psm_protein_map)
44 : m_sageReader(sage_reader), m_psmProteinMap(psm_protein_map)
45{
46 mp_monitor = p_monitor;
49 m_decoyTag = sage_reader.getDecoyTag();
50 m_isFirstPass = is_fasta_pass;
51}
52
// Empty body. NOTE(review): the signature line (listing line 53) is missing
// from this dump; by position this is presumably the destructor — confirm.
54{
55}
56
// Writes every sample accumulated in m_sampleMap by calling writeSample() on
// each mapped value.
// NOTE(review): the signature line is missing from this dump; presumably this
// is the endDocument() callback — confirm.
// NOTE(review): `auto` takes each map entry by value, so every sample is
// copied before being written; a const reference would avoid that copy.
57void
59{
60 for(auto it_sample_map : m_sampleMap)
61 {
62 writeSample(it_sample_map.second);
63 }
64}
65
// Finishes the current row: calls recordLine() when m_line.charge is non-zero,
// then increments m_lineNumber.
// NOTE(review): the signature line is missing from this dump; presumably this
// is the endLine() callback. The charge test presumably skips the header row
// (no cell of that row writes m_line.charge) — this assumes Line() initialises
// charge to 0; confirm in the header.
66void
68{
69 if(m_line.charge != 0)
70 recordLine();
71 m_lineNumber++;
72}
73
// Intentionally empty callback.
// NOTE(review): the signature line is missing from this dump; presumably this
// is endSheet() — confirm.
74void
76{
77}
78
// Handles one cell of the Sage TSV table (the `cell` argument).
//
// Header row (m_lineNumber == 0): the cell text is a column name; the matching
// Columns enumerator is appended to m_columnTypeList so that the position of
// each column is remembered. An unknown name throws
// pappso::ExceptionNotPossible.
//
// Any other row: the column type stored at index m_columnNumber selects what
// to do with the value. peptide, proteins, filename and scannr are forwarded
// to their parse* helpers; psm_id is ignored; num_proteins is only checked;
// every other column is read with getDoubleValue() and stored in m_line.
//
// m_columnNumber is incremented after every cell, header cells included.
//
// NOTE(review): the signature line and listing lines 256, 273 and 388 are
// missing from this dump. The three orphaned QObject::tr(...) lines below are
// presumably the arguments of `throw ...(` statements — check the real source.
79void
81{
82 if(m_lineNumber == 0)
83 {
84 // header
85 QString tag = cell.getStringValue();
86 if(tag == "psm_id")
87 {
88 m_columnTypeList.push_back(Columns::psm_id);
89 }
90 else if(tag == "peptide")
91 {
92 m_columnTypeList.push_back(Columns::peptide);
93 }
94 else if(tag == "proteins")
95 {
96 m_columnTypeList.push_back(Columns::proteins);
97 }
98 else if(tag == "num_proteins")
99 {
100 m_columnTypeList.push_back(Columns::num_proteins);
101 }
102 else if(tag == "filename")
103 {
104 m_columnTypeList.push_back(Columns::filename);
105 }
106 else if(tag == "scannr")
107 {
108 m_columnTypeList.push_back(Columns::scannr);
109 }
110 else if(tag == "rank")
111 {
112 m_columnTypeList.push_back(Columns::rank);
113 }
114 else if(tag == "label")
115 {
116 m_columnTypeList.push_back(Columns::label);
117 }
118 else if(tag == "expmass")
119 {
120 m_columnTypeList.push_back(Columns::expmass);
121 }
122
123 else if(tag == "calcmass")
124 {
125 m_columnTypeList.push_back(Columns::calcmass);
126 }
127 else if(tag == "charge")
128 {
129 m_columnTypeList.push_back(Columns::charge);
130 }
131 else if(tag == "peptide_len")
132 {
133 m_columnTypeList.push_back(Columns::peptide_len);
134 }
135 else if(tag == "missed_cleavages")
136 {
137 m_columnTypeList.push_back(Columns::missed_cleavages);
138 }
139 else if(tag == "semi_enzymatic")
140 {
141 m_columnTypeList.push_back(Columns::semi_enzymatic);
142 }
143 else if(tag == "isotope_error")
144 {
145 m_columnTypeList.push_back(Columns::isotope_error);
146 }
147 else if(tag == "precursor_ppm")
148 {
149 m_columnTypeList.push_back(Columns::precursor_ppm);
150 }
151 else if(tag == "fragment_ppm")
152 {
153 m_columnTypeList.push_back(Columns::fragment_ppm);
154 }
155 else if(tag == "hyperscore")
156 {
157 m_columnTypeList.push_back(Columns::hyperscore);
158 }
159 else if(tag == "delta_next")
160 {
161 m_columnTypeList.push_back(Columns::delta_next);
162 }
163 else if(tag == "delta_best")
164 {
165 m_columnTypeList.push_back(Columns::delta_best);
166 }
167 else if(tag == "rt")
168 {
169 m_columnTypeList.push_back(Columns::rt);
170 }
171 else if(tag == "aligned_rt")
172 {
173 m_columnTypeList.push_back(Columns::aligned_rt);
174 }
175 else if(tag == "predicted_rt")
176 {
177 m_columnTypeList.push_back(Columns::predicted_rt);
178 }
179 else if(tag == "delta_rt_model")
180 {
181 m_columnTypeList.push_back(Columns::delta_rt_model);
182 }
183 else if(tag == "ion_mobility")
184 {
185 m_columnTypeList.push_back(Columns::ion_mobility);
186 }
187 else if(tag == "predicted_mobility")
188 {
189 m_columnTypeList.push_back(Columns::predicted_mobility);
190 }
191 else if(tag == "delta_mobility")
192 {
193 m_columnTypeList.push_back(Columns::delta_mobility);
194 }
195 else if(tag == "matched_peaks")
196 {
197 m_columnTypeList.push_back(Columns::matched_peaks);
198 }
199 else if(tag == "longest_b")
200 {
201 m_columnTypeList.push_back(Columns::longest_b);
202 }
203 else if(tag == "longest_y")
204 {
205 m_columnTypeList.push_back(Columns::longest_y);
206 }
207 else if(tag == "longest_y_pct")
208 {
209 m_columnTypeList.push_back(Columns::longest_y_pct);
210 }
211 else if(tag == "matched_intensity_pct")
212 {
213 m_columnTypeList.push_back(Columns::matched_intensity_pct);
214 }
215 else if(tag == "scored_candidates")
216 {
217 m_columnTypeList.push_back(Columns::scored_candidates);
218 }
219 else if(tag == "poisson")
220 {
221 m_columnTypeList.push_back(Columns::poisson);
222 }
223 else if(tag == "sage_discriminant_score")
224 {
225 m_columnTypeList.push_back(Columns::sage_discriminant_score);
226 }
227 else if(tag == "posterior_error")
228 {
229 m_columnTypeList.push_back(Columns::posterior_error);
230 }
231 else if(tag == "spectrum_q")
232 {
233 m_columnTypeList.push_back(Columns::spectrum_q);
234 }
235 else if(tag == "peptide_q")
236 {
237 m_columnTypeList.push_back(Columns::peptide_q);
238 }
239 else if(tag == "protein_q")
240 {
241 m_columnTypeList.push_back(Columns::protein_q);
242 }
243 else if(tag == "ms2_intensity")
244 {
245 m_columnTypeList.push_back(Columns::ms2_intensity);
246 }
247 else
248 {
// Any header name that is not listed above aborts the parsing.
249 throw pappso::ExceptionNotPossible(QObject::tr("column \"%1\" not defined").arg(tag));
250 }
251 }
252 else
253 {
// A data row holding more cells than the header declared is an error.
254 if(m_columnNumber >= m_columnTypeList.size())
255 {
257 QObject::tr("the value %1 is out of range").arg(cell.getStringValue()));
258 }
259 Columns column_type = m_columnTypeList[m_columnNumber];
260 switch(column_type)
261 {
262 case Columns::psm_id:
263 break;
264 case Columns::peptide:
265 parsePeptide(cell.toString());
266 break;
267 case Columns::proteins:
268 parseProteins(cell.toString());
269 break;
270 case Columns::num_proteins:
// Consistency check against the list filled by parseProteins().
// NOTE(review): this relies on the proteins column being read before
// num_proteins on the same row.
271 if((std::size_t)cell.getDoubleValue() != (std::size_t)m_proteinList.size())
272 {
274 QObject::tr("column \"num_proteins\"!=%1").arg(m_proteinList.size()));
275 }
276 break;
277 case Columns::filename:
278 parseMsRunFilename(cell.toString());
279 break;
280 case Columns::scannr:
// The boolean returned by parseSpectrumStringId() is not used here.
281 parseSpectrumStringId(cell.toString());
282 break;
283 case Columns::rank:
284 m_line.rank = cell.getDoubleValue();
285 break;
286 case Columns::label:
287 m_line.label = cell.getDoubleValue();
288 break;
289 case Columns::expmass:
290 m_line.expmass = cell.getDoubleValue();
291 break;
292 case Columns::calcmass:
293 m_line.calcmass = cell.getDoubleValue();
294 break;
295 case Columns::charge:
296 m_line.charge = cell.getDoubleValue();
297 break;
298 case Columns::peptide_len:
299 m_line.peptide_len = cell.getDoubleValue();
300 break;
301 case Columns::missed_cleavages:
302 m_line.missed_cleavages = cell.getDoubleValue();
303 break;
304 case Columns::semi_enzymatic:
305 m_line.semi_enzymatic = cell.getDoubleValue();
306 break;
307 case Columns::isotope_error:
308 m_line.isotope_error = cell.getDoubleValue();
309 break;
310
311 case Columns::precursor_ppm:
312 m_line.precursor_ppm = cell.getDoubleValue();
313 break;
314 case Columns::fragment_ppm:
315 m_line.fragment_ppm = cell.getDoubleValue();
316 break;
317 case Columns::hyperscore:
318 m_line.hyperscore = cell.getDoubleValue();
319 break;
320 case Columns::delta_next:
321 m_line.delta_next = cell.getDoubleValue();
322 break;
323 case Columns::delta_best:
324 m_line.delta_best = cell.getDoubleValue();
325 break;
326 case Columns::rt:
327 m_line.rt = cell.getDoubleValue() * 60; // to convert retention time in seconds
328 break;
329 case Columns::aligned_rt:
330 m_line.aligned_rt = cell.getDoubleValue();
331 break;
332 case Columns::predicted_rt:
333 m_line.predicted_rt = cell.getDoubleValue();
334 break;
335 case Columns::delta_rt_model:
336 m_line.delta_rt_model = cell.getDoubleValue();
337 break;
338 case Columns::ion_mobility:
339 m_line.ion_mobility = cell.getDoubleValue();
340 break;
341 case Columns::predicted_mobility:
342 m_line.predicted_mobility = cell.getDoubleValue();
343 break;
344 case Columns::delta_mobility:
345 m_line.delta_mobility = cell.getDoubleValue();
346 break;
347 case Columns::matched_peaks:
348 m_line.matched_peaks = cell.getDoubleValue();
349 break;
350 case Columns::longest_b:
351 m_line.longest_b = cell.getDoubleValue();
352 break;
353 case Columns::longest_y:
354 m_line.longest_y = cell.getDoubleValue();
355 break;
356 case Columns::longest_y_pct:
357 m_line.longest_y_pct = cell.getDoubleValue();
358 break;
359 case Columns::matched_intensity_pct:
360 m_line.matched_intensity_pct = cell.getDoubleValue();
361 break;
362 case Columns::scored_candidates:
363 m_line.scored_candidates = cell.getDoubleValue();
364 break;
365 case Columns::poisson:
366 m_line.poisson = cell.getDoubleValue();
367 break;
368 case Columns::sage_discriminant_score:
369 m_line.sage_discriminant_score = cell.getDoubleValue();
370 break;
371 case Columns::posterior_error:
372 m_line.posterior_error = cell.getDoubleValue();
373 break;
374 case Columns::spectrum_q:
375 m_line.spectrum_q = cell.getDoubleValue();
376 break;
377 case Columns::peptide_q:
378 m_line.peptide_q = cell.getDoubleValue();
379 break;
380 case Columns::protein_q:
381 m_line.protein_q = cell.getDoubleValue();
382 break;
383 case Columns::ms2_intensity:
384 m_line.ms2_intensity = cell.getDoubleValue();
385 break;
386 default:
// Reached only for an enumerator that has no case above.
387 qDebug() << "m_line.calcmass=" << m_line.calcmass;
389 QObject::tr("column type %1 not implemented").arg((std::uint8_t)column_type));
390 break;
391 }
392
// Sample of one data row, kept for reference:
393 /*
394
395 2333 TMISDSDYTEFENFTK
396 GRMZM2G018197_P01;GRMZM2G068952_P01;GRMZM5G822976_P01 3
397 20120906_balliau_extract_1_A01_urnb-1.mzML controllerType=0
398 controllerNumber=1 scan=12542 1 1 1926.8225 1926.8193 2 16 0 0
399 0.0 1.6471838 1.9796097 54.06803492297634 28.049970383419556 0.0 38.192993
400 0.76385987 0.7671368 0.0032769442 0.0 0.0 0.0 16 2 14 0.875 32.54396 380
401 -13.375352220427656 1.1570586 -34.13482 0.00016041065 0.00022231363
402 0.00040124074 1271951.1
403 */
404 }
// Advance to the next column for both header and data rows.
405 m_columnNumber++;
406}
407
// Resets the per-row state: column index back to 0, no current peptide, empty
// protein list and a freshly constructed Line in m_line.
// NOTE(review): the signature line is missing from this dump; presumably this
// is the startLine() callback — confirm. mp_currentSample is not reset here.
408void
410{
411 m_columnNumber = 0;
412 msp_peptide = nullptr;
413 m_proteinList.clear();
414 m_line = Line();
415}
416
// Resets the column and line counters, reports the status
// "reading Sage TSV file" to the monitor and throws
// pappso::ExceptionInterrupted when the monitor asks to stop.
// NOTE(review): the signature line is missing from this dump; presumably this
// is the startSheet() callback — confirm.
417void
419{
420 m_columnNumber = 0;
421 m_lineNumber = 0;
422 mp_monitor->setStatus(QObject::tr("reading Sage TSV file"));
423
424 if(mp_monitor->shouldIstop())
425 {
426 throw pappso::ExceptionInterrupted(QObject::tr("Sage TSV data reading process interrupted"));
427 }
428}
429
// Turns the Sage peptide string (peptide_str) into a peptide stored in
// msp_peptide. Does nothing during the first pass.
//
// Each modification string known from the static and then the variable
// modification lists is replaced by "[<accession>]", and the resulting text is
// handed to pappso::PeptideProFormaParser::parseString().
//
// NOTE(review): the signature line is missing from this dump; the name is
// inferred from the peptide_str parameter and the call in the cell handler.
// NOTE(review): both loops take `modif` by value, so each SageModification is
// copied per iteration.
430void
432{
433 qDebug();
434 if(m_isFirstPass)
435 return;
436 QString peptide_str_verif = peptide_str;
437 // fixed modifications :
438 for(SageReader::SageModification modif : m_staticModificationList)
439 {
440 qDebug() << modif.strModification;
441 qDebug() << modif.modification->getAccession();
442 peptide_str_verif = peptide_str_verif.replace(
443 modif.strModification, QString("[%1]").arg(modif.modification->getAccession()));
444 }
445 // variable modifications :
446 for(SageReader::SageModification modif : m_variableModificationList)
447 {
448 qDebug() << modif.strModification;
449 qDebug() << modif.modification->getAccession();
450 peptide_str_verif = peptide_str_verif.replace(
451 modif.strModification, QString("[%1]").arg(modif.modification->getAccession()));
452 }
453
454 qDebug() << peptide_str_verif;
455 // LPMFGC[+57.0216]NDATQVYK
456 msp_peptide = pappso::PeptideProFormaParser::parseString(peptide_str_verif);
457 qDebug();
458 // variable modifications :
459 /*
460 setVariableModifications(peptide_sp,
461 peptide_line.peptide_string_list.at(6));
462*/
463 qDebug() << msp_peptide.get()->toProForma();
464}
465
466
// Splits the semicolon-separated accession list (proteins_str) into
// m_proteinList.
//
// During the first pass every accession is additionally registered in
// m_psmProteinMap: a pappso::Protein is created from the accession with an
// empty second constructor argument, and isTarget is set to false when the
// accession starts with the decoy tag, true otherwise.
//
// NOTE(review): the signature line and listing line 477 are missing from this
// dump; line 477 presumably declares the local `psm_protein` — confirm.
467void
469{
470 m_proteinList.clear();
471 m_proteinList = proteins_str.split(";");
472 if(m_isFirstPass)
473 {
474 for(QString accession : m_proteinList)
475 {
476
478 psm_protein.protein_sp = std::make_shared<pappso::Protein>(accession, "");
479 psm_protein.isTarget = true;
480 if(accession.startsWith(m_decoyTag))
481 {
482 psm_protein.isTarget = false;
483 }
484
485 m_psmProteinMap.insert(psm_protein);
486 }
487 }
488}
489
// Stores the spectrum native id (spectrum_string_id) in m_spectrumNativeId and
// tries to derive m_spectrumIndex from it.
//
// During the first pass only the native id is stored and false is returned.
// Otherwise the string is split on "scan="; when that yields exactly two
// parts, the second one is parsed as an unsigned integer and, if greater than
// zero, decremented by one before being kept as m_spectrumIndex.
//
// Returns true only when the number after "scan=" was parsed successfully.
// The fallback that asked the MS run reader for the index is commented out, so
// on failure m_spectrumIndex is not recomputed.
//
// NOTE(review): the signature line is missing from this dump; the name is
// inferred from the spectrum_string_id parameter.
490bool
492{
493 m_spectrumNativeId = spectrum_string_id;
494 qDebug() << spectrum_string_id;
495 if(m_isFirstPass)
496 return false;
497 // controllerType=0 controllerNumber=1 scan=176056
498
499 bool is_ok = false;
500 QStringList scan_list = spectrum_string_id.split("scan=");
501 if(scan_list.size() == 2)
502 {
503 // we bet that there is a scan number, easy to parse
504 m_spectrumIndex = scan_list.at(1).toULongLong(&is_ok);
// presumably converts a 1-based scan number to a 0-based index — confirm
505 if(m_spectrumIndex > 0)
506 m_spectrumIndex--;
507 }
508 if(is_ok == false)
509 { /*
510 if(msp_previousMsrun != msp_msrun)
511 {
512 mp_monitor->setStatus(
513 QObject::tr("Reading mz data file %1").arg(msp_msrun.get()->getFileName()));
514 msp_previousMsrun = msp_msrun;
515 }
516
517 pappso::MsRunReader *msrunreader_p = msp_msrun.get()->getMsRunReaderSPtr().get();
518 if(msrunreader_p->getMsRunId()->getMsDataFormat() == pappso::MsDataFormat::brukerTims)
519 {
520 m_spectrumIndex = spectrum_string_id.toInt() * 2 - 1;
521 }
522 else
523 {
524 m_spectrumIndex =
525 msrunreader_p->spectrumStringIdentifier2SpectrumIndex(spectrum_string_id);
526 }*/
527 }
528
529 qDebug() << spectrum_string_id;
530 return is_ok;
531}
532
// Selects (creating it when needed) the sample that corresponds to the MS run
// file name of the current row and makes mp_currentSample point to it.
// Does nothing during the first pass.
//
// For a newly created sample the following is filled in:
//  - name: the raw msrun_filename
//  - cbor_core_sample["name"]: base name of the file
//  - cbor_core_sample["identification_file_list"]: one entry whose "name" is
//    the path returned by m_sageReader.getmJsonAbsoluteFilePath()
//  - cbor_core_sample["peaklist_file"]: a map whose "name" is the absolute
//    path of msrun_filename
//
// NOTE(review): the signature line is missing from this dump; the name is
// inferred from the msrun_filename parameter.
533void
535{
536
537 if(m_isFirstPass)
538 return;
539
540 // find the sample :
// insert() leaves an existing entry untouched; .second tells whether it is new
541 auto it_insert = m_sampleMap.insert({msrun_filename, {}});
542 mp_currentSample = &(it_insert.first->second);
543 if(it_insert.second)
544 {
545 // new sample
546 it_insert.first->second.name = msrun_filename;
547 QCborMap ms_file;
548 QCborMap identification_file;
549
550 it_insert.first->second.cbor_core_sample.insert(QString("name"),
551 QFileInfo(msrun_filename).baseName());
552
553 // identification_file_list
554 QCborArray identification_file_list;
555 identification_file.insert(QString("name"), m_sageReader.getmJsonAbsoluteFilePath());
556 identification_file_list.push_back(identification_file);
557 it_insert.first->second.cbor_core_sample.insert(QString("identification_file_list"),
558 identification_file_list);
559
560
561 ms_file.insert(QString("name"), QFileInfo(msrun_filename).absoluteFilePath());
562 it_insert.first->second.cbor_core_sample.insert(QString("peaklist_file"), ms_file);
563 }
564
565 /*
566 msp_msrun = m_sageReader.getSageFileReader().getMsRunSpWithFileName(msrun_filename);
567 qDebug() << msp_msrun.get()->getFileName();
568
569 msp_identificationSageJsonFileSp =
570 m_sageReader.getSageFileReader().getIdentificationSageJsonFileSpWithFileName(msrun_filename);
571
572 mp_identificationGroup =
573 m_sageReader.getSageFileReader().getIdentificationGroupPtrWithFileName(msrun_filename);
574 qDebug() << msp_msrun.get()->getFileName();
575 */
576}
577
// Stores the content of the row that has just been read (m_line, m_proteinList,
// msp_peptide, m_spectrumNativeId, m_spectrumIndex).
//
// First pass: for every accession of the row, a PsmProtein is inserted in
// m_psmProteinMap and the row's protein_q is written under "protein_q" in the
// cborEval map of the returned entry.
//
// Second pass: the scan identified by m_spectrumNativeId is looked up (or
// created) in the current sample, then one Psm is appended to its psm_list
// with the ProForma string, the protein list (accession plus every position of
// the peptide in the protein sequence) and all Sage scores.
//
// In both passes, each time m_lineNumber / 10000 grows past m_progressIndex
// the monitor is asked whether to stop and the status text is updated.
//
// NOTE(review): the signature line and listing lines 587 and 707 are missing
// from this dump (presumably the `psm_protein` declaration and the opener of
// the interruption `throw`) — check the real source.
// NOTE(review): in the second pass mp_currentSample and msp_peptide are
// dereferenced without a null check; they are set only by the filename and
// peptide column handlers.
578void
580{
581 qDebug();
582
583 if(m_isFirstPass)
584 {
585 for(const QString &accession : m_proteinList)
586 {
588 psm_protein.protein_sp = std::make_shared<pappso::Protein>(accession, "");
589
590 auto it = m_psmProteinMap.insert(psm_protein);
591 it.first->second.cborEval.insert(QString("protein_q"), m_line.protein_q);
592 }
593 }
594 else
595 {
596 /*
597 PeptideEvidence pe(msp_msrun.get(), m_spectrumIndex, true);
598 pe.setCharge(m_line.charge);
599 pe.setChecked(true);
600 pe.setExperimentalMass(m_line.expmass);
601 pe.setPeptideXtpSp(msp_peptide);
602 pe.setIdentificationDataSource(msp_identificationSageJsonFileSp.get());
603 pe.setIdentificationEngine(m_identificationEngine);
604 pe.setRetentionTime(m_line.rt);
605 pe.setParam(PeptideEvidenceParam::tandem_hyperscore, m_line.hyperscore);
606 pe.setParam(PeptideEvidenceParam::sage_sage_discriminant_score,
607 m_line.sage_discriminant_score);
608 pe.setParam(PeptideEvidenceParam::sage_peptide_q, m_line.peptide_q);
609 pe.setParam(PeptideEvidenceParam::sage_posterior_error, m_line.posterior_error);
610 pe.setParam(PeptideEvidenceParam::sage_spectrum_q, m_line.spectrum_q);
611 pe.setParam(PeptideEvidenceParam::sage_predicted_rt, m_line.predicted_rt);
612 pe.setParam(PeptideEvidenceParam::sage_isotope_error, m_line.isotope_error);
613
614
615 PeptideMatch peptide_match;
616 // peptide_match.setStart(mz_peptide_evidence.start);
617 peptide_match.setPeptideEvidenceSp(
618 msp_identificationSageJsonFileSp.get()->getPeptideEvidenceStore().getInstance(&pe));
619 */
620
621 // find the scan in sample
// scans are keyed by native id; the scan-level fields are filled only once,
// from the first row that mentions the scan
622 auto it_insert = mp_currentSample->scan_map.insert({m_spectrumNativeId, Scan()});
623 Scan *current_cbor_scan_p = &(it_insert.first->second);
624 if(it_insert.second)
625 {
626 // new scan
627 QCborMap &scan_id = it_insert.first->second.cbor_id;
628 scan_id.insert(QString("index"), (qint64)m_spectrumIndex);
629 scan_id.insert(QString("native_id"), m_spectrumNativeId);
630
631 QCborMap &scan_ms2 = it_insert.first->second.cbor_ms2;
632 scan_ms2.insert(QString("rt"), m_line.rt);
633
634 QCborMap &scan_precursor = it_insert.first->second.cbor_precursor;
635 scan_precursor.insert(QString("z"), m_line.charge);
// NOTE(review): the "mz" key receives the expmass column value — confirm
// that this is the intended quantity
636 scan_precursor.insert(QString("mz"), m_line.expmass);
637 }
638
639 Psm one_psm;
640 one_psm.proforma = msp_peptide.get()->toProForma();
641
642 for(const QString &accession : m_proteinList)
643 {
644 QCborMap cbor_protein;
645 cbor_protein.insert(QString("accession"), accession);
646
647
648 // start/end positions
// every "L" of the protein sequence is replaced by "I" before searching
// for the peptide's getSequenceLi() string
649 QString protein_sequence =
650 QString(m_psmProteinMap.getByAccession(accession).protein_sp.get()->getSequence())
651 .replace("L", "I");
652 QString peptide_sequence = msp_peptide.get()->getSequenceLi();
653 int position = protein_sequence.indexOf(peptide_sequence);
654
655 QCborArray positions;
// collect every occurrence; restarting at position + 1 also finds
// overlapping matches
656 while(position >= 0)
657 {
658 positions.push_back(position);
659 position = protein_sequence.indexOf(peptide_sequence, position + 1);
660 }
661
662 cbor_protein.insert(QString("positions"), positions);
663
664 one_psm.cbor_protein_list.append(cbor_protein);
665 }
666
667 one_psm.cbor_eval.insert(QString("aligned_rt"), m_line.aligned_rt);
668 one_psm.cbor_eval.insert(QString("calcmass"), m_line.calcmass);
669 one_psm.cbor_eval.insert(QString("delta_best"), m_line.delta_best);
670 one_psm.cbor_eval.insert(QString("delta_mobility"), m_line.delta_mobility);
671 one_psm.cbor_eval.insert(QString("delta_next"), m_line.delta_next);
672 one_psm.cbor_eval.insert(QString("delta_rt_model"), m_line.delta_rt_model);
673 one_psm.cbor_eval.insert(QString("fragment_ppm"), m_line.fragment_ppm);
674 one_psm.cbor_eval.insert(QString("hyperscore"), m_line.hyperscore);
675 one_psm.cbor_eval.insert(QString("ion_mobility"), m_line.ion_mobility);
676 one_psm.cbor_eval.insert(QString("isotope_error"), m_line.isotope_error);
677 one_psm.cbor_eval.insert(QString("label"), m_line.label);
678 one_psm.cbor_eval.insert(QString("longest_b"), (qint64)m_line.longest_b);
679 one_psm.cbor_eval.insert(QString("longest_y"), (qint64)m_line.longest_y);
680 one_psm.cbor_eval.insert(QString("longest_y_pct"), m_line.longest_y_pct);
681 one_psm.cbor_eval.insert(QString("matched_intensity_pct"), m_line.matched_intensity_pct);
682 one_psm.cbor_eval.insert(QString("matched_peaks"), (qint64)m_line.matched_peaks);
683 one_psm.cbor_eval.insert(QString("missed_cleavages"), m_line.missed_cleavages);
684 one_psm.cbor_eval.insert(QString("ms2_intensity"), m_line.ms2_intensity);
685 one_psm.cbor_eval.insert(QString("peptide_len"), (qint64)m_line.peptide_len);
686 one_psm.cbor_eval.insert(QString("peptide_q"), m_line.peptide_q);
687 one_psm.cbor_eval.insert(QString("poisson"), m_line.poisson);
688 one_psm.cbor_eval.insert(QString("posterior_error"), m_line.posterior_error);
689 one_psm.cbor_eval.insert(QString("precursor_ppm"), m_line.precursor_ppm);
690 one_psm.cbor_eval.insert(QString("predicted_mobility"), m_line.predicted_mobility);
691 one_psm.cbor_eval.insert(QString("predicted_rt"), m_line.predicted_rt);
692 one_psm.cbor_eval.insert(QString("protein_q"), m_line.protein_q);
693 one_psm.cbor_eval.insert(QString("rank"), m_line.rank);
694 one_psm.cbor_eval.insert(QString("sage_discriminant_score"), m_line.sage_discriminant_score);
695 one_psm.cbor_eval.insert(QString("scored_candidates"), (qint64)m_line.scored_candidates);
696 one_psm.cbor_eval.insert(QString("semi_enzymatic"), m_line.semi_enzymatic);
697 one_psm.cbor_eval.insert(QString("spectrum_q"), m_line.spectrum_q);
698
699 current_cbor_scan_p->psm_list.emplace_back(one_psm);
700 }
701
// progress reporting and cancellation check, once per block of 10000 lines
702 std::size_t progress = m_lineNumber / 10000;
703 if(progress > m_progressIndex)
704 {
705 if(mp_monitor->shouldIstop())
706 {
708 QObject::tr("Sage TSV data reading process interrupted"));
709 }
710 m_progressIndex = progress;
711 mp_monitor->setStatus(QString("%1K ").arg(m_progressIndex * 10));
712 }
713}
714
// Serialises one sample (one_sample) to the CBOR stream writer of
// m_sageReader as a map with the keys "name", "identification_file_list",
// "peaklist_file" (all three copied from cbor_core_sample) and "scan_list",
// an array of scan_map.size() elements written through writeScan().
// NOTE(review): the signature lines are missing from this dump; the name is
// inferred from the one_sample parameter and the caller.
715void
718{
719 m_sageReader.getCborStreamWriter().startMap();
720 m_sageReader.getCborStreamWriter().append("name");
721 one_sample.cbor_core_sample.value("name").toCbor(m_sageReader.getCborStreamWriter());
722
723 m_sageReader.getCborStreamWriter().append("identification_file_list");
724 one_sample.cbor_core_sample.value("identification_file_list")
725 .toCbor(m_sageReader.getCborStreamWriter());
726
727
728 m_sageReader.getCborStreamWriter().append("peaklist_file");
729 one_sample.cbor_core_sample.value("peaklist_file").toCbor(m_sageReader.getCborStreamWriter());
730 //"scan_list": [
731
732 m_sageReader.getCborStreamWriter().append("scan_list");
// the array length is announced up front, one element per scan
733 m_sageReader.getCborStreamWriter().startArray(one_sample.scan_map.size());
734 for(auto &it_scan : one_sample.scan_map)
735 {
736 writeScan(it_scan.second);
737 }
738 m_sageReader.getCborStreamWriter().endArray();
739
740
741 m_sageReader.getCborStreamWriter().endMap();
742}
743
// Serialises one scan (one_scan) to the CBOR stream writer of m_sageReader as
// a map with the keys "id", "precursor", "ms2" (the scan's cbor_id,
// cbor_precursor and cbor_ms2 maps) and "psm_list", an array of
// psm_list.size() elements written through writePsm().
// NOTE(review): the signature lines are missing from this dump; the name is
// inferred from the one_scan parameter and the caller.
744void
747{
748 m_sageReader.getCborStreamWriter().startMap();
749 m_sageReader.getCborStreamWriter().append("id");
750 QCborValue(one_scan.cbor_id).toCbor(m_sageReader.getCborStreamWriter());
751 m_sageReader.getCborStreamWriter().append("precursor");
752 QCborValue(one_scan.cbor_precursor).toCbor(m_sageReader.getCborStreamWriter());
753 m_sageReader.getCborStreamWriter().append("ms2");
754 QCborValue(one_scan.cbor_ms2).toCbor(m_sageReader.getCborStreamWriter());
755
756 m_sageReader.getCborStreamWriter().append("psm_list");
757 m_sageReader.getCborStreamWriter().startArray(one_scan.psm_list.size());
758 for(auto &it_psm : one_scan.psm_list)
759 {
760 writePsm(it_psm);
761 }
762 m_sageReader.getCborStreamWriter().endArray();
763
764 m_sageReader.getCborStreamWriter().endMap();
765}
766
// Serialises one PSM (one_psm) to the CBOR stream writer of m_sageReader as a
// map with the keys "proforma" (string), "protein_list" (the PSM's
// cbor_protein_list) and "eval", itself a map holding the Sage scores
// (cbor_eval) under the single key "sage".
// NOTE(review): the signature line is missing from this dump; the name is
// inferred from the one_psm parameter and the caller.
767void
769{
770 m_sageReader.getCborStreamWriter().startMap();
771 m_sageReader.getCborStreamWriter().append("proforma");
772 m_sageReader.getCborStreamWriter().append(one_psm.proforma);
773 m_sageReader.getCborStreamWriter().append("protein_list");
774 QCborValue(one_psm.cbor_protein_list).toCbor(m_sageReader.getCborStreamWriter());
775
776 m_sageReader.getCborStreamWriter().append("eval");
// indefinite-length map that wraps the engine-specific scores
777 m_sageReader.getCborStreamWriter().startMap();
778 m_sageReader.getCborStreamWriter().append("sage");
779 QCborValue(one_psm.cbor_eval).toCbor(m_sageReader.getCborStreamWriter());
780 m_sageReader.getCborStreamWriter().endMap();
781
782 m_sageReader.getCborStreamWriter().endMap();
783}
static PeptideSp parseString(const QString &pepstr)
std::vector< SageModification > getStaticModificationList() const
Definition: sagereader.cpp:214
std::vector< SageModification > getVariableModificationList() const
Definition: sagereader.cpp:256
std::vector< SageReader::SageModification > m_staticModificationList
void parseProteins(const QString &proteins_str)
virtual void endSheet() override
pappso::UiMonitorInterface * mp_monitor
virtual void startSheet(const QString &sheet_name) override
std::vector< SageReader::SageModification > m_variableModificationList
void parsePeptide(const QString &peptide_str)
bool parseSpectrumStringId(const QString &spectrum_string_id)
SageTsvHandler(pappso::UiMonitorInterface *p_monitor, const SageReader &sage_reader, bool is_fasta_pass, PsmProteinMap &psm_protein_map)
void writeSample(const Sample &one_sample)
virtual void setCell(const OdsCell &cell) override
void writePsm(const Psm &one_psm)
void writeScan(const Scan &one_scan)
virtual void endDocument() override
virtual void startLine() override
void parseMsRunFilename(const QString &msrun_filename)
virtual void endLine() override
std::shared_ptr< Protein > protein_sp
Definition: psmproteinmap.h:41