31#include "../../../../exception/exceptionnotimplemented.h"
32#include "../../../../exception/exceptionnotpossible.h"
33#include "../../../../exception/exceptionoutofrange.h"
34#include "../../../../exception/exceptionnotfound.h"
35#include "../../../../exception/exceptioninterrupted.h"
36#include "../../../../peptide/peptideproformaparser.h"
37#include "../../../../msrun/msrunreader.h"
44 : m_sageReader(sage_reader), m_psmProteinMap(psm_protein_map)
60 for(
auto it_sample_map : m_sampleMap)
62 writeSample(it_sample_map.second);
69 if(m_line.charge != 0)
85 QString tag = cell.getStringValue();
88 m_columnTypeList.push_back(Columns::psm_id);
90 else if(tag ==
"peptide")
92 m_columnTypeList.push_back(Columns::peptide);
94 else if(tag ==
"proteins")
96 m_columnTypeList.push_back(Columns::proteins);
98 else if(tag ==
"num_proteins")
100 m_columnTypeList.push_back(Columns::num_proteins);
102 else if(tag ==
"filename")
104 m_columnTypeList.push_back(Columns::filename);
106 else if(tag ==
"scannr")
108 m_columnTypeList.push_back(Columns::scannr);
110 else if(tag ==
"rank")
112 m_columnTypeList.push_back(Columns::rank);
114 else if(tag ==
"label")
116 m_columnTypeList.push_back(Columns::label);
118 else if(tag ==
"expmass")
120 m_columnTypeList.push_back(Columns::expmass);
123 else if(tag ==
"calcmass")
125 m_columnTypeList.push_back(Columns::calcmass);
127 else if(tag ==
"charge")
129 m_columnTypeList.push_back(Columns::charge);
131 else if(tag ==
"peptide_len")
133 m_columnTypeList.push_back(Columns::peptide_len);
135 else if(tag ==
"missed_cleavages")
137 m_columnTypeList.push_back(Columns::missed_cleavages);
139 else if(tag ==
"semi_enzymatic")
141 m_columnTypeList.push_back(Columns::semi_enzymatic);
143 else if(tag ==
"isotope_error")
145 m_columnTypeList.push_back(Columns::isotope_error);
147 else if(tag ==
"precursor_ppm")
149 m_columnTypeList.push_back(Columns::precursor_ppm);
151 else if(tag ==
"fragment_ppm")
153 m_columnTypeList.push_back(Columns::fragment_ppm);
155 else if(tag ==
"hyperscore")
157 m_columnTypeList.push_back(Columns::hyperscore);
159 else if(tag ==
"delta_next")
161 m_columnTypeList.push_back(Columns::delta_next);
163 else if(tag ==
"delta_best")
165 m_columnTypeList.push_back(Columns::delta_best);
169 m_columnTypeList.push_back(Columns::rt);
171 else if(tag ==
"aligned_rt")
173 m_columnTypeList.push_back(Columns::aligned_rt);
175 else if(tag ==
"predicted_rt")
177 m_columnTypeList.push_back(Columns::predicted_rt);
179 else if(tag ==
"delta_rt_model")
181 m_columnTypeList.push_back(Columns::delta_rt_model);
183 else if(tag ==
"ion_mobility")
185 m_columnTypeList.push_back(Columns::ion_mobility);
187 else if(tag ==
"predicted_mobility")
189 m_columnTypeList.push_back(Columns::predicted_mobility);
191 else if(tag ==
"delta_mobility")
193 m_columnTypeList.push_back(Columns::delta_mobility);
195 else if(tag ==
"matched_peaks")
197 m_columnTypeList.push_back(Columns::matched_peaks);
199 else if(tag ==
"longest_b")
201 m_columnTypeList.push_back(Columns::longest_b);
203 else if(tag ==
"longest_y")
205 m_columnTypeList.push_back(Columns::longest_y);
207 else if(tag ==
"longest_y_pct")
209 m_columnTypeList.push_back(Columns::longest_y_pct);
211 else if(tag ==
"matched_intensity_pct")
213 m_columnTypeList.push_back(Columns::matched_intensity_pct);
215 else if(tag ==
"scored_candidates")
217 m_columnTypeList.push_back(Columns::scored_candidates);
219 else if(tag ==
"poisson")
221 m_columnTypeList.push_back(Columns::poisson);
223 else if(tag ==
"sage_discriminant_score")
225 m_columnTypeList.push_back(Columns::sage_discriminant_score);
227 else if(tag ==
"posterior_error")
229 m_columnTypeList.push_back(Columns::posterior_error);
231 else if(tag ==
"spectrum_q")
233 m_columnTypeList.push_back(Columns::spectrum_q);
235 else if(tag ==
"peptide_q")
237 m_columnTypeList.push_back(Columns::peptide_q);
239 else if(tag ==
"protein_q")
241 m_columnTypeList.push_back(Columns::protein_q);
243 else if(tag ==
"ms2_intensity")
245 m_columnTypeList.push_back(Columns::ms2_intensity);
254 if(m_columnNumber >= m_columnTypeList.size())
257 QObject::tr(
"the value %1 is out of range").arg(cell.getStringValue()));
259 Columns column_type = m_columnTypeList[m_columnNumber];
262 case Columns::psm_id:
264 case Columns::peptide:
265 parsePeptide(cell.toString());
267 case Columns::proteins:
268 parseProteins(cell.toString());
270 case Columns::num_proteins:
271 if((std::size_t)cell.getDoubleValue() != (std::size_t)m_proteinList.size())
274 QObject::tr(
"column \"num_proteins\"!=%1").arg(m_proteinList.size()));
277 case Columns::filename:
278 parseMsRunFilename(cell.toString());
280 case Columns::scannr:
281 parseSpectrumStringId(cell.toString());
284 m_line.rank = cell.getDoubleValue();
287 m_line.label = cell.getDoubleValue();
289 case Columns::expmass:
290 m_line.expmass = cell.getDoubleValue();
292 case Columns::calcmass:
293 m_line.calcmass = cell.getDoubleValue();
295 case Columns::charge:
296 m_line.charge = cell.getDoubleValue();
298 case Columns::peptide_len:
299 m_line.peptide_len = cell.getDoubleValue();
301 case Columns::missed_cleavages:
302 m_line.missed_cleavages = cell.getDoubleValue();
304 case Columns::semi_enzymatic:
305 m_line.semi_enzymatic = cell.getDoubleValue();
307 case Columns::isotope_error:
308 m_line.isotope_error = cell.getDoubleValue();
311 case Columns::precursor_ppm:
312 m_line.precursor_ppm = cell.getDoubleValue();
314 case Columns::fragment_ppm:
315 m_line.fragment_ppm = cell.getDoubleValue();
317 case Columns::hyperscore:
318 m_line.hyperscore = cell.getDoubleValue();
320 case Columns::delta_next:
321 m_line.delta_next = cell.getDoubleValue();
323 case Columns::delta_best:
324 m_line.delta_best = cell.getDoubleValue();
327 m_line.rt = cell.getDoubleValue() * 60;
329 case Columns::aligned_rt:
330 m_line.aligned_rt = cell.getDoubleValue();
332 case Columns::predicted_rt:
333 m_line.predicted_rt = cell.getDoubleValue();
335 case Columns::delta_rt_model:
336 m_line.delta_rt_model = cell.getDoubleValue();
338 case Columns::ion_mobility:
339 m_line.ion_mobility = cell.getDoubleValue();
341 case Columns::predicted_mobility:
342 m_line.predicted_mobility = cell.getDoubleValue();
344 case Columns::delta_mobility:
345 m_line.delta_mobility = cell.getDoubleValue();
347 case Columns::matched_peaks:
348 m_line.matched_peaks = cell.getDoubleValue();
350 case Columns::longest_b:
351 m_line.longest_b = cell.getDoubleValue();
353 case Columns::longest_y:
354 m_line.longest_y = cell.getDoubleValue();
356 case Columns::longest_y_pct:
357 m_line.longest_y_pct = cell.getDoubleValue();
359 case Columns::matched_intensity_pct:
360 m_line.matched_intensity_pct = cell.getDoubleValue();
362 case Columns::scored_candidates:
363 m_line.scored_candidates = cell.getDoubleValue();
365 case Columns::poisson:
366 m_line.poisson = cell.getDoubleValue();
368 case Columns::sage_discriminant_score:
369 m_line.sage_discriminant_score = cell.getDoubleValue();
371 case Columns::posterior_error:
372 m_line.posterior_error = cell.getDoubleValue();
374 case Columns::spectrum_q:
375 m_line.spectrum_q = cell.getDoubleValue();
377 case Columns::peptide_q:
378 m_line.peptide_q = cell.getDoubleValue();
380 case Columns::protein_q:
381 m_line.protein_q = cell.getDoubleValue();
383 case Columns::ms2_intensity:
384 m_line.ms2_intensity = cell.getDoubleValue();
387 qDebug() <<
"m_line.calcmass=" << m_line.calcmass;
389 QObject::tr(
"column type %1 not implemented").arg((std::uint8_t)column_type));
412 msp_peptide =
nullptr;
413 m_proteinList.clear();
422 mp_monitor->setStatus(QObject::tr(
"reading Sage TSV file"));
424 if(mp_monitor->shouldIstop())
436 QString peptide_str_verif = peptide_str;
440 qDebug() << modif.strModification;
441 qDebug() << modif.modification->getAccession();
442 peptide_str_verif = peptide_str_verif.replace(
443 modif.strModification, QString(
"[%1]").arg(modif.modification->getAccession()));
448 qDebug() << modif.strModification;
449 qDebug() << modif.modification->getAccession();
450 peptide_str_verif = peptide_str_verif.replace(
451 modif.strModification, QString(
"[%1]").arg(modif.modification->getAccession()));
454 qDebug() << peptide_str_verif;
463 qDebug() << msp_peptide.get()->toProForma();
470 m_proteinList.clear();
471 m_proteinList = proteins_str.split(
";");
474 for(QString accession : m_proteinList)
478 psm_protein.
protein_sp = std::make_shared<pappso::Protein>(accession,
"");
480 if(accession.startsWith(m_decoyTag))
485 m_psmProteinMap.insert(psm_protein);
493 m_spectrumNativeId = spectrum_string_id;
494 qDebug() << spectrum_string_id;
500 QStringList scan_list = spectrum_string_id.split(
"scan=");
501 if(scan_list.size() == 2)
504 m_spectrumIndex = scan_list.at(1).toULongLong(&is_ok);
505 if(m_spectrumIndex > 0)
529 qDebug() << spectrum_string_id;
541 auto it_insert = m_sampleMap.insert({msrun_filename, {}});
542 mp_currentSample = &(it_insert.first->second);
546 it_insert.first->second.name = msrun_filename;
548 QCborMap identification_file;
550 it_insert.first->second.cbor_core_sample.insert(QString(
"name"),
551 QFileInfo(msrun_filename).baseName());
554 QCborArray identification_file_list;
555 identification_file.insert(QString(
"name"), m_sageReader.getmJsonAbsoluteFilePath());
556 identification_file_list.push_back(identification_file);
557 it_insert.first->second.cbor_core_sample.insert(QString(
"identification_file_list"),
558 identification_file_list);
561 ms_file.insert(QString(
"name"), QFileInfo(msrun_filename).absoluteFilePath());
562 it_insert.first->second.cbor_core_sample.insert(QString(
"peaklist_file"), ms_file);
585 for(
const QString &accession : m_proteinList)
588 psm_protein.
protein_sp = std::make_shared<pappso::Protein>(accession,
"");
590 auto it = m_psmProteinMap.insert(psm_protein);
591 it.first->second.cborEval.insert(QString(
"protein_q"), m_line.protein_q);
622 auto it_insert = mp_currentSample->scan_map.insert({m_spectrumNativeId,
Scan()});
623 Scan *current_cbor_scan_p = &(it_insert.first->second);
627 QCborMap &scan_id = it_insert.first->second.
cbor_id;
628 scan_id.insert(QString(
"index"), (qint64)m_spectrumIndex);
629 scan_id.insert(QString(
"native_id"), m_spectrumNativeId);
631 QCborMap &scan_ms2 = it_insert.first->second.cbor_ms2;
632 scan_ms2.insert(QString(
"rt"), m_line.rt);
634 QCborMap &scan_precursor = it_insert.first->second.cbor_precursor;
635 scan_precursor.insert(QString(
"z"), m_line.charge);
636 scan_precursor.insert(QString(
"mz"), m_line.expmass);
640 one_psm.
proforma = msp_peptide.get()->toProForma();
642 for(
const QString &accession : m_proteinList)
644 QCborMap cbor_protein;
645 cbor_protein.insert(QString(
"accession"), accession);
649 QString protein_sequence =
650 QString(m_psmProteinMap.getByAccession(accession).protein_sp.get()->getSequence())
652 QString peptide_sequence = msp_peptide.get()->getSequenceLi();
653 int position = protein_sequence.indexOf(peptide_sequence);
655 QCborArray positions;
658 positions.push_back(position);
659 position = protein_sequence.indexOf(peptide_sequence, position + 1);
662 cbor_protein.insert(QString(
"positions"), positions);
667 one_psm.
cbor_eval.insert(QString(
"aligned_rt"), m_line.aligned_rt);
668 one_psm.
cbor_eval.insert(QString(
"calcmass"), m_line.calcmass);
669 one_psm.
cbor_eval.insert(QString(
"delta_best"), m_line.delta_best);
670 one_psm.
cbor_eval.insert(QString(
"delta_mobility"), m_line.delta_mobility);
671 one_psm.
cbor_eval.insert(QString(
"delta_next"), m_line.delta_next);
672 one_psm.
cbor_eval.insert(QString(
"delta_rt_model"), m_line.delta_rt_model);
673 one_psm.
cbor_eval.insert(QString(
"fragment_ppm"), m_line.fragment_ppm);
674 one_psm.
cbor_eval.insert(QString(
"hyperscore"), m_line.hyperscore);
675 one_psm.
cbor_eval.insert(QString(
"ion_mobility"), m_line.ion_mobility);
676 one_psm.
cbor_eval.insert(QString(
"isotope_error"), m_line.isotope_error);
677 one_psm.
cbor_eval.insert(QString(
"label"), m_line.label);
678 one_psm.
cbor_eval.insert(QString(
"longest_b"), (qint64)m_line.longest_b);
679 one_psm.
cbor_eval.insert(QString(
"longest_y"), (qint64)m_line.longest_y);
680 one_psm.
cbor_eval.insert(QString(
"longest_y_pct"), m_line.longest_y_pct);
681 one_psm.
cbor_eval.insert(QString(
"matched_intensity_pct"), m_line.matched_intensity_pct);
682 one_psm.
cbor_eval.insert(QString(
"matched_peaks"), (qint64)m_line.matched_peaks);
683 one_psm.
cbor_eval.insert(QString(
"missed_cleavages"), m_line.missed_cleavages);
684 one_psm.
cbor_eval.insert(QString(
"ms2_intensity"), m_line.ms2_intensity);
685 one_psm.
cbor_eval.insert(QString(
"peptide_len"), (qint64)m_line.peptide_len);
686 one_psm.
cbor_eval.insert(QString(
"peptide_q"), m_line.peptide_q);
687 one_psm.
cbor_eval.insert(QString(
"poisson"), m_line.poisson);
688 one_psm.
cbor_eval.insert(QString(
"posterior_error"), m_line.posterior_error);
689 one_psm.
cbor_eval.insert(QString(
"precursor_ppm"), m_line.precursor_ppm);
690 one_psm.
cbor_eval.insert(QString(
"predicted_mobility"), m_line.predicted_mobility);
691 one_psm.
cbor_eval.insert(QString(
"predicted_rt"), m_line.predicted_rt);
692 one_psm.
cbor_eval.insert(QString(
"protein_q"), m_line.protein_q);
693 one_psm.
cbor_eval.insert(QString(
"rank"), m_line.rank);
694 one_psm.
cbor_eval.insert(QString(
"sage_discriminant_score"), m_line.sage_discriminant_score);
695 one_psm.
cbor_eval.insert(QString(
"scored_candidates"), (qint64)m_line.scored_candidates);
696 one_psm.
cbor_eval.insert(QString(
"semi_enzymatic"), m_line.semi_enzymatic);
697 one_psm.
cbor_eval.insert(QString(
"spectrum_q"), m_line.spectrum_q);
699 current_cbor_scan_p->
psm_list.emplace_back(one_psm);
702 std::size_t progress = m_lineNumber / 10000;
703 if(progress > m_progressIndex)
705 if(mp_monitor->shouldIstop())
708 QObject::tr(
"Sage TSV data reading process interrupted"));
710 m_progressIndex = progress;
711 mp_monitor->setStatus(QString(
"%1K ").arg(m_progressIndex * 10));
719 m_sageReader.getCborStreamWriter().startMap();
720 m_sageReader.getCborStreamWriter().append(
"name");
721 one_sample.
cbor_core_sample.value(
"name").toCbor(m_sageReader.getCborStreamWriter());
723 m_sageReader.getCborStreamWriter().append(
"identification_file_list");
725 .toCbor(m_sageReader.getCborStreamWriter());
728 m_sageReader.getCborStreamWriter().append(
"peaklist_file");
729 one_sample.
cbor_core_sample.value(
"peaklist_file").toCbor(m_sageReader.getCborStreamWriter());
732 m_sageReader.getCborStreamWriter().append(
"scan_list");
733 m_sageReader.getCborStreamWriter().startArray(one_sample.
scan_map.size());
734 for(
auto &it_scan : one_sample.
scan_map)
736 writeScan(it_scan.second);
738 m_sageReader.getCborStreamWriter().endArray();
741 m_sageReader.getCborStreamWriter().endMap();
748 m_sageReader.getCborStreamWriter().startMap();
749 m_sageReader.getCborStreamWriter().append(
"id");
750 QCborValue(one_scan.
cbor_id).toCbor(m_sageReader.getCborStreamWriter());
751 m_sageReader.getCborStreamWriter().append(
"precursor");
752 QCborValue(one_scan.
cbor_precursor).toCbor(m_sageReader.getCborStreamWriter());
753 m_sageReader.getCborStreamWriter().append(
"ms2");
754 QCborValue(one_scan.
cbor_ms2).toCbor(m_sageReader.getCborStreamWriter());
756 m_sageReader.getCborStreamWriter().append(
"psm_list");
757 m_sageReader.getCborStreamWriter().startArray(one_scan.
psm_list.size());
758 for(
auto &it_psm : one_scan.
psm_list)
762 m_sageReader.getCborStreamWriter().endArray();
764 m_sageReader.getCborStreamWriter().endMap();
770 m_sageReader.getCborStreamWriter().startMap();
771 m_sageReader.getCborStreamWriter().append(
"proforma");
772 m_sageReader.getCborStreamWriter().append(one_psm.
proforma);
773 m_sageReader.getCborStreamWriter().append(
"protein_list");
774 QCborValue(one_psm.
cbor_protein_list).toCbor(m_sageReader.getCborStreamWriter());
776 m_sageReader.getCborStreamWriter().append(
"eval");
777 m_sageReader.getCborStreamWriter().startMap();
778 m_sageReader.getCborStreamWriter().append(
"sage");
779 QCborValue(one_psm.
cbor_eval).toCbor(m_sageReader.getCborStreamWriter());
780 m_sageReader.getCborStreamWriter().endMap();
782 m_sageReader.getCborStreamWriter().endMap();
std::vector< SageModification > getStaticModificationList() const
std::vector< SageModification > getVariableModificationList() const
QString getDecoyTag() const
std::vector< SageReader::SageModification > m_staticModificationList
void parseProteins(const QString &proteins_str)
virtual void endSheet() override
pappso::UiMonitorInterface * mp_monitor
virtual void startSheet(const QString &sheet_name) override
std::vector< SageReader::SageModification > m_variableModificationList
virtual ~SageTsvHandler()
void parsePeptide(const QString &peptide_str)
bool parseSpectrumStringId(const QString &spectrum_string_id)
SageTsvHandler(pappso::UiMonitorInterface *p_monitor, const SageReader &sage_reader, bool is_fasta_pass, PsmProteinMap &psm_protein_map)
void writeSample(const Sample &one_sample)
virtual void setCell(const OdsCell &cell) override
void writePsm(const Psm &one_psm)
void writeScan(const Scan &one_scan)
virtual void endDocument() override
virtual void startLine() override
void parseMsRunFilename(const QString &msrun_filename)
virtual void endLine() override
std::shared_ptr< Protein > protein_sp
QCborArray cbor_protein_list
QCborMap cbor_core_sample
std::map< QString, Scan > scan_map
std::vector< Psm > psm_list