libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzcbormsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/mzcbormsrunreader.cpp
3 * \date 21/11/2025
4 * \author Olivier Langella
5 * \brief MSrun file reader for mzcbor
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28
29#include "mzcbormsrunreader.h"
37
38namespace pappso
39{
41 : pappso::MsRunReader(msrun_id_csp)
42{
43 initialize();
44 qDebug();
45}
46
51
52
53const std::vector<qint64> &
59
// Locates the mzCBOR file named by the run id and loads the spectrum index used for
// random access: from a "<file>.idx" side file when one exists, and by reading the
// data file itself when no spectrum position has been loaded at that point.
// Throws ExceptionNotFound when the mzCBOR file does not exist.
// NOTE(review): listing lines 61, 71, 81, 104 and 129 are absent from this extract
// (among them the function name and the declaration of index_reader), so the text is
// not compilable as shown — confirm against the original file.
60void
62{
63 qDebug();
64 m_cborFileInfo.setFile(mcsp_msRunId->getFileName());
65
66 if(!m_cborFileInfo.exists())
67 throw ExceptionNotFound(m_cborFileInfo.absoluteFilePath());
68
69
70 // build the index
72
// The side index file is expected next to the data file, with ".idx" appended to
// its absolute path.
73 QString str_index_file = m_cborFileInfo.absoluteFilePath();
74 QFileInfo mzcbor_index_fileinfo(str_index_file.append(".idx"));
75 if(mzcbor_index_fileinfo.exists())
76 {
77 qDebug() << "mzcbor_index_fileinfo.exists()";
78 QFile mzcbor_index_file(mzcbor_index_fileinfo.absoluteFilePath());
// NOTE(review): the result of open() is not checked before the file is read.
79 mzcbor_index_file.open(QIODevice::ReadOnly);
80
82 index_reader.readCbor(&mzcbor_index_file);
83 index_reader.close();
84 mzcbor_index_file.close();
85
86 qDebug();
87 std::vector<QString> run_id_list = index_reader.getRunIdList();
88 if(run_id_list.size() > 0)
89 {
// With several runs in the index, select the one whose id equals this reader's run
// id. The loop does not stop at the first match, and run_position stays 0 when
// nothing matches.
90 std::size_t run_position = 0;
91 if(run_id_list.size() > 1)
92 {
93 std::size_t i_run = 0;
94 for(const QString &run_id : run_id_list)
95 {
96 if(run_id == getMsRunId().get()->getRunId())
97 {
98 run_position = i_run;
99 }
100 i_run++;
101 }
102 }
103
// NOTE(review): the head of this statement (listing line 104) is missing; presumably
// it assigns m_spectrumIndexPositionInFile — confirm. at() throws std::out_of_range
// when run_position is not a valid position in the list.
105 index_reader.getRunAndSpectrumOffsetList().at(run_position);
106
// Native ids are numbered in list order: the i-th id is registered with index i.
107 std::size_t i = 0;
108 for(const QString &native_id : index_reader.getRunAndSpectrumIdList().at(run_position))
109 {
110 m_nativeId2SpectrumIndexMap.insert({native_id, i});
111 i++;
112 }
113 }
114 }
115
// Fallback: no spectrum position is known yet, so read the data file to build the index.
116 if(m_spectrumIndexPositionInFile.size() == 0)
117 {
118
119 pappso::UiMonitorVoid monitor;
120 pappso::cbor::mzcbor::MzcborBuildIndexReader mzcbor_build_index_reader(getMsRunId());
// NOTE(review): no acquireDevice() call is visible before mpa_mzcborFileDevice is used
// here (listing line 71 is missing) — confirm the device is open at this point.
121 mzcbor_build_index_reader.readCbor(mpa_mzcborFileDevice, monitor);
122
123
124 qDebug();
125 mzcbor_build_index_reader.close();
126 // releaseDevice();
127 qDebug();
128
// NOTE(review): listing line 129 is missing; presumably it fills
// m_spectrumIndexPositionInFile from the build-index reader — confirm.
130 m_nativeId2SpectrumIndexMap = mzcbor_build_index_reader.getNativeId2SpectrumIndexMap();
131 }
132}
133
134bool
136{
137 if(m_nativeId2SpectrumIndexMap.size() > 0)
138 {
139 if(m_nativeId2SpectrumIndexMap.begin()->first.contains("scan="))
140 return true;
141 }
142 return false;
143}
144
145std::size_t
147{
148 if(m_scan2SpectrumIndexMap.size() == 0)
149 {
150 for(auto &index_pair : m_nativeId2SpectrumIndexMap)
151 {
152 QStringList native_id_list = index_pair.first.split("=");
153 if(native_id_list.size() < 2)
154 {
155 }
156 else
157 {
158 std::size_t scan_number = native_id_list.back().toULong();
160 std::pair<std::size_t, std::size_t>(scan_number, index_pair.second));
161 }
162 }
163 }
164
165 auto it = m_scan2SpectrumIndexMap.find(scan_number);
166
167 if(it == m_scan2SpectrumIndexMap.end())
168 {
169 throw ExceptionNotFound(QObject::tr("error reading file %1 : scan %2 not found")
170 .arg(mcsp_msRunId.get()->getFileName())
171 .arg(scan_number));
172 }
173 return it->second;
174}
175
// Declared as the check telling whether this reader is able to handle a file.
// The file_name argument is marked [[maybe_unused]] and the visible body never reads it.
// NOTE(review): listing line 180 is absent from this extract; the statement below is
// the argument of a call whose head is not shown — presumably a throw of a
// "not implemented" exception, in which case no value is ever returned. Confirm
// against the original file.
176bool
177MzcborMsRunReader::accept(const QString &file_name [[maybe_unused]]) const
178{
179
181 QObject::tr("%1 %2 %3 not implemented").arg(__FILE__).arg(__FUNCTION__).arg(__LINE__));
182}
183
184
// Declared to return the OboPsiModTerm of the instrument model name.
// NOTE(review): listing lines 186 (function name) and 188 are absent from this
// extract; the statement below is the argument of a call whose head is not shown —
// presumably a throw of a "not implemented" exception, in which case no term is ever
// returned. Confirm against the original file.
185const OboPsiModTerm
187{
189 QObject::tr("%1 %2 %3 not implemented").arg(__FILE__).arg(__FUNCTION__).arg(__LINE__));
190}
191
192bool
194{
195 if(mpa_mzcborFileDevice == nullptr)
196 {
197 mpa_mzcborFileDevice = new QFile(m_cborFileInfo.absoluteFilePath());
198 mpa_mzcborFileDevice->open(QIODevice::ReadOnly);
199 }
200 return true;
201}
202
203
204bool
206{
207 if(mpa_mzcborFileDevice != nullptr)
208 {
209 mpa_mzcborFileDevice->close();
210 delete mpa_mzcborFileDevice;
211 mpa_mzcborFileDevice = nullptr;
212 }
213 return true;
214}
215
216
219 const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const
220{
221
222 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
223
224 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
225 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
226 return xic_coord;
227}
228
231 pappso::PrecisionPtr precision) const
232{
// Builds a XIC coordinate for the spectrum at spectrum_index: the retention time
// target comes from the spectrum read without binary data, the m/z range from its
// selected precursor ion and the given precision.
// NOTE(review): listing lines 229-230 (return type and function name), 256 and 262 are
// absent from this extract; the two QObject::tr(...) statements below are arguments of
// calls whose heads are not shown — presumably throws. Confirm against the original file.
// NOTE(review): no acquireDevice() call is visible before fillMzcborSpectrum() here.
233
234 pappso::cbor::mzcbor::Spectrum cbor_spectrum;
235
236 fillMzcborSpectrum(spectrum_index, cbor_spectrum, false);
237
238 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
239
240 xic_coord.get()->rtTarget = cbor_spectrum.getRtInSeconds();
241
242 bool get_mz = false;
243 if(cbor_spectrum.precursorList.size() > 0)
244 {
245
// Every selected ion of every precursor overwrites mzRange, so the last one visited
// determines the range that is returned.
246 for(auto &precursor : cbor_spectrum.precursorList)
247 {
248 for(auto &ion : precursor.selectedIonList)
249 {
250 xic_coord.get()->mzRange = MzRange(ion.getMz(), precision);
251 get_mz = true;
252 }
253 }
// Precursors exist but none of them carries a selected ion.
254 if(!get_mz)
255 {
257 QObject::tr("precursor m/z not found for this spectrum index %1").arg(spectrum_index));
258 }
259 }
260 else
261 {
262 QObject::tr("no precursor found for this spectrum index %1").arg(spectrum_index));
264 }
265
266 return xic_coord;
267}
268
271{
272 return massSpectrumSPtr(spectrum_index);
273}
274
277{
// Reads the spectrum at spectrum_index, binary data included, and decodes it into a
// newly allocated MassSpectrum. Any pappso::PappsoException raised on the way is
// rethrown as a pappso::PappsoException whose message is prefixed with the reader name.
// NOTE(review): listing lines 275-276 (return type and function name) and 292 are
// absent from this extract; the QObject::tr(...) statement in the else branch is the
// argument of a call whose head is not shown — presumably a throw. Confirm against the
// original file.
278 MassSpectrumSPtr mass_spectrum_sp;
279 try
280 {
// NOTE(review): the value returned by acquireDevice() is not checked.
281 acquireDevice();
282 pappso::cbor::mzcbor::Spectrum cbor_spectrum;
283
284 fillMzcborSpectrum(spectrum_index, cbor_spectrum, true);
// Decoding only happens when exactly two binary data arrays were read.
285 if(cbor_spectrum.binaryDataArrayList.size() == 2)
286 {
287 mass_spectrum_sp = std::make_shared<MassSpectrum>();
288 cbor_spectrum.decodeTrace(*(mass_spectrum_sp.get()));
289 }
290 else
291 {
293 QObject::tr("cbor_spectrum.binaryDataArrayList.size() != 2"));
294 }
295 }
296 catch(const pappso::PappsoException &pappso_error)
297 {
298 qDebug() << "Going to throw";
299
300 throw pappso::PappsoException(QObject::tr("Error reading data (massSpectrumSPtr) using the "
301 "mzcbor reader: %1")
302 .arg(pappso_error.what()));
303 }
304 return mass_spectrum_sp;
305}
306
309 bool want_binary_data) const
310{
// Reads the spectrum at spectrum_index through fillMzcborSpectrum() and copies its
// native id, index, retention time, precursor information, MS level and (when present)
// peak data into a QualifiedMassSpectrum.
// NOTE(review): listing lines 307-308 (return type and function name) and 366 are
// absent from this extract; the QObject::tr(...) statement in the catch block is the
// argument of a call whose head is not shown — presumably a throw. Confirm against the
// original file.
// NOTE(review): no acquireDevice() call is visible in this function.
311 qDebug();
312 try
313 {
314 pappso::cbor::mzcbor::Spectrum cbor_spectrum;
315
316 fillMzcborSpectrum(spectrum_index, cbor_spectrum, want_binary_data);
317 qDebug() << cbor_spectrum.index;
318 QualifiedMassSpectrum qualified_mass_spectrum;
319
// The id is created with the requested index; setSpectrumIndex() is then called with
// the index read from the file.
320 MassSpectrumId spectrum_id(mcsp_msRunId, spectrum_index);
321 spectrum_id.setNativeId(cbor_spectrum.id);
322
323 spectrum_id.setSpectrumIndex(cbor_spectrum.index);
324
325 qualified_mass_spectrum.setMassSpectrumId(spectrum_id);
326 qualified_mass_spectrum.setRtInSeconds(cbor_spectrum.getRtInSeconds());
327 if(cbor_spectrum.precursorList.size() > 0)
328 {
// Only the first precursor provides the precursor native id and spectrum index.
329 qualified_mass_spectrum.setPrecursorNativeId(
330 cbor_spectrum.precursorList.at(0).spectrumRef);
// NOTE(review): std::map::at throws std::out_of_range when spectrumRef is not an
// indexed native id; the handler below only catches pappso::PappsoException, so that
// error would presumably not be wrapped — confirm this is intended.
331 qualified_mass_spectrum.setPrecursorSpectrumIndex(
332 m_nativeId2SpectrumIndexMap.at(cbor_spectrum.precursorList.at(0).spectrumRef));
333
// One PrecursorIonData entry is appended per selected ion of every precursor.
334 for(auto &precursor : cbor_spectrum.precursorList)
335 {
336 for(auto &ion : precursor.selectedIonList)
337 {
338 PrecursorIonData precursor_ion_data;
339 precursor_ion_data.charge = ion.getChargeState();
340 precursor_ion_data.intensity = ion.getIntensity();
341 precursor_ion_data.mz = ion.getMz();
342 qualified_mass_spectrum.appendPrecursorIonData(precursor_ion_data);
343 }
344 }
345 }
346
347
348 qualified_mass_spectrum.setMsLevel(cbor_spectrum.getMsLevel());
// Flagged as empty when defaultArrayLength converts to false.
349 qualified_mass_spectrum.setEmptyMassSpectrum(!cbor_spectrum.defaultArrayLength);
// Peak data is attached only when exactly two binary data arrays were read.
350 if(cbor_spectrum.binaryDataArrayList.size() == 2)
351 {
352 MassSpectrumSPtr mass_spectrum_sp = std::make_shared<MassSpectrum>();
353 cbor_spectrum.decodeTrace(*(mass_spectrum_sp.get()));
354 qualified_mass_spectrum.setMassSpectrumSPtr(mass_spectrum_sp);
355 }
356
357 qDebug() << "spectrum id=" << cbor_spectrum.id;
358
359 return qualified_mass_spectrum;
360 }
361
362 catch(const pappso::PappsoException &pappso_error)
363 {
364 qDebug() << "Going to throw";
365
367 QObject::tr("Error reading data (qualifiedMassSpectrum) using the "
368 "mzcbor reader: %1")
369 .arg(pappso_error.what()));
370 }
371}
372
// Visits the run and feeds spectra to the handler: builds a read configuration from
// the MS levels (1 to 8) the handler asks peak lists for, then delegates to
// readSpectrumCollectionWithMsrunReadConfig().
// NOTE(review): listing lines 374 (function name), 377, 400 and 409 are absent from
// this extract; each QObject::tr(...) statement below is the argument of a call whose
// head is not shown — presumably a throw. If the first one (listing line 377) is an
// unconditional throw, everything after it is unreachable. Confirm against the
// original file.
373void
375{
376
378 QObject::tr("%1 %2 %3 not implemented").arg(__FILE__).arg(__FUNCTION__).arg(__LINE__));
379 MsRunReadConfig config;
380 std::vector<size_t> ms_levels;
// Keep every MS level from 1 to 8 for which the handler needs the peak list.
381 for(std::size_t i = 1; i < 9; i++)
382 {
383 if(handler.needMsLevelPeakList(i))
384 {
385 ms_levels.push_back(i);
386 }
387 }
388 config.setMsLevels(ms_levels);
389 config.setNeedPeakList(handler.needPeakList());
390 // readSpectrumCollectionByMsLevel(reader_timeline, 1);
391
392 try
393 {
394 readSpectrumCollectionWithMsrunReadConfig(config, handler);
395 }
396 catch(const pappso::PappsoException &pappso_error)
397 {
398 qDebug() << "Going to throw";
399
401 QObject::tr("Error reading data (spectrum collection2) using the "
402 "mzcbor reader: %1")
403 .arg(pappso_error.what()));
404 }
405 catch(std::exception &error)
406 {
407 qDebug() << "Going to throw";
408
410 QObject::tr("Error reading data (spectrum collection) using the "
411 "mzcbor reader: %1")
412 .arg(error.what()));
413 }
414}
415
// Reads the spectrum collection with a caller-supplied configuration by delegating to
// readSpectrumCollectionWithMsrunReadConfig(); both pappso::PappsoException and
// std::exception are intercepted.
// NOTE(review): listing lines 417-418 (function name and parameters), 429 and 438 are
// absent from this extract; each QObject::tr(...) statement below is the argument of a
// call whose head is not shown — presumably a throw that wraps the caught message.
// Confirm against the original file.
416void
419{
420 qDebug();
421 try
422 {
423 readSpectrumCollectionWithMsrunReadConfig(config, handler);
424 }
425 catch(const pappso::PappsoException &pappso_error)
426 {
427 qDebug() << "Going to throw";
428
430 QObject::tr("Error reading data (spectrum collection2) using the "
431 "mzcbor reader: %1")
432 .arg(pappso_error.what()));
433 }
434 catch(std::exception &error)
435 {
436 qDebug() << "Going to throw";
437
439 QObject::tr("Error reading data (spectrum collection2) using the "
440 "mzcbor reader: %1")
441 .arg(error.what()));
442 }
443}
444
445void
447 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
448{
449 MsRunReadConfig config;
450 config.setMsLevels({ms_level});
451 config.setNeedPeakList(handler.needPeakList());
452 // readSpectrumCollectionByMsLevel(reader_timeline, 1);
453 readSpectrumCollection2(config, handler);
454}
455
456std::size_t
458{
459 return m_spectrumIndexPositionInFile.size();
460}
461
// Looks up a spectrum identifier string in the native-id map and returns the spectrum
// index registered for it.
// NOTE(review): listing lines 463 (function name) and 469 are absent from this extract;
// the QObject::tr(...) statement below is the argument of a call whose head is not
// shown — presumably a throw of a "not found" exception. If it does not throw, the
// return below would dereference the end iterator. Confirm against the original file.
462std::size_t
464 const QString &spectrum_identifier)
465{
466 auto it = m_nativeId2SpectrumIndexMap.find(spectrum_identifier);
467 if(it == m_nativeId2SpectrumIndexMap.end())
468 {
470 QObject::tr("spectrum identifier %1 not found").arg(spectrum_identifier));
471 }
472 return it->second;
473}
474
475
// Streams the mzCBOR file through a MzcborSpectrumCollectionReader configured with the
// given read configuration and handler, after giving it this reader's run id and a
// pointer to a native-id map.
// NOTE(review): listing lines 477-478 (function name and parameters), 483, 489 and 499
// are absent from this extract: the argument of setNativeId2SpectrumIndexMapPtr( is
// not shown, and the QObject::tr(...) statement in the catch block is the argument of a
// call whose head is not shown — presumably a throw. Confirm against the original file.
476void
479{
// NOTE(review): acquireDevice() is commented out, so mpa_mzcborFileDevice is used
// below as-is — presumably the caller must have acquired the device; confirm.
480 // acquireDevice();
481 try
482 {
484 pappso::UiMonitorVoid monitor;
485 pappso::cbor::mzcbor::MzcborSpectrumCollectionReader mzcbor_spectrum_collection_reader(
486 config, handler);
487 mzcbor_spectrum_collection_reader.setMsRunId(getMsRunId());
488 mzcbor_spectrum_collection_reader.setNativeId2SpectrumIndexMapPtr(
490 mzcbor_spectrum_collection_reader.readCbor(mpa_mzcborFileDevice, monitor);
491
492
493 qDebug();
494 mzcbor_spectrum_collection_reader.close();
495 qDebug();
496 }
497 catch(const pappso::PappsoException &pappso_err)
498 {
500 QObject::tr("ERROR in MzcborMsRunReader::readSpectrumCollectionWithMsrunReadConfig:\n%1")
501 .arg(pappso_err.qwhat()));
502 }
503 // End of
504 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
505
506 // Now let the loading handler know that the loading of the data has ended.
507 // The handler might need this "signal" to perform additional tasks or to
508 // cleanup cruft.
509}
510
511
// Fills `spectrum` from the mzCBOR file device for the spectrum at spectrum_index,
// with or without its binary data depending on want_binary_data. The index is checked
// against the number of known spectrum positions and the device must not be null.
// NOTE(review): listing lines 514 (the spectrum parameter), 520, 526 and 535-536 are
// absent from this extract. The two QObject::tr(...) statements are arguments of calls
// whose heads are not shown — presumably throws. The declaration of cbor_stream_reader
// and any positioning of the device at the spectrum offset are not visible either.
// Confirm against the original file.
512void
513MzcborMsRunReader::fillMzcborSpectrum(std::size_t spectrum_index,
515 bool want_binary_data) const
516{
517
// Guard: the requested index must be one of the indexed spectrum positions.
518 if(spectrum_index >= m_spectrumIndexPositionInFile.size())
519 {
521 QObject::tr("spectrum index %1 not found").arg(spectrum_index));
522 }
// Guard: this function does not open the device itself.
523 if(mpa_mzcborFileDevice == nullptr)
524 {
525
527 QObject::tr("mzCBOR file device is not ready, use acquireDevice() before access"));
528 }
529 qDebug() << spectrum_index << " " << m_spectrumIndexPositionInFile[spectrum_index];
530
531
532 // qDebug() << spectrum_index << " " << m_spectrumIndexPositionInFile[spectrum_index-1];
533
534
537 cbor_stream_reader.setDevice(mpa_mzcborFileDevice);
538
539
540 spectrum.fromCbor(cbor_stream_reader, want_binary_data);
541 // cbor_stream_reader.leaveContainer();
542
543 // mzcbor_file.close();
544}
545
546} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
void setNeedPeakList(bool need_peak_list)
void setMsLevels(std::vector< std::size_t > ms_levels)
base class to read an MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition msrunreader.h:64
MsRunIdCstSPtr mcsp_msRunId
const MsRunIdCstSPtr & getMsRunId() const
const std::vector< qint64 > & getSpectrumIndexPositionInFile() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
std::map< QString, std::size_t > m_nativeId2SpectrumIndexMap
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
virtual const OboPsiModTerm getOboPsiModTermInstrumentModelName() const override
get OboPsiModTerm corresponding to the instrument model name child of : [Term] id: MS:1000031 name: i...
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
std::vector< qint64 > m_spectrumIndexPositionInFile
virtual std::size_t spectrumStringIdentifier2SpectrumIndex(const QString &spectrum_identifier) override
if possible, get the spectrum index given a string identifier throw a not found exception if spectrum...
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
virtual std::size_t scanNumber2SpectrumIndex(std::size_t scan_number) override
if possible, converts a scan number into a spectrum index This is a convenient function to help trans...
std::map< std::size_t, std::size_t > m_scan2SpectrumIndexMap
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
MzcborMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
void fillMzcborSpectrum(std::size_t spectrum_index, pappso::cbor::mzcbor::Spectrum &spectrum, bool want_binary_data) const
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its spectrum index
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
virtual bool acquireDevice() override
acquire data back end device
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file must be implemented by private MS run reader,...
virtual void initialize() override
const char * what() const noexcept override
virtual const QString & qwhat() const
Class representing a fully specified mass spectrum.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMassSpectrumId(const MassSpectrumId &iD)
Set the MassSpectrumId.
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectrums from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual bool needMsLevelPeakList(unsigned int ms_level) const final
tells if we need the peak list (if we want the binary data) for each spectrum, given an MS level
const std::vector< qint64 > & getSpectrumIndexPositionInFile() const
virtual void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor) override
const std::map< QString, std::size_t > & getNativeId2SpectrumIndexMap() const
const std::vector< std::vector< qint64 > > & getRunAndSpectrumOffsetList() const
const std::vector< std::vector< QString > > & getRunAndSpectrumIdList() const
const std::vector< QString > & getRunIdList() const
virtual void readCbor(QFile *cborp, pappso::UiMonitorInterface &monitor)
void setNativeId2SpectrumIndexMapPtr(const std::map< QString, std::size_t > *nativeId2SpectrumIndexMap)
read mzcbor to build an index
read mzcbor index file
read mzcbor for spectrum collection handler
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:44
void decodeTrace(pappso::Trace &trace)
Definition spectrum.cpp:270
std::vector< Precursor > precursorList
Definition spectrum.h:66
void fromCbor(CborStreamReader &reader, bool want_binary_data)
Definition spectrum.cpp:34
std::vector< BinaryDataArray > binaryDataArrayList
Definition spectrum.h:67