libpappsomspp
Library for mass spectrometry
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
10#include "timsmsfilereader.h"
11#include "bafasciifilereader.h"
12#include "xymsfilereader.h"
13
14
15#include "../exception/exceptionnotfound.h"
16#include "../exception/exceptionnotpossible.h"
17#include "../exception/exceptionnotrecognized.h"
18#include "../msrun/msrunid.h"
19#include "../msrun/private/timsframesmsrunreader.h"
20
21#include "../msrun/private/pwizmsrunreader.h"
22#include "../msrun/private/timsmsrunreader.h"
23#include "../msrun/private/timsmsrunreaderms2.h"
24#include "../msrun/private/timsmsrunreaderdia.h"
25#include "../msrun/bafasciimsrunreader.h"
26#include "../msrun/xymsrunreader.h"
27
28#include "../utils.h"
29
30
31namespace pappso
32{
33
34
// Constructor: stores the file name and the XML prefix, then checks with QFile
// that the file exists and initialises the native ID format term
// (m_oboPsiModTermNativeIDFormat).
//
// NOTE(review): this listing skips source lines 40, 44 and 46 (the numbering
// jumps). The statement that wraps the not-found message, and the left-hand
// sides of part of the term initialisation, are therefore not visible here.
// Presumably line 40 throws ExceptionNotFound (its header is included) --
// confirm against the real file.
35MsFileAccessor::MsFileAccessor(const QString &file_name, const QString &xml_prefix)
36 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
37{
38 QFile file(file_name);
39 if(!file.exists())
// The message reports the absolute path of the file, not the path as given.
41 QObject::tr("File %1 not found.").arg(QFileInfo(file_name).absoluteFilePath())));
42
43
// The native ID term is named as having no nativeID format at construction;
// msRunReaderSPtr() overwrites the term when it builds a Pwiz reader.
45 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
47 "No nativeID format indicates that the file tagged with this term does not "
48 "contain spectra that can have a nativeID format.";
49}
50
51
// Member-initializer list of a constructor taking `other`: copies the file
// name, XML prefix, file format and file reader type from it.
//
// NOTE(review): the constructor signature (listing line 52) and body line 58
// are missing from this listing. The visible initializer list does not copy
// m_preferredFileReaderTypeMap or mcsp_selectedMsRunId -- confirm whether that
// is intended or handled on the missing line.
53 : m_fileName(other.m_fileName),
54 m_xmlPrefix(other.m_xmlPrefix),
55 m_fileFormat(other.m_fileFormat),
56 m_fileReaderType(other.m_fileReaderType)
57{
59}
60
// Empty function body (listing lines 62-63 are consecutive, so nothing is
// hidden inside it).
// NOTE(review): the signature line (61) is missing from this listing;
// presumably this is the destructor -- confirm against the real file.
62{
63}
64
65
66const QString &
68{
69 return m_fileName;
70}
71
72
75{
76 return m_fileFormat;
77}
78
// Builds and returns an OboPsiModTerm describing m_fileFormat.
//
// A switch on m_fileFormat fills the accession, name and definition of a
// locally constructed term for the formats it knows. Several cases, and the
// default branch, only `break`, so the term is returned as default-constructed
// for those formats.
//
// NOTE(review): the signature line (80) and most `case` labels are missing
// from this listing (the numbering jumps before each group of assignments), so
// which format maps to which term can only be read from the term names below.
79const OboPsiModTerm
81{
82 OboPsiModTerm term;
83
84 // is_a: MS:1000560 ! mass spectrometer file format
85 switch(m_fileFormat)
86 {
88 term.setAccession("MS:1001560");
89 term.m_name = "SCIEX TOF/TOF T2D format";
90 term.m_definition =
91 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
92 "export format.";
93 break;
95 term.setAccession("MS:1000562");
96 term.m_name = "ABI WIFF format";
97 term.m_definition = "Applied Biosystems WIFF file format.";
98 break;
100 term.setAccession("MS:1001509");
101 term.m_name = "Agilent MassHunter format";
102 term.m_definition =
103 "A data file format found in an Agilent MassHunter directory which "
104 "contains raw data acquired by an Agilent mass spectrometer.";
105 break;
// A case (label not visible) that leaves the term untouched.
107 break;
109 term.setAccession("MS:1000825");
110 term.m_name = "Bruker FID format";
111 term.m_definition = "Bruker FID file format.";
112 break;
114 term.setAccession("MS:1002817");
115 term.m_name = "Bruker TDF format";
116 term.m_definition = "Bruker TDF raw file format.";
117 break;
119 term.setAccession("MS:1000567");
120 term.m_name = "Bruker/Agilent YEP format";
121 term.m_definition = "Bruker/Agilent YEP file format.";
122 break;
124 term.setAccession("MS:1001062");
125 term.m_name = "Mascot MGF format";
126 term.m_definition = "Mascot MGF file format.";
127 break;
// A case (label not visible) that leaves the term untouched.
129 break;
131 term.setAccession("MS:1001881");
132 term.m_name = "mz5 format";
133 term.m_definition = "mz5 file format, modelled after mzML.";
134 break;
136 term.setAccession("MS:1000584");
137 term.m_name = "mzML format";
// NOTE(review): the spelling of Initiative in the literal below looks like a
// typo; it is runtime text, so it is left untouched in this documentation pass.
138 term.m_definition = "Proteomics Standards Inititative mzML file format.";
139 break;
141 term.setAccession("MS:1000566");
142 term.m_name = "ISB mzXML format";
143 term.m_definition = "Institute of Systems Biology mzXML file format.";
144 break;
// A case (label not visible) that leaves the term untouched.
146 break;
148
149 term.setAccession("MS:1000563");
150 term.m_name = "Thermo RAW format";
151 term.m_definition = "Thermo Scientific RAW file format.";
152 break;
// A case (label not visible) that leaves the term untouched.
154 break;
156 term.setAccession("MS:1000526");
157 term.m_name = "Waters raw format";
158 term.m_definition =
159 "Waters data file format found in a Waters RAW directory, generated "
160 "from an MS acquisition.";
161 break;
// NOTE(review): this branch uses the same accession (MS:1001369) as the xy
// branch just below while giving a different name and definition -- confirm
// that sharing the accession is intended.
163 term.setAccession("MS:1001369");
164 term.m_name = "BafAscii text format";
165 term.m_definition =
166 "Simple text file format obtained by exporting Bruker Baf to ascii "
167 "using Bruker software";
168 break;
169 case MsDataFormat::xy:
170 term.setAccession("MS:1001369");
171 term.m_name = "text format";
172 term.m_definition =
173 "Simple text file format of \"m/z<separator>intensity\" value pairs "
174 "for a single mass spectrum, a PMF (or single MS2) search.";
175 break;
// Any other format: return the term as default-constructed.
176 default:
177 break;
178 }
179
180 return term;
181}
182
183
// Accessor returning a const reference to an OboPsiModTerm.
// NOTE(review): the signature line (185) and the only body statement (189)
// are missing from this listing. Presumably this is
// getOboPsiModTermNativeIDFormat() returning m_oboPsiModTermNativeIDFormat --
// confirm against the real file.
184const OboPsiModTerm &
186{
187
188
190}
191
192
// Probes the file with each available file reader in turn -- Pwiz, Tims,
// BafAscii, then XY -- and returns the MsRunIds reported by the first reader
// that finds any.
//
// Side effects on success: m_fileFormat is set from the reader that matched;
// for the Pwiz and Tims readers m_fileReaderType is overridden when `pref`
// points at an entry of m_preferredFileReaderTypeMap; for the Tims reader
// m_fileName is replaced by the directory that was probed.
//
// Returns the (empty) result of the XY reader when no reader recognises the
// file. Only exceptions of type PappsoException raised while probing with the
// BafAscii reader are caught here.
//
// NOTE(review): listing lines 194, 208, 212, 241, 243, 270, 274 and 302 are
// missing (the numbering jumps). That hides the signature, the definitions of
// `pref`, the condition guarding the clear() in the BafAscii branch, and
// presumably the assignments of m_fileReaderType -- confirm against the real
// file.
193std::vector<MsRunIdCstSPtr>
195{
196 // qDebug();
197
198 // Try the PwizMsFileReader
199
200 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
201
202 std::vector<MsRunIdCstSPtr> ms_run_ids = pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
203 if(ms_run_ids.size())
204 {
205 qDebug() << "Might well be handled using the Pwiz code.";
206
207 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
209
210 // But the user might have configured one preferred reader type.
211
213 if(pref != m_preferredFileReaderTypeMap.end())
214 {
215 m_fileReaderType = pref->second;
216 }
217
218 return ms_run_ids;
219 }
220
221 qDebug() << "The Pwiz reader did not work.";
222
223 // Try the TimsData reader
224
// The Tims reader is given a directory: when m_fileName is not itself a
// directory, the directory containing it is used instead.
225 QString tims_dir = m_fileName;
226 if(!QFileInfo(tims_dir).isDir())
227 {
228 tims_dir = QFileInfo(m_fileName).absolutePath();
229 }
230
231 TimsMsFileReader tims_file_reader(tims_dir);
232
233 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
234
235 if(ms_run_ids.size())
236 {
237 qDebug() << "Might well be handled using the Bruker code";
238
// From here on the accessor refers to the directory, not the original path.
239 m_fileName = tims_dir;
240 m_fileFormat = tims_file_reader.getFileFormat();
242
244 if(pref != m_preferredFileReaderTypeMap.end())
245 {
246 m_fileReaderType = pref->second;
247 }
248
249 qDebug() << "Returning Bruker::tims ms run(s)."
250 << "with preferred reader type:" << Utils::fileReaderTypeAsString(m_fileReaderType);
251
252 return ms_run_ids;
253 }
254
255 qDebug() << "The Tims reader did not work.";
256
257 // Try the Baf->ascii export format from Bruker Compass
258
259 try
260 {
261 ms_run_ids.clear();
262 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
263
264 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
265
266 if(ms_run_ids.size())
267 {
268 qDebug() << "Might well be handled using the BafAscii code";
269
271
272 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
273
// NOTE(review): the condition of this if/else (listing line 274) is not
// visible; one outcome discards the ids and falls through to the XY probe,
// the other returns them.
275 {
276 ms_run_ids.clear();
277 }
278 else
279 {
280 return ms_run_ids;
281 }
282 }
283 }
284 catch(const pappso::PappsoException &error)
285 {
// A failure of the BafAscii probe is only logged; the XY probe still runs.
286 qDebug() << "This is not a BafAscii code file" << error.qwhat();
287 }
288
289
290 qDebug() << "The BafAscii reader did not work.";
291
292 // At this point try the XyMsFileReader
293
294 XyMsFileReader xy_ms_file_reader(m_fileName);
295
296 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
297
298 if(ms_run_ids.size())
299 {
300 qDebug() << "Might well be handled using the XY code";
301
303
304 m_fileFormat = xy_ms_file_reader.getFileFormat();
305
306 return ms_run_ids;
307 }
308
309 qDebug() << "The XY reader did not work.";
310
// No reader recognised the file: the vector returned here is empty.
311 return ms_run_ids;
312}
313
314
315void
317{
318 // qDebug();
319
320 auto ret = m_preferredFileReaderTypeMap.insert(
321 std::pair<MsDataFormat, FileReaderType>(format, reader_type));
322
323 if(!ret.second)
324 {
325 // replace
326 ret.first->second = reader_type;
327 }
328}
329
330
333{
334 // qDebug();
335
336 auto ret = m_preferredFileReaderTypeMap.find(format);
337
338 if(ret != m_preferredFileReaderTypeMap.end())
339 {
340 return ret->second;
341 }
342
343 return m_fileReaderType;
344}
345
346
349{
350 return m_fileReaderType;
351}
352
353
354void
356{
357 mcsp_selectedMsRunId = ms_run_id_csp;
358}
359
360
// NOTE(review): the signature (listing lines 361-362) and the only body
// statement (364) are missing from this listing. Presumably this is
// getSelectedMsRunId() returning mcsp_selectedMsRunId, the member written by
// setSelectedMsRunId() -- confirm against the real file.
363{
365}
366
// Builds a TimsMsRunReaderMs2 directly, without going through the generic
// reader selection.
//
// The Tims file reader is run on m_fileName, or on its containing directory
// when m_fileName is not a directory. If it reports at least one MS run,
// m_fileFormat and m_fileName are updated and a reader is returned for the
// first run only. Otherwise the else branch reports that the directory could
// not be read.
//
// NOTE(review): the signature (listing lines 367-368) and lines 383 and 391
// are missing from this listing. Line 391 presumably opens a throw around the
// message below, and line 383 presumably sets m_fileReaderType -- confirm
// against the real file.
369{
370 // try TimsData reader
371 QString tims_dir = m_fileName;
372 if(!QFileInfo(tims_dir).isDir())
373 {
374 tims_dir = QFileInfo(m_fileName).absolutePath();
375 }
376 TimsMsFileReader tims_file_reader(tims_dir);
377
378 std::vector<MsRunIdCstSPtr> ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
379
380 if(ms_run_ids.size())
381 {
382 // qDebug() << "Might well be handled using the Bruker code";
384 m_fileFormat = tims_file_reader.getFileFormat();
// From here on the accessor refers to the directory, not the original path.
385 m_fileName = tims_dir;
386
// Only the first reported run is used.
387 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
388 }
389 else
390 {
392 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.").arg(tims_dir)));
393 }
394}
395
396
// Returns an MS run reader for ms_run_id, choosing the concrete reader class
// through an if / else-if chain.
//
// Throws ExceptionNotPossible when the file name of ms_run_id differs from
// m_fileName, and PappsoException from the final else branch. The two branches
// that build a PwizMsRunReader also store that reader's native ID format term
// in m_oboPsiModTermNativeIDFormat.
//
// NOTE(review): the signature (listing lines 397-398) and every branch
// condition (lines 410, 417, 423, 429, 435, 441, 448, 454) are missing from
// this listing. Presumably the chain tests m_fileReaderType -- confirm against
// the real file.
399{
400 // qDebug();
401
402 // We want to return a MsRunReader that accounts for the configuration that
403 // the user might have set.
404
// NOTE(review): "the name file name" in the message below reads like a typo
// for "the same file name"; it is runtime text, so it is left untouched in
// this documentation pass.
405 if(m_fileName != ms_run_id->getFileName())
406 throw(
407 ExceptionNotPossible(QObject::tr("The MsRunId instance must have the name file name as the "
408 "MsFileAccessor.")));
409
411 {
412 // qDebug() << "Returning a PwizMsRunReader.";
413 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
414 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
415 return pwiz_reader;
416 }
418 {
419 // qDebug() << "Returning a XyMsRunReader.";
420
421 return std::make_shared<XyMsRunReader>(ms_run_id);
422 }
424 {
425 // qDebug() << "Returning a TimsMsRunReader.";
426
427 return std::make_shared<TimsMsRunReader>(ms_run_id);
428 }
430 {
431 // qDebug() << "Returning a TimsFramesMsRunReader.";
432
433 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
434 }
436 {
437 // qDebug() << "Returning a TimsMsRunReaderMs2.";
438
439 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
440 }
442 {
443 // qDebug() << "Returning a TimsMsRunReaderDia.";
444
445 // qInfo() << "std::make_shared<TimsMsRunReaderDia>(ms_run_id);";
446 return std::make_shared<TimsMsRunReaderDia>(ms_run_id);
447 }
449 {
450 // qDebug() << "Returning a BafAsciiMsRunReader.";
451
452 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
453 }
455 {
// In this branch the choice falls back on the data format carried by the
// MsRunId itself: xy data gets the XY reader, anything else the Pwiz reader.
456 if(ms_run_id.get()->getMsDataFormat() == MsDataFormat::xy)
457 {
458 return std::make_shared<XyMsRunReader>(ms_run_id);
459 }
460 else
461 {
462 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
463 m_oboPsiModTermNativeIDFormat = pwiz_reader->getOboPsiModTermNativeIDFormat();
464 return pwiz_reader;
465 }
466 }
467 else
468 {
469 throw PappsoException(QObject::tr("No file format was found."));
470 }
471
// Every visible branch above returns or throws, so this line looks
// unreachable (assuming the hidden conditions form a single chain).
472 return nullptr;
473}
474
475
477MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
478{
479 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
480 if(ms_run_id_index >= ms_run_ids.size())
481 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
482
483 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
484}
485
486
// NOTE(review): the signature (listing lines 487-488) and the only body
// statement (492) are missing from this listing. Presumably this is
// msRunReaderSPtrForSelectedMsRunId(), building a reader for
// mcsp_selectedMsRunId -- confirm against the real file.
489{
490 // qDebug();
491
493}
494
495
// NOTE(review): the signature (listing lines 496-497) and the only body
// statement (499) are missing from this listing. Presumably this is the
// one-argument buildMsRunReaderSPtr(MsRunIdCstSPtr), delegating to the
// two-argument factory defined just below -- confirm against the real file.
498{
500}
501
// Reader factory: builds an MS run reader for ms_run_id from the data format
// stored in the MsRunId itself. No member of MsFileAccessor is read in the
// visible body.
//
// preferred_file_reader_type is only consulted for the brukerTims format,
// where it selects between TimsMsRunReader, TimsMsRunReaderMs2 and
// TimsFramesMsRunReader; any other value falls back to TimsMsRunReader.
//
// Throws ExceptionNotFound when the file named by ms_run_id does not exist,
// and PappsoException when the format is MsDataFormat::unknown. Every format
// not handled explicitly gets a PwizMsRunReader.
//
// NOTE(review): the first two lines of the signature (listing lines 502-503)
// are missing, so the function name and the declaration of ms_run_id are not
// visible here. No branch builds a TimsMsRunReaderDia in this factory --
// confirm whether that is intended.
504 pappso::FileReaderType preferred_file_reader_type)
505{
506 QFile file(ms_run_id.get()->getFileName());
507 if(!file.exists())
508 throw(ExceptionNotFound(QObject::tr("unable to build a reader : file %1 not found.")
509 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
510
511 MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
512
513 if(file_format == MsDataFormat::xy)
514 {
515 // qDebug() << "Returning a XyMsRunReader.";
516
517 return std::make_shared<XyMsRunReader>(ms_run_id);
518 }
519 else if(file_format == MsDataFormat::brukerBafAscii)
520 {
521 // qDebug() << "Returning a BafAsciiMsRunReader.";
522
523 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
524 }
525 else if(file_format == MsDataFormat::unknown)
526 {
527 throw(PappsoException(QObject::tr("unable to build a reader for %1 : unknown file format")
528 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
529 }
530
531 else if(file_format == MsDataFormat::brukerTims)
532 {
533 if(preferred_file_reader_type == pappso::FileReaderType::tims)
534 {
535 return std::make_shared<TimsMsRunReader>(ms_run_id);
536 }
537 else if(preferred_file_reader_type == pappso::FileReaderType::tims_ms2)
538 {
539 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
540 }
541 else if(preferred_file_reader_type == pappso::FileReaderType::tims_frames)
542 {
543 qDebug() << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
544 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
545 }
546 // qDebug() << "by default, build a TimsMsRunReader.";
547 return std::make_shared<TimsMsRunReader>(ms_run_id);
548 }
549 else
550 {
551 // qDebug() << "Returning a PwizMsRunReader .";
552 return std::make_shared<PwizMsRunReader>(ms_run_id);
553 }
554}
555
556
558MsFileAccessor::getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
559{
560 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
561 MsRunReaderSPtr reader_sp;
562 for(MsRunIdCstSPtr &original_run_id : run_list)
563 {
564 if(original_run_id.get()->getRunId() == run_id)
565 {
566 MsRunId new_run_id(*original_run_id.get());
567 new_run_id.setXmlId(xml_id);
568
569 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
570 }
571 }
572
573 if((run_id.isEmpty()) && (run_list.size() == 1))
574 {
575 MsRunId new_run_id(*run_list[0].get());
576 new_run_id.setXmlId(xml_id);
577
578 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
579 }
580
581
582 if(reader_sp == nullptr)
583 {
584 throw(ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
585 .arg(run_id)
586 .arg(QFileInfo(m_fileName).absoluteFilePath())));
587 }
588 return reader_sp;
589}
590
591
592} // namespace pappso
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
std::map< MsDataFormat, FileReaderType > m_preferredFileReaderTypeMap
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
const QString m_xmlPrefix
void setPreferredFileReaderType(MsDataFormat format, FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
FileReaderType getpreferredFileReaderType(MsDataFormat format)
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
FileReaderType getFileReaderType() const
get the file reader type
MsRunIdCstSPtr mcsp_selectedMsRunId
MsDataFormat getFileFormat() const
get the raw format of mz data
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
FileReaderType m_fileReaderType
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, builds directly a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity MsRunId identifies an MS run with a unique ID (XmlId) and contains eventually informa...
Definition: msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition: msrunid.cpp:137
void setAccession(const QString &accession)
virtual const QString & qwhat() const
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
static QString fileReaderTypeAsString(FileReaderType file_reader_type)
Definition: utils.cpp:501
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition: msrunreader.h:57
MsDataFormat
Definition: types.h:131
@ xy
(x,y) format
@ unknown
unknown format
@ SQLite3
SQLite3 format.
@ MGF
Mascot format.
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition: msrunid.h:46
FileReaderType
Definition: types.h:158
@ pwiz
using libpwizlite
@ tims
TimsMsRunReader : each scan is returned as a mass spectrum.
MSrun file reader for native Bruker TimsTOF raw data.