libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzmlconvert.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/processing/cbor/mzcbor/mzmlconvert.cpp
3 * \date 19/11/2025
4 * \author Olivier Langella
5 * \brief convert mzML to mzcbor
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2025 Olivier Langella <Olivier.Langella@universite-paris-saclay.fr>.
10 *
11 * This file is part of PAPPSOms-tools.
12 *
13 * PAPPSOms-tools is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms-tools is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28#include "mzmlconvert.h"
29#include <QDebug>
30#include <zlib.h>
32#include "pappsomspp/config.h"
33#include "binarydataarray.h"
34#include "cvparam.h"
35
36
39 : mp_monitor(p_monitor), mp_cborWriter(p_output)
40{
41 m_elementToStoreInArray << "cv" << "userParam" << "cvParam" << "binaryDataArray" << "spectrum"
42 << "sourceFile"
43 << "referenceableParamGroup" << "software" << "instrumentConfiguration"
44 << "processingMethod" << "dataProcessing" << "scan" << "scanWindow"
45 << "precursor" << "selectedIon";
46}
47
51
52
53void
55{
56 writer->startMap();
57
58 writer->writeInformations(PAPPSOMSPP_NAME, PAPPSOMSPP_VERSION, "mzCBORindex", "mzMLconvert");
59
60 writer->append("from");
61 writer->append(m_uuid);
62
63 writer->append("runIdList");
64 writer->writeArray(m_runIdList);
65
66
67 writer->append("runSpectrumIndexList");
68 writer->startArray(m_runAndSpectrumOffsetList.size());
69 for(auto &spectrum_offset_list : m_runAndSpectrumOffsetList)
70 {
71 writer->writeArray(spectrum_offset_list);
72 }
73 writer->endArray();
74
75
76 writer->append("runSpectrumNativeIdList");
77 writer->startArray(m_runAndSpectrumIdList.size());
78 for(auto &spectrum_id_list : m_runAndSpectrumIdList)
79 {
80 writer->writeArray(spectrum_id_list);
81 }
82 writer->endArray();
83
84 writer->endMap();
85}
86
87
88void
90{
91 mp_cborWriter->startMap();
92 if(m_qxmlStreamReader.readNextStartElement())
93 {
94 qDebug() << m_qxmlStreamReader.name().toString();
95 if(m_qxmlStreamReader.name().toString() == "indexedmzML")
96 {
97 m_qxmlStreamReader.readNextStartElement();
98 }
99 if(m_qxmlStreamReader.name().toString() == "mzML")
100 {
101 // write mzCBOR header
102 mp_cborWriter->append("mzCBOR");
103 mp_cborWriter->startMap();
104
105 mp_cborWriter->append("mode");
106 mp_cborWriter->append(0);
107
108 mp_cborWriter->writeInformations(
109 PAPPSOMSPP_NAME, PAPPSOMSPP_VERSION, "mzCBOR", "mzMLconvert");
110
111 m_uuid = mp_cborWriter->getUuid();
112 mp_cborWriter->endMap();
113
114 mp_cborWriter->append(m_qxmlStreamReader.name().toString());
115
116 mp_cborWriter->startMap();
117 mp_cborWriter->append("xmlns");
118 mp_cborWriter->append(m_qxmlStreamReader.namespaceUri());
119 attributeListToCbor(m_qxmlStreamReader.attributes());
120 mp_cborWriter->endMap();
121
122
123 bool array_started = false;
124 QString last_element;
125 while(m_qxmlStreamReader.readNextStartElement())
126 {
127 qDebug();
128 insideElement(last_element, array_started);
129 last_element = m_qxmlStreamReader.name().toString();
130 qDebug();
131 }
132 }
133 else
134 {
135 m_qxmlStreamReader.raiseError(QObject::tr("Not an mzML input file"));
136 m_qxmlStreamReader.skipCurrentElement();
137 }
138 }
139 mp_cborWriter->endMap();
140}
141
142
143void
145{
146 // defaultArrayLength 1552
147
148 m_currentSpectrumSize = m_qxmlStreamReader.attributes().value("defaultArrayLength").toULongLong();
149}
150
151
152void
154{
155 // m_qxmlStreamReader.skipCurrentElement();
156 /*
157 *
158 <binaryDataArray encodedLength="9092">
159 <cvParam cvRef="MS" accession="MS:1000515" value="" name="intensity array"
160 unitAccession="MS:1000131" unitName="number of counts" unitCvRef="MS" /> <cvParam cvRef="MS"
161 accession="MS:1000523" value="" name="64-bit float" /> <cvParam cvRef="MS" accession="MS:1000574"
162 value="" name="zlib compression" /> <binary>*/
163 std::size_t count = m_qxmlStreamReader.attributes().value("count").toULongLong();
164
165 mp_cborWriter->append("binaryDataArray");
166 mp_cborWriter->startArray(count);
167 while(m_qxmlStreamReader.readNext() && !m_qxmlStreamReader.isEndElement())
168 {
169 if(m_qxmlStreamReader.isStartElement())
170 {
171 BinaryDataArray binary_data_array;
172 binary_data_array.fromMzml(m_qxmlStreamReader);
173 binary_data_array.toCbor(*mp_cborWriter);
174 // writeZlibDataArray();
175 }
176 }
177 mp_cborWriter->endArray();
178}
179
180
181void
182pappso::cbor::mzcbor::MzmlConvert::insideElement(QString &last_element_in, bool &array_started_in)
183{
184 m_elementStash.push_back(m_qxmlStreamReader.name().toString());
185
186
187 if(m_elementStash.back() == "spectrum")
188 {
189 // qDebug() << m_qxmlStreamReader.attributes().value("id").toString();
190 m_runAndSpectrumIdList.back().push_back(
191 m_qxmlStreamReader.attributes().value("id").toString());
192 getCurrentSpectrumSize();
193 // qFatal();
194 }
195
196
197 if(m_elementStash.back() == "binaryDataArrayList")
198 {
199 readBinaryDataArrayList();
200 }
201
202 else
203 {
204 // stop an array ?
205 qDebug() << "current element=" << m_elementStash.back();
206 qDebug() << "last_element=" << last_element_in;
207 if(array_started_in && (last_element_in != m_qxmlStreamReader.name().toString()))
208 {
209 mp_cborWriter->endArray();
210 array_started_in = false;
211 }
212
213
214 // start an array ?
215 if(m_elementToStoreInArray.contains(m_elementStash.back()))
216 {
217 // start an array ?
218 if((!array_started_in) && (last_element_in != m_elementStash.back()))
219 {
220 mp_cborWriter->append(m_elementStash.back());
221 mp_cborWriter->startArray();
222
223 array_started_in = true;
224 }
225 }
226
227 if(m_elementStash.back() == "spectrum")
228 {
229 if(mp_cborWriter->device() != nullptr)
230 m_runAndSpectrumOffsetList.back().push_back(mp_cborWriter->device()->pos());
231 }
232
233
234 if(!array_started_in)
235 mp_cborWriter->append(m_elementStash.back());
236
237 bool array_started = false;
238
239
240 if(m_elementStash.back() == "cvParam")
241 {
242 // array_started = true;
243 qDebug() << m_qxmlStreamReader.name();
244 CvParam cv_param;
245 cv_param.fromMzml(m_qxmlStreamReader);
246 cv_param.toCbor(*mp_cborWriter);
247 qDebug();
248 }
249 else
250 {
251
252 mp_cborWriter->startMap();
253 attributeListToCbor(m_qxmlStreamReader.attributes());
254
255 QString last_element;
256
257 while(m_qxmlStreamReader.readNext() && !m_qxmlStreamReader.isEndElement())
258 {
259
260 if(m_qxmlStreamReader.isCharacters())
261 {
262 // clean content:
263 QStringView content = m_qxmlStreamReader.text().trimmed();
264 if((m_qxmlStreamReader.text().toString() == "\n") ||
265 (m_qxmlStreamReader.text().toString() == "\n\t"))
266 {
267 }
268 else
269 {
270 // text node
271 if(!content.isEmpty())
272 {
273 qDebug() << "text isCharacters" << content.mid(0, 10);
274 mp_cborWriter->append("@text@");
275 mp_cborWriter->append(content);
276 }
277 }
278 }
279 else if(m_qxmlStreamReader.isStartElement())
280 {
281 QString tmp_element = m_qxmlStreamReader.name().toString();
282 insideElement(last_element, array_started);
283 last_element = tmp_element;
284 }
285 }
286
287 if(array_started)
288 {
289 mp_cborWriter->endArray();
290 }
291
292 mp_cborWriter->endMap();
293 }
294 }
295
296 qDebug() << m_elementStash.back();
297 m_elementStash.pop_back();
298}
299
300
301void
303{
304 bool ok(false);
305 double d = value_str.toDouble(&ok);
306 if(ok)
307 {
308 if(value_str.contains('.'))
309 {
310 mp_cborWriter->append(d);
311 }
312 else
313 {
314 qint64 bigint = value_str.toLongLong(&ok);
315 if(ok)
316 {
317 mp_cborWriter->append(bigint);
318 }
319 }
320 }
321 else
322 {
323 mp_cborWriter->append(value_str);
324 }
325}
326
327
328void
329pappso::cbor::mzcbor::MzmlConvert::attributeListToCbor(const QXmlStreamAttributes &xml_attributes)
330{
331 for(auto &xml_attribute : xml_attributes)
332 {
333 qDebug() << xml_attribute.name() << " " << xml_attribute.value();
334 mp_cborWriter->append(xml_attribute.name());
335 attributeValueToCbor(xml_attribute.value());
336
337 if((m_elementStash.size() > 0) && (m_elementStash.back() == "run") &&
338 (xml_attribute.name() == "id"))
339 {
340 m_runAndSpectrumOffsetList.push_back(std::vector<qint64>());
341 m_runAndSpectrumIdList.push_back(std::vector<QString>());
342 m_runIdList.push_back(xml_attribute.value().toString());
343 }
344 }
345}
346
347const std::vector<QString> &
349{
350
351 return m_runIdList;
352}
353
354const std::vector<std::vector<qint64>> &
356{
357 return m_runAndSpectrumOffsetList;
358}
359
360const std::vector<std::vector<QString>> &
362{
363 return m_runAndSpectrumIdList;
364}
PSI BinaryDataArray object for mzML/mzCBOR.
void writeInformations(const QString &software_name, const QString &software_version, const QString &type, const QString &operation)
void writeArray(const std::vector< std::size_t > &int_list)
virtual void readStream() override
void writeMzcborIndex(pappso::cbor::CborStreamWriter *writer) const
const std::vector< std::vector< qint64 > > & getRunAndSpectrumOffsetList() const
void insideElement(QString &last_element, bool &array_started)
const std::vector< QString > & getRunIdList() const
void attributeListToCbor(const QXmlStreamAttributes &xml_attributes)
const std::vector< std::vector< QString > > & getRunAndSpectrumIdList() const
void attributeValueToCbor(const QStringView &value_str)
MzmlConvert(pappso::UiMonitorInterface *p_monitor, pappso::cbor::CborStreamWriter *p_output)
#define PAPPSOMSPP_VERSION
Definition config.h:6
#define PAPPSOMSPP_NAME
Definition config.h:5
PSI cvParam object for mzML/mzCBOR.
void fromMzml(QXmlStreamReader &reader)
void toCbor(CborStreamWriter &writer)
void fromMzml(QXmlStreamReader &reader)
Definition cvparam.cpp:113
void toCbor(CborStreamWriter &writer)
Definition cvparam.cpp:153