libpappsomspp
Library for mass spectrometry
timsframetype1.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/vendors/tims/timsframetype1.cpp
3 * \date 3/10/2021
4 * \author Olivier Langella
5 * \brief handle a single Bruker's TimsTof frame type 1 compression
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2021 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 ******************************************************************************/
27
28
29#include "timsframetype1.h"
30#include "../../../pappsomspp/pappsoexception.h"
31#include "../../../pappsomspp/exception/exceptionoutofrange.h"
32#include "../../../pappsomspp/exception/exceptionnotimplemented.h"
33#include <QDebug>
34#include <liblzf/lzf.h>
35#include <cerrno>
36
37
38namespace pappso
39{
41 quint32 scanNum,
42 char *p_bytes,
43 std::size_t len)
44 : TimsFrame(timsId, scanNum)
45{
46 qDebug() << timsId;
47 m_timsDataFrame.resize(len * 2);
48
49 if(p_bytes != nullptr)
50 {
51 qDebug() << timsId;
52 copyAndLzfDecompress(p_bytes, len);
53 qDebug() << timsId;
54 }
55 else
56 {
57 if(m_scanNumber == 0)
58 {
59
61 QObject::tr(
62 "TimsFrameType1::TimsFrameType1(%1,%2,nullptr,%3) FAILED")
63 .arg(m_timsId)
64 .arg(m_scanNumber)
65 .arg(len));
66 }
67 }
68}
69
71{
72}
73
75{
76}
77
78
79void
80TimsFrameType1::copyAndLzfDecompress(const char *src, std::size_t len)
81{
82
83 qDebug() << " m_scanNumber=" << m_scanNumber << " len=" << len;
84 // the start position offset for each scan and the length of the last scan
85 // copy first m_scanNumber*4 bytes in qbyte array
86 std::size_t count = (m_scanNumber + 2) * 4;
87
88 qDebug() << " count=" << count;
89 if(m_timsDataFrame.size() < (long)(count + count))
90 {
91 qDebug() << " m_timsDataFrame.size()=" << m_timsDataFrame.size();
92 m_timsDataFrame.resize(count + count);
93 }
94
95 /*
96 std::size_t decompressed_size =
97 lzfDecompressScan(src + 3687 - 8,
98 9,
99 m_timsDataFrame.data() + 3660,
100 m_timsDataFrame.size() - 3660);
101
102 qDebug() << "decompressed_size=" << decompressed_size;
103 */
104 // memcpy(m_timsDataFrame.data(), src, count);
105
106 qDebug() << "offset begin at last :" << count + 4;
107
108 // std::vector<std::size_t> compressed_len_list;
109 std::size_t offset;
110 std::size_t previous_offset = (*(quint32 *)(src));
111 qDebug() << "first offset= " << previous_offset;
112 std::size_t cumul_decompressed_size = 0;
113
114
115 for(quint32 i = 1; i <= m_scanNumber; i++)
116 {
117 offset = (*(quint32 *)(src + (i * 4)));
118
119 std::size_t compressed_size = offset - previous_offset;
120
121 qDebug() << "scan i=" << i << " previous_offset=" << previous_offset
122 << " offset=" << offset << " length=" << compressed_size;
123 // compressed_len_list.push_back(offset - previous_offset);
124 std::size_t remaining_size = m_timsDataFrame.size();
125
126 if(cumul_decompressed_size < remaining_size)
127 {
128 remaining_size = remaining_size - cumul_decompressed_size;
129 }
130 else
131 {
132 remaining_size = 0;
133 }
134 qDebug() << " remaining_size=" << remaining_size;
135 std::size_t decompressed_size =
136 lzfDecompressScan(src + previous_offset - 8,
137 compressed_size,
138 m_timsDataFrame.data() + cumul_decompressed_size,
139 remaining_size);
140
141
142 m_scanOffsetList.push_back(cumul_decompressed_size);
143 m_scanSizeList.push_back(decompressed_size / 4);
144 cumul_decompressed_size += decompressed_size;
145 qDebug() << " decompressed_size=" << decompressed_size;
146
147
148 previous_offset = offset;
149 }
150 /*
151 std::size_t last_offset = (*(quint32 *)(src + (m_scanNumber * 4)));
152 qDebug() << "last scan length :" << last_offset;
153
154 qDebug() << "last scan length bonus:"
155 << (*(quint32 *)(src + (m_scanNumber + 1 * 4)));
156
157 qDebug() << " m_scanOffsetList.size()=" << m_scanOffsetList.size()
158 << " m_scanNumber=" << m_scanNumber;
159 */
160 /*
161 throw PappsoException(
162 QObject::tr("ERROR reading TimsFrameType1 ").arg(m_timsId));
163 */
164}
165
166
167unsigned int
169 unsigned int src_len,
170 char *dest,
171 unsigned int dest_len)
172{
173 qDebug() << "src=" << src << " src_len=" << src_len
174 << " dest_len=" << dest_len;
175 if(src_len == 0)
176 return 0;
177 unsigned int decompressed_size;
178 unsigned int more_space = src_len * 2;
179 decompressed_size = lzf_decompress(src, src_len, dest, dest_len);
180 while(decompressed_size == 0)
181 {
182 qDebug() << "dest_len=" << dest_len;
183 qDebug() << "decompressed_size=" << decompressed_size;
184
185 if(errno == EINVAL)
186 {
187 throw PappsoException(
188 QObject::tr("ERROR reading TimsFrameType1 %1 TIMS binary file %2: "
189 "LZF decompression error EINVAL")
190 .arg(m_timsId));
191 }
192 else if(errno == E2BIG)
193 {
194 qDebug() << " m_timsDataFrame.size()=" << m_timsDataFrame.size()
195 << " more_space=" << more_space;
196 m_timsDataFrame.resize(m_timsDataFrame.size() + more_space);
197 dest_len += more_space;
198 qDebug();
199 decompressed_size = lzf_decompress(src, src_len, dest, dest_len);
200 }
201 else
202 {
203 break;
204 }
205 }
206 return decompressed_size;
207}
208
209std::size_t
210TimsFrameType1::getNbrPeaks(std::size_t scanNum) const
211{
212 pappso::MassSpectrumSPtr mass_spectrum_sptr = getMassSpectrumSPtr(scanNum);
213 return mass_spectrum_sptr.get()->size();
214}
215
216
217void
219 std::map<quint32, quint32> &accumulate_into) const
220{
221 if(m_timsDataFrame.size() == 0)
222 return;
223 // checkScanNum(scanNum);
224
225
226 std::size_t size = m_scanSizeList[scanNum];
227
228 std::size_t offset = m_scanOffsetList[scanNum];
229
230 // qDebug() << "begin offset=" << offset << " size=" << size;
231 qint32 value = 0;
232 qint32 tof_index = 0;
233 for(std::size_t i = 0; i < size; i++)
234 {
235 value = (*(qint32 *)(m_timsDataFrame.constData() + offset + (i * 4)));
236 // qDebug() << " i=" << i << " value=" << value;
237
238 if(value < 0)
239 {
240 tof_index += -1 * value;
241 }
242 else
243 {
244
245 quint32 x = tof_index;
246 quint32 y = value;
247
248 auto ret = accumulate_into.insert(std::pair<quint32, quint32>(x, y));
249
250 if(ret.second == false)
251 {
252 // already existed : cumulate
253 ret.first->second += y;
254 }
255 tof_index++;
256 }
257 }
258 qDebug() << "end";
259}
260
261std::vector<quint32>
262TimsFrameType1::getScanIndexList(std::size_t scanNum) const
263{
264 qDebug();
265 checkScanNum(scanNum);
266
267 std::vector<quint32> mzindex_values;
268
269 try
270 {
271 qDebug();
272
273
274 if(m_timsDataFrame.size() == 0)
275 return mzindex_values;
276 qDebug();
277
278 std::size_t size = m_scanSizeList[scanNum];
279
280 std::size_t offset = m_scanOffsetList[scanNum];
281
282 qDebug() << " offset=" << offset << " size=" << size;
283 if(size == 0)
284 return mzindex_values;
285
286 qint32 value = 0;
287 qint32 tof_index = 0;
288 // std::vector<quint32> index_list;
289 for(std::size_t i = 0; i < size; i++)
290 {
291 value = (*(qint32 *)(m_timsDataFrame.constData() + offset + (i * 4)));
292
293 if(value < 0)
294 {
295 tof_index += -1 * value;
296 }
297 else
298 {
299 mzindex_values.push_back(tof_index);
300 tof_index++;
301 }
302 }
303
304
305 qDebug();
306 return mzindex_values;
307 }
308 catch(PappsoException &error)
309 {
310 throw pappso::PappsoException(QObject::tr("Error %1 frameId=%2 "
311 "scanNum=%3 :\n%4")
312 .arg(__FUNCTION__)
313 .arg(getId())
314 .arg(scanNum)
315 .arg(error.qwhat()));
316 }
317 qDebug();
318}
319
320std::vector<quint32>
321TimsFrameType1::getScanIntensities(std::size_t scanNum) const
322{
323
324
325 qDebug() << " scanNum=" << scanNum;
326
327 checkScanNum(scanNum);
328
329 std::vector<quint32> int_values;
330
331 try
332 {
333 qDebug();
334
335
336 if(m_timsDataFrame.size() == 0)
337 return int_values;
338 qDebug();
339
340 std::size_t size = m_scanSizeList[scanNum];
341
342 std::size_t offset = m_scanOffsetList[scanNum];
343
344 qDebug() << " offset=" << offset << " size=" << size;
345 if(size == 0)
346 return int_values;
347
348 qint32 value = 0;
349 qint32 tof_index = 0;
350 // std::vector<quint32> index_list;
351 for(std::size_t i = 0; i < size; i++)
352 {
353 value = (*(qint32 *)(m_timsDataFrame.constData() + offset + (i * 4)));
354
355 if(value < 0)
356 {
357 tof_index += -1 * value;
358 }
359 else
360 {
361 int_values.push_back(value);
362 tof_index++;
363 }
364 }
365
366
367 qDebug();
368 return int_values;
369 }
370 catch(PappsoException &error)
371 {
372 throw pappso::PappsoException(QObject::tr("Error %1 frameId=%2 "
373 "scanNum=%3 :\n%4")
374 .arg(__FUNCTION__)
375 .arg(getId())
376 .arg(scanNum)
377 .arg(error.qwhat()));
378 }
379}
380
382TimsFrameType1::getMassSpectrumSPtr(std::size_t scanNum) const
383{
384
385 qDebug() << " scanNum=" << scanNum;
386
387 checkScanNum(scanNum);
388
389 try
390 {
391 qDebug();
392
393 pappso::MassSpectrumSPtr mass_spectrum_sptr =
394 std::make_shared<pappso::MassSpectrum>();
395 // std::vector<DataPoint>
396
397 if(m_timsDataFrame.size() == 0)
398 return mass_spectrum_sptr;
399 qDebug();
400
401 std::size_t size = m_scanSizeList[scanNum];
402
403 std::size_t offset = m_scanOffsetList[scanNum];
404
405 qDebug() << " offset=" << offset << " size=" << size;
406 if(size == 0)
407 return mass_spectrum_sptr;
408
409
410 MzCalibrationInterface *mz_calibration_p =
412
413
414 qint32 value = 0;
415 qint32 tof_index = 0;
416 // std::vector<quint32> index_list;
417 DataPoint data_point;
418 for(std::size_t i = 0; i < size; i++)
419 {
420 value = (*(qint32 *)(m_timsDataFrame.constData() + offset + (i * 4)));
421
422 if(value < 0)
423 {
424 tof_index += -1 * value;
425 }
426 else
427 {
428 data_point.y = value;
429
430 // intensity normalization
431 data_point.y *= 100.0 / m_accumulationTime;
432
433
434 // mz calibration
435 data_point.x = mz_calibration_p->getMzFromTofIndex(tof_index);
436 mass_spectrum_sptr.get()->push_back(data_point);
437 tof_index++;
438 }
439 }
440
441
442 qDebug() << mass_spectrum_sptr.get()->toString();
443 return mass_spectrum_sptr;
444 }
445 catch(PappsoException &error)
446 {
448 QObject::tr("Error TimsFrameType1::getMassSpectrumSPtr frameId=%1 "
449 "scanNum=%2 :\n%3")
450 .arg(getId())
451 .arg(scanNum)
452 .arg(error.qwhat()));
453 }
454}
455
456
458TimsFrameType1::getRawTraceSPtr(std::size_t scanNum) const
459{
460
461 // qDebug();
462
463 pappso::TraceSPtr trace_sptr = std::make_shared<pappso::Trace>();
464 // std::vector<DataPoint>
465
466 if(m_timsDataFrame.size() == 0)
467 return trace_sptr;
468 qDebug();
469
470 std::size_t size = m_scanSizeList[scanNum];
471
472 std::size_t offset = m_scanOffsetList[scanNum];
473
474 qDebug() << " offset=" << offset << " size=" << size;
475 if(size == 0)
476 return trace_sptr;
477
478 // qDebug();
479 qint32 value = 0;
480 qint32 tof_index = 0;
481
482 // std::vector<quint32> index_list;
483 DataPoint data_point;
484 for(std::size_t i = 0; i < size; i++)
485 {
486 value = (*(qint32 *)(m_timsDataFrame.constData() + offset + (i * 4)));
487
488 if(value < 0)
489 {
490 tof_index += -1 * value;
491 }
492 else
493 {
494 data_point.y = value;
495
496 // intensity normalization
497 data_point.y *= 100.0 / m_accumulationTime;
498
499
500 // mz calibration
501 data_point.x = tof_index;
502 trace_sptr.get()->push_back(data_point);
503 tof_index++;
504 }
505 }
506
507
508 // qDebug();
509 return trace_sptr;
510}
511
512} // namespace pappso
virtual double getMzFromTofIndex(quint32 tof_index)=0
get m/z from time of flight raw index
virtual const QString & qwhat() const
double m_accumulationTime
accumulation time in milliseconds
quint32 m_scanNumber
total number of scans contained in this frame
std::size_t m_timsId
Tims frame database id (the SQL identifier of this frame)
virtual const MzCalibrationInterfaceSPtr & getMzCalibrationInterfaceSPtr() const final
get the MzCalibration model to compute mz and TOF for this frame
bool checkScanNum(std::size_t scanNum) const
check that this scan number exists
std::size_t getId() const
virtual std::vector< quint32 > getScanIndexList(std::size_t scanNum) const override
get the raw index list for one given scan; the indexes are neither TOF nor m/z, just indexes on the digitizer
std::vector< std::size_t > m_scanSizeList
TimsFrameType1(std::size_t timsId, quint32 scanNum, char *p_bytes, std::size_t len)
void copyAndLzfDecompress(const char *src, std::size_t len)
copy buffer header and lzf decompress each scan for tims compression type 1
virtual std::size_t getNbrPeaks(std::size_t scanNum) const override
get the number of peaks in this spectrum (needs the binary file)
virtual void cumulateScan(std::size_t scanNum, std::map< quint32, quint32 > &accumulate_into) const override
cumulate a scan into a map
unsigned int lzfDecompressScan(const char *src, unsigned int src_len, char *dest, unsigned int dest_len)
decompress a single LZF compressed scan buffer
virtual pappso::MassSpectrumSPtr getMassSpectrumSPtr(std::size_t scanNum) const override
get the mass spectrum corresponding to a scan number
virtual std::vector< quint32 > getScanIntensities(std::size_t scanNum) const override
get the raw intensities of one scan, without transformation; they still need intensity normalization
std::vector< std::size_t > m_scanOffsetList
virtual pappso::TraceSPtr getRawTraceSPtr(std::size_t scanNum) const override
get the raw index tof_index and intensities (normalized)
QByteArray m_timsDataFrame
Definition: timsframe.h:181
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< Trace > TraceSPtr
Definition: trace.h:134
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
Definition: massspectrum.h:54
pappso_double x
Definition: datapoint.h:22
pappso_double y
Definition: datapoint.h:23
handle a single Bruker's TimsTof frame type 1 compression