IsoSpec  1.95
isoSpec++.h
1 
17 #pragma once
18 
19 #include <tuple>
20 #include <unordered_map>
21 #include <queue>
22 #include <limits>
23 #include <string>
24 #include "platform.h"
25 #include "dirtyAllocator.h"
26 #include "summator.h"
27 #include "operators.h"
28 #include "marginalTrek++.h"
29 
30 
31 #if ISOSPEC_BUILDING_R
32 #include <Rcpp.h>
33 using namespace Rcpp;
34 #endif /* ISOSPEC_BUILDING_R */
35 
36 
37 namespace IsoSpec
38 {
39 
40 // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
41 unsigned int parse_formula(const char* formula,
42  std::vector<const double*>& isotope_masses,
43  std::vector<const double*>& isotope_probabilities,
44  int** isotopeNumbers,
45  int** atomCounts,
46  unsigned int* confSize);
47 
48 
50 
53 class ISOSPEC_EXPORT_SYMBOL Iso {
54 private:
55 
57 
63  void setupMarginals(const double* const * _isotopeMasses,
64  const double* const * _isotopeProbabilities);
65  bool disowned;
66 protected:
67  int dimNumber;
69  int* atomCounts;
70  unsigned int confSize;
71  int allDim;
73  double modeLProb;
75 public:
76 
77  Iso();
78 
80 
87  Iso(
88  int _dimNumber,
89  const int* _isotopeNumbers,
90  const int* _atomCounts,
91  const double* const * _isotopeMasses,
92  const double* const * _isotopeProbabilities
93  );
94 
96  Iso(const char* formula);
97 
99  inline Iso(const std::string& formula) : Iso(formula.c_str()) {};
100 
102  Iso(Iso&& other);
103 
105 
109  Iso(const Iso& other, bool fullcopy);
110 
112  virtual ~Iso();
113 
115  double getLightestPeakMass() const;
116 
118  double getHeaviestPeakMass() const;
119 
125  double getMonoisotopicPeakMass() const;
126 
128  inline double getModeLProb() const { return modeLProb; };
129 
131  double getUnlikeliestPeakLProb() const;
132 
134  double getModeMass() const;
135 
137  double getTheoreticalAverageMass() const;
138 
140  inline int getDimNumber() const { return dimNumber; };
141 
143  inline int getAllDim() const { return allDim; };
144 
146  void addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities);
147 };
148 
149 
151 
154 class ISOSPEC_EXPORT_SYMBOL IsoGenerator : public Iso
155 {
156 protected:
157  double* partialLProbs;
158  double* partialMasses;
159  double* partialProbs;
161 public:
163 
166  virtual bool advanceToNextConfiguration() = 0;
167 
168  ISOSPEC_FORCE_INLINE virtual bool advanceToNextConfigurationWithinLayer() { return advanceToNextConfiguration(); };
169  ISOSPEC_FORCE_INLINE virtual bool nextLayer(double) { return false; };
170 
172 
175  virtual double lprob() const { return partialLProbs[0]; };
176 
178 
181  virtual double mass() const { return partialMasses[0]; };
182 
184 
187  virtual double prob() const { return partialProbs[0]; };
188 
190  virtual void get_conf_signature(int* space) const = 0;
191 
193  IsoGenerator(Iso&& iso, bool alloc_partials = true);
194 
196  virtual ~IsoGenerator();
197 };
198 
199 
200 
202 
207 class ISOSPEC_EXPORT_SYMBOL IsoOrderedGenerator: public IsoGenerator
208 {
209 private:
210  MarginalTrek** marginalResults;
211  std::priority_queue<void*,std::vector<void*>,ConfOrder> pq;
212  void* topConf;
213  DirtyAllocator allocator;
214  const std::vector<double>** logProbs;
215  const std::vector<double>** masses;
216  const std::vector<int*>** marginalConfs;
217  double currentLProb;
218  double currentMass;
219  double currentProb;
220  int ccount;
221 
222 public:
223  bool advanceToNextConfiguration() override final;
224 
226 
230  inline void get_conf_signature(int* space) const override final
231  {
232  int* c = getConf(topConf);
233 
234  if (ccount >= 0)
235  c[ccount]--;
236 
237  for(int ii=0; ii<dimNumber; ii++)
238  {
239  memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*sizeof(int));
240  space += isotopeNumbers[ii];
241  }
242 
243  if (ccount >= 0)
244  c[ccount]++;
245  };
246 
248  IsoOrderedGenerator(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000);
249 
251  virtual ~IsoOrderedGenerator();
252 };
253 
254 
255 
256 
258 
263 class ISOSPEC_EXPORT_SYMBOL IsoThresholdGenerator: public IsoGenerator
264 {
265 private:
266 
267  int* counter;
268  double* maxConfsLPSum;
269  const double Lcutoff;
270  PrecalculatedMarginal** marginalResults;
271  PrecalculatedMarginal** marginalResultsUnsorted;
272  int* marginalOrder;
273 
274  const double* lProbs_ptr;
275  const double* lProbs_ptr_start;
276  double* partialLProbs_second;
277  double partialLProbs_second_val, lcfmsv;
278  bool empty;
279 
280 public:
281  inline void get_conf_signature(int* space) const override final
282  {
283  counter[0] = lProbs_ptr - lProbs_ptr_start;
284  if(marginalOrder != nullptr)
285  for(int ii=0; ii<dimNumber; ii++)
286  {
287  int jj = marginalOrder[ii];
288  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
289  space += isotopeNumbers[ii];
290  }
291  else
292  for(int ii=0; ii<dimNumber; ii++)
293  {
294  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
295  space += isotopeNumbers[ii];
296  }
297 
298  };
299 
301 
309  IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute=true, int _tabSize=1000, int _hashSize=1000, bool reorder_marginals = true);
310 
312 
313  // Perform highly aggressive inling as this function is often called as while(advanceToNextConfiguration()) {}
314  // which leads to an extremely tight loop and some compilers miss this (potentially due to the length of the function).
315  ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
316  {
317  lProbs_ptr++;
318 
319  if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
320  {
321  return true;
322  }
323 
324  // If we reached this point, a carry is needed
325 
326  int idx = 0;
327  lProbs_ptr = lProbs_ptr_start;
328 
329  int * cntr_ptr = counter;
330 
331  while(idx<dimNumber-1)
332  {
333  // counter[idx] = 0;
334  *cntr_ptr = 0;
335  idx++;
336  cntr_ptr++;
337  // counter[idx]++;
338  (*cntr_ptr)++;
339  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
340  if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
341  {
342  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
343  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
344  recalc(idx-1);
345  return true;
346  }
347  }
348 
349  terminate_search();
350  return false;
351  }
352 
353 
354  ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); };
355  ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
356  ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
357 
359  void terminate_search();
360 
365  void reset();
366 
371  size_t count_confs();
372 
373 private:
375  ISOSPEC_FORCE_INLINE void recalc(int idx)
376  {
377  for(; idx > 0; idx--)
378  {
379  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
380  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
381  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
382  }
383  partialLProbs_second_val = *partialLProbs_second;
384  partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
385  lcfmsv = Lcutoff - partialLProbs_second_val;
386  }
387 };
388 
389 
390 
391 
392 
393 class ISOSPEC_EXPORT_SYMBOL IsoLayeredGenerator : public IsoGenerator
394 {
395 private:
396 
397  int* counter;
398  double* maxConfsLPSum;
399  double currentLThreshold, lastLThreshold;
400  LayeredMarginal** marginalResults;
401  LayeredMarginal** marginalResultsUnsorted;
402  int* marginalOrder;
403 
404  const double* lProbs_ptr;
405  const double* lProbs_ptr_start;
406  const double** resetPositions;
407  double* partialLProbs_second;
408  double partialLProbs_second_val, lcfmsv, last_lcfmsv;
409 
410 public:
411  inline void get_conf_signature(int* space) const override final
412  {
413  counter[0] = lProbs_ptr - lProbs_ptr_start;
414  if(marginalOrder != nullptr)
415  for(int ii=0; ii<dimNumber; ii++)
416  {
417  int jj = marginalOrder[ii];
418  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
419  space += isotopeNumbers[ii];
420  }
421  else
422  for(int ii=0; ii<dimNumber; ii++)
423  {
424  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
425  space += isotopeNumbers[ii];
426  }
427 
428  };
429 
430  inline double get_currentLThreshold() const { return currentLThreshold; };
431 
432  IsoLayeredGenerator(Iso&& iso, int _tabSize=1000, int _hashSize=1000, bool reorder_marginals = true, double t_prob_hint = 0.99);
433 
434  ~IsoLayeredGenerator();
435 
436  ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
437  {
438  do
439  {
440  if(advanceToNextConfigurationWithinLayer())
441  return true;
442  } while(nextLayer(-2.0));
443  return false;
444  }
445 
446  ISOSPEC_FORCE_INLINE bool advanceToNextConfigurationWithinLayer() override final
447  {
448  do{
449  lProbs_ptr++;
450 
451  if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
452  return true;
453  }
454  while(carry());
455  return false;
456  }
457 
458  ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); };
459  ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
460  ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
461 
463  void terminate_search();
464 
465 
467  ISOSPEC_FORCE_INLINE void recalc(int idx)
468  {
469  for(; idx > 0; idx--)
470  {
471  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
472  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
473  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
474  }
475  partialLProbs_second_val = *partialLProbs_second;
476  partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->get_lProb(counter[0]);
477  lcfmsv = currentLThreshold - partialLProbs_second_val;
478  last_lcfmsv = lastLThreshold - partialLProbs_second_val;
479  }
480 
481  virtual bool nextLayer(double offset) override final;
482 
483 private:
484  bool carry();
485 };
486 
487 
488 
489 
490 
491 #if !ISOSPEC_BUILDING_R
492 
493 void printConfigurations(
494  const std::tuple<double*,double*,int*,int>& results,
495  int dimNumber,
496  int* isotopeNumbers
497 );
498 #endif /* !ISOSPEC_BUILDING_R */
499 
500 } // namespace IsoSpec
501 
IsoSpec::Iso::allDim
int allDim
Definition: isoSpec++.h:71
IsoSpec::IsoGenerator::partialProbs
double * partialProbs
Definition: isoSpec++.h:159
IsoSpec::IsoOrderedGenerator
The generator of isotopologues sorted by their probability of occurrence.
Definition: isoSpec++.h:207
IsoSpec::Iso::confSize
unsigned int confSize
Definition: isoSpec++.h:70
IsoSpec::Iso::getDimNumber
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
Definition: isoSpec++.h:140
IsoSpec::PrecalculatedMarginal
Precalculated Marginal class.
Definition: marginalTrek++.h:226
IsoSpec::IsoLayeredGenerator::advanceToNextConfiguration
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:436
IsoSpec::IsoOrderedGenerator::get_conf_signature
void get_conf_signature(int *space) const override final
Save the counts of isotopes in the space.
Definition: isoSpec++.h:230
IsoSpec::Marginal
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:44
IsoSpec::IsoLayeredGenerator::recalc
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
Definition: isoSpec++.h:467
IsoSpec::Iso::dimNumber
int dimNumber
Definition: isoSpec++.h:67
IsoSpec::PrecalculatedMarginal::get_mass
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.
Definition: marginalTrek++.h:281
IsoSpec::IsoThresholdGenerator::mass
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:355
IsoSpec::IsoThresholdGenerator::advanceToNextConfiguration
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:315
IsoSpec
Definition: allocator.cpp:21
IsoSpec::PrecalculatedMarginal::get_lProb
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
Definition: marginalTrek++.h:267
IsoSpec::IsoThresholdGenerator::get_conf_signature
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
Definition: isoSpec++.h:281
IsoSpec::IsoGenerator::prob
virtual double prob() const
Get the probability of the current isotopologue.
Definition: isoSpec++.h:187
IsoSpec::LayeredMarginal::get_mass
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
Definition: marginalTrek++.h:354
IsoSpec::Iso::getModeLProb
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
Definition: isoSpec++.h:128
IsoSpec::Iso
The Iso class for the calculation of the isotopic distribution.
Definition: isoSpec++.h:53
IsoSpec::IsoLayeredGenerator::mass
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:459
IsoSpec::IsoGenerator::lprob
virtual double lprob() const
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:175
IsoSpec::IsoThresholdGenerator
The generator of isotopologues above a given threshold value.
Definition: isoSpec++.h:263
IsoSpec::Iso::isotopeNumbers
int * isotopeNumbers
Definition: isoSpec++.h:68
IsoSpec::IsoThresholdGenerator::prob
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:356
IsoSpec::IsoGenerator::partialMasses
double * partialMasses
Definition: isoSpec++.h:158
IsoSpec::Iso::modeLProb
double modeLProb
Definition: isoSpec++.h:73
IsoSpec::IsoLayeredGenerator::get_conf_signature
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
Definition: isoSpec++.h:411
IsoSpec::Iso::marginals
Marginal ** marginals
Definition: isoSpec++.h:72
IsoSpec::IsoGenerator::partialLProbs
double * partialLProbs
Definition: isoSpec++.h:157
IsoSpec::IsoLayeredGenerator::lprob
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:458
IsoSpec::LayeredMarginal
LayeredMarginal class.
Definition: marginalTrek++.h:315
IsoSpec::Iso::Iso
Iso(const std::string &formula)
Constructor from C++ std::string chemical formula.
Definition: isoSpec++.h:99
IsoSpec::Iso::getAllDim
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
Definition: isoSpec++.h:143
IsoSpec::LayeredMarginal::get_prob
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
Definition: marginalTrek++.h:351
IsoSpec::IsoLayeredGenerator
Definition: isoSpec++.h:393
IsoSpec::ConfOrder
Definition: operators.h:66
IsoSpec::DirtyAllocator
Definition: dirtyAllocator.h:26
IsoSpec::IsoLayeredGenerator::prob
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:460
IsoSpec::IsoGenerator
The generator of isotopologues.
Definition: isoSpec++.h:154
IsoSpec::MarginalTrek
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:154
IsoSpec::LayeredMarginal::get_lProb
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
Definition: marginalTrek++.h:348
IsoSpec::PrecalculatedMarginal::get_prob
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
Definition: marginalTrek++.h:274
IsoSpec::IsoThresholdGenerator::lprob
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:354
IsoSpec::Iso::atomCounts
int * atomCounts
Definition: isoSpec++.h:69
IsoSpec::IsoGenerator::mass
virtual double mass() const
Get the mass of the current isotopologue.
Definition: isoSpec++.h:181