IsoSpec  1.95
isoSpec++.h
1 
17 #pragma once
18 
19 #include <tuple>
20 #include <unordered_map>
21 #include <queue>
22 #include <limits>
23 #include "platform.h"
24 #include "dirtyAllocator.h"
25 #include "summator.h"
26 #include "operators.h"
27 #include "marginalTrek++.h"
28 
29 
30 #if ISOSPEC_BUILDING_R
31 #include <Rcpp.h>
32 using namespace Rcpp;
33 #endif /* ISOSPEC_BUILDING_R */
34 
35 
36 namespace IsoSpec
37 {
38 
39 // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
40 unsigned int parse_formula(const char* formula,
41  std::vector<const double*>& isotope_masses,
42  std::vector<const double*>& isotope_probabilities,
43  int** isotopeNumbers,
44  int** atomCounts,
45  unsigned int* confSize);
46 
47 
49 
52 class ISOSPEC_EXPORT_SYMBOL Iso {
53 private:
54 
56 
62  void setupMarginals(const double* const * _isotopeMasses,
63  const double* const * _isotopeProbabilities);
64 public:
65  bool disowned;
66 protected:
67  int dimNumber;
69  int* atomCounts;
70  unsigned int confSize;
71  int allDim;
73  double modeLProb;
75 public:
77 
84  Iso(
85  int _dimNumber,
86  const int* _isotopeNumbers,
87  const int* _atomCounts,
88  const double* const * _isotopeMasses,
89  const double* const * _isotopeProbabilities
90  );
91 
93  Iso(const char* formula);
94 
96  Iso(Iso&& other);
97 
99 
103  Iso(const Iso& other, bool fullcopy);
104 
106  virtual ~Iso();
107 
109  double getLightestPeakMass() const;
110 
112  double getHeaviestPeakMass() const;
113 
115  inline double getModeLProb() const { return modeLProb; };
116 
118  inline int getDimNumber() const { return dimNumber; };
119 
121  inline int getAllDim() const { return allDim; };
122 };
123 
124 
126 
129 class ISOSPEC_EXPORT_SYMBOL IsoGenerator : public Iso
130 {
131 protected:
132  double* partialLProbs;
133  double* partialMasses;
134  double* partialProbs;
136 public:
138 
141  virtual bool advanceToNextConfiguration() = 0;
142 
144 
147  virtual double lprob() const { return partialLProbs[0]; };
148 
150 
153  virtual double mass() const { return partialMasses[0]; };
154 
156 
159  virtual double prob() const { return partialProbs[0]; };
160 
161  //TODO: what is this???
162  virtual void get_conf_signature(int* space) const = 0;
163 
165  IsoGenerator(Iso&& iso, bool alloc_partials = true);
166 
168  virtual ~IsoGenerator();
169 };
170 
171 
172 
174 
179 class ISOSPEC_EXPORT_SYMBOL IsoOrderedGenerator: public IsoGenerator
180 {
181 private:
182  MarginalTrek** marginalResults;
183  std::priority_queue<void*,std::vector<void*>,ConfOrder> pq;
184  void* topConf;
185  DirtyAllocator allocator;
186  const std::vector<double>** logProbs;
187  const std::vector<double>** masses;
188  const std::vector<int*>** marginalConfs;
189  double currentLProb;
190  double currentMass;
191  double currentProb;
192  int ccount;
193 
194 public:
195  bool advanceToNextConfiguration() override final;
196 
198 
202  inline void get_conf_signature(int* space) const override final
203  {
204  int* c = getConf(topConf);
205 
206  if (ccount >= 0)
207  c[ccount]--;
208 
209  for(int ii=0; ii<dimNumber; ii++)
210  {
211  memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*sizeof(int));
212  space += isotopeNumbers[ii];
213  }
214 
215  if (ccount >= 0)
216  c[ccount]++;
217  };
218 
220  IsoOrderedGenerator(Iso&& iso, int _tabSize = 1000, int _hashSize = 1000);
221 
223  virtual ~IsoOrderedGenerator();
224 };
225 
226 
227 
228 
230 
235 class ISOSPEC_EXPORT_SYMBOL IsoThresholdGenerator: public IsoGenerator
236 {
237 private:
238 
239  int* counter;
240  double* maxConfsLPSum;
241  const double Lcutoff;
242  PrecalculatedMarginal** marginalResults;
243  PrecalculatedMarginal** marginalResultsUnsorted;
244  int* marginalOrder;
245 
246  const double* lProbs_ptr;
247  const double* lProbs_ptr_start;
248  double* partialLProbs_second;
249  double partialLProbs_second_val, lcfmsv;
250  bool empty;
251 
252 public:
253  inline void get_conf_signature(int* space) const override final
254  {
255  counter[0] = lProbs_ptr - lProbs_ptr_start;
256  if(marginalOrder != nullptr)
257  for(int ii=0; ii<dimNumber; ii++)
258  {
259  int jj = marginalOrder[ii];
260  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*sizeof(int));
261  space += isotopeNumbers[ii];
262  }
263  else
264  for(int ii=0; ii<dimNumber; ii++)
265  {
266  memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*sizeof(int));
267  space += isotopeNumbers[ii];
268  }
269 
270  };
271 
273 
281  IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute=true, int _tabSize=1000, int _hashSize=1000, bool reorder_marginals = true);
282 
283  inline ~IsoThresholdGenerator()
284  {
285  delete[] counter;
286  delete[] maxConfsLPSum;
287  if (marginalResultsUnsorted != marginalResults)
288  delete[] marginalResultsUnsorted;
289  dealloc_table(marginalResults, dimNumber);
290  if(marginalOrder != nullptr)
291  delete[] marginalOrder;
292  };
293 
294  // Perform highly aggressive inling as this function is often called as while(advanceToNextConfiguration()) {}
295  // which leads to an extremely tight loop and some compilers miss this (potentially due to the length of the function).
296  ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
297  {
298  lProbs_ptr++;
299 
300  if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
301  {
302  return true;
303  }
304 
305  // If we reached this point, a carry is needed
306 
307  int idx = 0;
308  lProbs_ptr = lProbs_ptr_start;
309 
310  int * cntr_ptr = counter;
311 
312  while(idx<dimNumber-1)
313  {
314  // counter[idx] = 0;
315  *cntr_ptr = 0;
316  idx++;
317  cntr_ptr++;
318  // counter[idx]++;
319  (*cntr_ptr)++;
320  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
321  if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
322  {
323  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
324  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
325  recalc(idx-1);
326  return true;
327  }
328  }
329 
330  terminate_search();
331  return false;
332  }
333 
334 
335  ISOSPEC_FORCE_INLINE double lprob() const override final { return partialLProbs_second_val + (*(lProbs_ptr)); };
336  ISOSPEC_FORCE_INLINE double mass() const override final { return partialMasses[1] + marginalResults[0]->get_mass(lProbs_ptr - lProbs_ptr_start); };
337  ISOSPEC_FORCE_INLINE double prob() const override final { return partialProbs[1] * marginalResults[0]->get_prob(lProbs_ptr - lProbs_ptr_start); };
338 
340  void terminate_search();
341 
346  void reset();
347 
352  size_t count_confs();
353 
354 private:
356  ISOSPEC_FORCE_INLINE void recalc(int idx)
357  {
358  for(; idx > 0; idx--)
359  {
360  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
361  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
362  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
363  }
364  partialLProbs_second_val = *partialLProbs_second;
365  partialLProbs[0] = *partialLProbs_second + marginalResults[0]->get_lProb(counter[0]);
366  lcfmsv = Lcutoff - partialLProbs_second_val;
367  }
368 };
369 
370 
371 
373 
383 class ISOSPEC_EXPORT_SYMBOL IsoLayeredGenerator : public IsoGenerator
384 {
385 private:
386  Summator totalProb;
387  std::vector<void*> newaccepted;
388  DirtyAllocator allocator;
389  int* candidate;
390  const std::vector<double>** logProbs;
391  const std::vector<double>** masses;
392  const std::vector<int*>** marginalConfs;
393  MarginalTrek** marginalResults;
394  std::vector<void*>* current;
395  std::vector<void*>* next;
396  double lprobThr;
397  double targetCoverage;
398  double percentageToExpand;
399  bool do_trim;
400  int layers;
401  size_t generator_position;
402 
403  bool advanceToNextLayer();
404 
405 public:
406  bool advanceToNextConfiguration() override final;
407 
408  inline void get_conf_signature(int* space) const override final
409  {
410  int* conf = getConf(newaccepted[generator_position]);
411  for(int ii=0; ii<dimNumber; ii++)
412  {
413  memcpy(space, marginalResults[ii]->confs()[conf[ii]], isotopeNumbers[ii]*sizeof(int));
414  space += isotopeNumbers[ii];
415  }
416  };
417 
418 
419  IsoLayeredGenerator(Iso&& iso, double _targetCoverage, double _percentageToExpand = 0.3, int _tabSize = 1000, int _hashSize = 1000, bool trim = false);
420  virtual ~IsoLayeredGenerator();
421 
422  void terminate_search();
423 
424 };
425 
426 
427 
428 
429 #if !ISOSPEC_BUILDING_R
430 
431 void printConfigurations(
432  const std::tuple<double*,double*,int*,int>& results,
433  int dimNumber,
434  int* isotopeNumbers
435 );
436 #endif /* !ISOSPEC_BUILDING_R */
437 
438 } // namespace IsoSpec
439 
IsoSpec::Iso::allDim
int allDim
Definition: isoSpec++.h:71
IsoSpec::IsoGenerator::partialProbs
double * partialProbs
Definition: isoSpec++.h:134
IsoSpec::IsoOrderedGenerator
The generator of isotopologues sorted by their probability of occurrence.
Definition: isoSpec++.h:179
IsoSpec::Iso::confSize
unsigned int confSize
Definition: isoSpec++.h:70
IsoSpec::Iso::getDimNumber
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
Definition: isoSpec++.h:118
IsoSpec::PrecalculatedMarginal
Precalculated Marginal class.
Definition: marginalTrek++.h:213
IsoSpec::IsoOrderedGenerator::get_conf_signature
void get_conf_signature(int *space) const override final
Save the counts of isotopes in the space.
Definition: isoSpec++.h:202
IsoSpec::Marginal
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:45
IsoSpec::Iso::dimNumber
int dimNumber
Definition: isoSpec++.h:67
IsoSpec::PrecalculatedMarginal::get_mass
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.
Definition: marginalTrek++.h:268
IsoSpec::IsoThresholdGenerator::mass
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
Definition: isoSpec++.h:336
IsoSpec::IsoThresholdGenerator::advanceToNextConfiguration
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.h:296
IsoSpec
Definition: allocator.cpp:21
IsoSpec::PrecalculatedMarginal::get_lProb
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
Definition: marginalTrek++.h:254
IsoSpec::IsoGenerator::prob
virtual double prob() const
Get the probability of the current isotopologue.
Definition: isoSpec++.h:159
IsoSpec::Iso::getModeLProb
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
Definition: isoSpec++.h:115
IsoSpec::Iso
The Iso class for the calculation of the isotopic distribution.
Definition: isoSpec++.h:52
IsoSpec::IsoGenerator::lprob
virtual double lprob() const
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:147
IsoSpec::IsoThresholdGenerator
The generator of isotopologues above a given threshold value.
Definition: isoSpec++.h:235
IsoSpec::Iso::isotopeNumbers
int * isotopeNumbers
Definition: isoSpec++.h:68
IsoSpec::Summator
Definition: summator.h:76
IsoSpec::IsoThresholdGenerator::prob
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
Definition: isoSpec++.h:337
IsoSpec::IsoGenerator::partialMasses
double * partialMasses
Definition: isoSpec++.h:133
IsoSpec::Iso::modeLProb
double modeLProb
Definition: isoSpec++.h:73
IsoSpec::Iso::marginals
Marginal ** marginals
Definition: isoSpec++.h:72
IsoSpec::IsoGenerator::partialLProbs
double * partialLProbs
Definition: isoSpec++.h:132
IsoSpec::Iso::getAllDim
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
Definition: isoSpec++.h:121
IsoSpec::IsoLayeredGenerator
The class that represents isotopologues above a given joint probability value.
Definition: isoSpec++.h:383
IsoSpec::ConfOrder
Definition: operators.h:66
IsoSpec::DirtyAllocator
Definition: dirtyAllocator.h:26
IsoSpec::IsoGenerator
The generator of isotopologues.
Definition: isoSpec++.h:129
IsoSpec::MarginalTrek
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:141
IsoSpec::PrecalculatedMarginal::get_prob
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
Definition: marginalTrek++.h:261
IsoSpec::IsoThresholdGenerator::lprob
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
Definition: isoSpec++.h:335
IsoSpec::Iso::atomCounts
int * atomCounts
Definition: isoSpec++.h:69
IsoSpec::IsoGenerator::mass
virtual double mass() const
Get the mass of the current isotopologue.
Definition: isoSpec++.h:153
IsoSpec::Iso::disowned
bool disowned
Definition: isoSpec++.h:65