23 #include <unordered_map>
24 #include <unordered_set>
34 #include "marginalTrek++.h"
36 #include "allocator.h"
37 #include "operators.h"
39 #include "element_tables.h"
55 Conf
initialConfigure(
const int atomCnt,
const int isotopeNo,
const double* probs,
const double* lprobs)
61 Conf res =
new int[isotopeNo];
64 for(
int i = 0; i < isotopeNo; ++i )
65 res[i] =
int( atomCnt * probs[i] ) + 1;
70 for(
int i = 0; i < isotopeNo; ++i) s += res[i];
72 int diff = atomCnt - s;
81 int i = 0, coordDiff = 0;
84 coordDiff = res[i] - diff;
92 diff = abs(coordDiff);
100 double LP = unnormalized_logProb(res, lprobs, isotopeNo);
106 for(
int ii = 0; ii<isotopeNo; ii++)
107 for(
int jj = 0; jj<isotopeNo; jj++)
108 if(ii != jj && res[ii] > 0)
112 NLP = unnormalized_logProb(res, lprobs, isotopeNo);
113 if(NLP>LP || (NLP==LP && ii>jj))
132 #if !ISOSPEC_BUILDING_R
133 void printMarginal(
const std::tuple<double*,double*,int*,int>& results,
int dim)
135 for(
int i=0; i<std::get<3>(results); i++){
137 std::cout <<
"Mass = " << std::get<0>(results)[i] <<
138 " log-prob =\t" << std::get<1>(results)[i] <<
139 " prob =\t" << exp(std::get<1>(results)[i]) <<
140 "\tand configuration =\t";
142 for(
int j=0; j<dim; j++) std::cout << std::get<2>(results)[i*dim + j] <<
" ";
144 std::cout << std::endl;
157 for(
int ii = 0; ii < isoNo; ii++)
158 if(probs[ii] <= 0.0 || probs[ii] > 1.0)
159 throw std::invalid_argument(
"All isotope probabilities p must fulfill: 0.0 < p <= 1.0");
161 int curr_method = fegetround();
162 fesetround(FE_UPWARD);
163 double* ret =
new double[isoNo];
166 for(
int i = 0; i < isoNo; i++)
168 ret[i] = log(probs[i]);
169 for(
int j=0; j<ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
170 if(elem_table_probability[j] == probs[i])
172 ret[i] = elem_table_log_probability[j];
176 fesetround(curr_method);
180 double get_loggamma_nominator(
int x)
183 int curr_method = fegetround();
184 fesetround(FE_UPWARD);
185 double ret = lgamma(x+1);
186 fesetround(curr_method);
190 int verify_atom_cnt(
int atomCnt)
192 if(ISOSPEC_G_FACT_TABLE_SIZE-1 <= atomCnt)
193 throw std::length_error(
"Subisotopologue too large, size limit (that is, the maximum number of atoms of a single element in a molecule) is: " + std::to_string(ISOSPEC_G_FACT_TABLE_SIZE-1));
198 const double* _masses,
199 const double* _probs,
204 isotopeNo(_isotopeNo),
205 atomCnt(verify_atom_cnt(_atomCnt)),
207 atom_masses(array_copy<double>(_masses, _isotopeNo)),
208 loggamma_nominator(get_loggamma_nominator(_atomCnt)),
210 mode_lprob(loggamma_nominator+unnormalized_logProb(mode_conf, atom_lProbs, isotopeNo)),
211 mode_mass(mass(mode_conf, atom_masses, isotopeNo)),
212 mode_prob(exp(mode_lprob)),
213 smallest_lprob(atomCnt * *std::min_element(atom_lProbs, atom_lProbs+isotopeNo))
218 disowned(other.disowned),
219 isotopeNo(other.isotopeNo),
220 atomCnt(other.atomCnt),
221 atom_lProbs(other.atom_lProbs),
222 atom_masses(other.atom_masses),
223 loggamma_nominator(other.loggamma_nominator),
224 mode_conf(other.mode_conf),
225 mode_lprob(other.mode_lprob),
226 mode_mass(other.mode_mass),
227 mode_prob(other.mode_prob),
228 smallest_lprob(other.smallest_lprob)
230 other.disowned =
true;
246 double ret_mass = std::numeric_limits<double>::infinity();
247 for(
unsigned int ii=0; ii <
isotopeNo; ii++)
255 double ret_mass = 0.0;
256 for(
unsigned int ii=0; ii <
isotopeNo; ii++)
264 double found_prob = -std::numeric_limits<double>::infinity();
265 double found_mass = 0.0;
266 for(
unsigned int ii=0; ii <
isotopeNo; ii++)
278 for(
unsigned int ii = 0; ii <
isotopeNo; ii++)
291 keyHasher(isotopeNo),
292 equalizer(isotopeNo),
293 orderMarginal(atom_lProbs, isotopeNo),
294 visited(hashSize,keyHasher,equalizer),
297 candidate(new int[isotopeNo]),
298 allocator(isotopeNo, tabSize)
300 int* initialConf = allocator.makeCopy(
mode_conf);
302 pq.push(initialConf);
303 visited[initialConf] = 0;
313 bool MarginalTrek::add_next_conf()
319 if(pq.size() < 1)
return false;
321 Conf topConf = pq.top();
324 visited[topConf] = current_count;
326 _confs.push_back(topConf);
328 double logprob =
logProb(topConf);
329 _conf_lprobs.push_back(logprob);
332 totalProb.add( exp( logprob ) );
334 for(
unsigned int i = 0; i <
isotopeNo; ++i )
336 for(
unsigned int j = 0; j <
isotopeNo; ++j )
340 if( i != j && topConf[j] > 0 ){
347 if( visited.count( candidate ) == 0 )
349 Conf acceptedCandidate = allocator.makeCopy(candidate);
350 pq.push(acceptedCandidate);
352 visited[acceptedCandidate] = 0;
365 for(
unsigned int i=0; i<_conf_lprobs.size(); i++)
367 s.add(_conf_lprobs[i]);
368 if(s.get() >= cutoff)
377 while(totalProb.get() < cutoff && add_next_conf()) {}
378 return _conf_lprobs.size();
382 MarginalTrek::~MarginalTrek()
396 allocator(isotopeNo, tabSize)
402 std::unordered_set<Conf,KeyHasher,ConfEqual> visited(hashSize,keyHasher,equalizer);
404 Conf currentConf = allocator.makeCopy(
mode_conf);
405 if(
logProb(currentConf) >= lCutOff)
409 auto tmp = allocator.makeCopy(currentConf);
410 configurations.push_back(tmp);
414 unsigned int idx = 0;
416 while(idx < configurations.size())
418 memcpy(currentConf, configurations[idx],
sizeof(
int)*
isotopeNo);
420 for(
unsigned int ii = 0; ii <
isotopeNo; ii++ )
421 for(
unsigned int jj = 0; jj <
isotopeNo; jj++ )
422 if( ii != jj && currentConf[jj] > 0)
427 if (visited.count(currentConf) == 0 &&
logProb(currentConf) >= lCutOff)
431 auto tmp = allocator.makeCopy(currentConf);
433 configurations.push_back(tmp);
446 std::sort(configurations.begin(), configurations.end(), orderMarginal);
449 confs = &configurations[0];
450 no_confs = configurations.size();
451 lProbs =
new double[no_confs+1];
452 probs =
new double[no_confs];
453 masses =
new double[no_confs];
456 for(
unsigned int ii=0; ii < no_confs; ii++)
458 lProbs[ii] =
logProb(confs[ii]);
459 probs[ii] = exp(lProbs[ii]);
462 lProbs[no_confs] = -std::numeric_limits<double>::infinity();
468 if(lProbs !=
nullptr)
470 if(masses !=
nullptr)
483 :
Marginal(std::move(m)), current_threshold(1.0), allocator(isotopeNo, tabSize), sorted_up_to_idx(0),
484 equalizer(isotopeNo), keyHasher(isotopeNo), orderMarginal(atom_lProbs, isotopeNo), hashSize(_hashSize)
487 lProbs.push_back(std::numeric_limits<double>::infinity());
488 lProbs.push_back(-std::numeric_limits<double>::infinity());
489 guarded_lProbs = lProbs.data()+1;
498 std::vector<Conf> new_fringe;
499 std::unordered_set<Conf,KeyHasher,ConfEqual> visited(hashSize,keyHasher,equalizer);
501 for(
unsigned int ii = 0; ii<fringe.size(); ii++)
502 visited.insert(fringe[ii]);
507 while(!fringe.empty())
509 currentConf = fringe.back();
514 if(opc < new_threshold)
515 new_fringe.push_back(currentConf);
519 configurations.push_back(currentConf);
520 for(
unsigned int ii = 0; ii <
isotopeNo; ii++ )
521 for(
unsigned int jj = 0; jj <
isotopeNo; jj++ )
522 if( ii != jj && currentConf[jj] > 0 )
529 if (visited.count(currentConf) == 0 && lpc < current_threshold &&
530 (opc > lpc || (opc == lpc && ii > jj)))
532 Conf nc = allocator.makeCopy(currentConf);
536 if(lpc >= new_threshold)
537 fringe.push_back(nc);
539 new_fringe.push_back(nc);
551 current_threshold = new_threshold;
552 fringe.swap(new_fringe);
554 std::sort(configurations.begin()+sorted_up_to_idx, configurations.end(), orderMarginal);
556 if(lProbs.capacity() * 2 < configurations.size() + 2)
559 lProbs.reserve(configurations.size()+2);
560 probs.reserve(configurations.size());
561 masses.reserve(configurations.size());
567 for(
unsigned int ii=sorted_up_to_idx; ii < configurations.size(); ii++)
569 lProbs.push_back(
logProb(configurations[ii]));
570 probs.push_back(exp(lProbs.back()));
574 lProbs.push_back(-std::numeric_limits<double>::infinity());
576 sorted_up_to_idx = configurations.size();
577 guarded_lProbs = lProbs.data()+1;