24 #include <unordered_map>
37 #include "dirtyAllocator.h"
38 #include "operators.h"
40 #include "marginalTrek++.h"
41 #include "isoSpec++.h"
43 #include "element_tables.h"
54 isotopeNumbers(new int[0]),
55 atomCounts(new int[0]),
58 marginals(new Marginal*[0]),
65 const int* _isotopeNumbers,
66 const int* _atomCounts,
67 const double*
const * _isotopeMasses,
68 const double*
const * _isotopeProbabilities
71 dimNumber(_dimNumber),
72 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
73 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
74 confSize(_dimNumber * sizeof(int)),
80 setupMarginals(_isotopeMasses, _isotopeProbabilities);
90 Iso::Iso(
Iso&& other) :
91 disowned(other.disowned),
92 dimNumber(other.dimNumber),
93 isotopeNumbers(other.isotopeNumbers),
94 atomCounts(other.atomCounts),
95 confSize(other.confSize),
97 marginals(other.marginals),
98 modeLProb(other.modeLProb)
100 other.disowned =
true;
104 Iso::Iso(
const Iso& other,
bool fullcopy) :
105 disowned(fullcopy ? throw std::logic_error(
"Not implemented") : true),
106 dimNumber(other.dimNumber),
107 isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
108 atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
109 confSize(other.confSize),
110 allDim(other.allDim),
111 marginals(fullcopy ? throw std::logic_error(
"Not implemented") : other.marginals),
112 modeLProb(other.modeLProb)
116 inline void Iso::setupMarginals(
const double*
const * _isotopeMasses,
const double*
const * _isotopeProbabilities)
129 _isotopeProbabilities[ii],
169 mass +=
marginals[ii]->getLightestConfMass();
177 mass +=
marginals[ii]->getHeaviestConfMass();
185 mass +=
marginals[ii]->getMonoisotopicConfMass();
193 lprob +=
marginals[ii]->getSmallestLProb();
214 Iso::Iso(
const char* formula) :
220 std::vector<const double*> isotope_masses;
221 std::vector<const double*> isotope_probabilities;
225 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
229 void Iso::addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities)
231 Marginal* m =
new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
242 unsigned int parse_formula(
const char* formula, std::vector<const double*>& isotope_masses, std::vector<const double*>& isotope_probabilities,
int** isotopeNumbers,
int** atomCounts,
unsigned int* confSize)
245 size_t slen = strlen(formula);
249 std::vector<std::pair<const char*, size_t> > elements;
250 std::vector<int> numbers;
253 throw invalid_argument(
"Invalid formula: can't be empty");
255 if(!isdigit(formula[slen-1]))
256 throw invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
258 for(
size_t ii=0; ii<slen; ii++)
259 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
260 throw invalid_argument(
"Invalid formula: contains invalid (non-digit, non-alpha) character");
264 size_t digit_end = 0;
266 while(position < slen)
269 while(isalpha(formula[elem_end]))
271 digit_end = elem_end;
272 while(isdigit(formula[digit_end]))
274 elements.emplace_back(&formula[position], elem_end-position);
275 numbers.push_back(atoi(&formula[elem_end]));
276 position = digit_end;
279 std::vector<int> element_indexes;
281 for (
unsigned int i=0; i<elements.size(); i++)
284 for(
int j=0; j<ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
286 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
293 throw invalid_argument(
"Invalid formula");
294 element_indexes.push_back(idx);
297 vector<int> _isotope_numbers;
299 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
303 int atomicNo = elem_table_atomicNo[at_idx];
304 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_atomicNo[at_idx] == atomicNo)
309 _isotope_numbers.push_back(num);
312 for(vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
314 isotope_masses.push_back(&elem_table_mass[*it]);
315 isotope_probabilities.push_back(&elem_table_probability[*it]);
318 const unsigned int dimNumber = elements.size();
320 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
321 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
322 *confSize = dimNumber *
sizeof(int);
337 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
338 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
339 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
370 Lcutoff(_threshold <= 0.0 ? std::numeric_limits<double>::lowest() : (_absolute ? log(_threshold) : log(_threshold) + modeLProb))
387 if(!marginalResultsUnsorted[ii]->inRange(0))
394 int* tmpMarginalOrder =
new int[
dimNumber];
397 tmpMarginalOrder[ii] = ii;
399 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, comparator);
403 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
407 marginalOrder[tmpMarginalOrder[ii]] = ii;
409 delete[] tmpMarginalOrder;
414 marginalResults = marginalResultsUnsorted;
415 marginalOrder =
nullptr;
424 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->
getModeLProb();
426 lProbs_ptr = lProbs_ptr_start;
429 partialLProbs_second++;
440 lcfmsv = std::numeric_limits<double>::infinity();
452 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
455 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
478 memset(counter, 0,
sizeof(
int)*
dimNumber);
482 lProbs_ptr = lProbs_ptr_start - 1;
485 IsoThresholdGenerator::~IsoThresholdGenerator()
488 delete[] maxConfsLPSum;
489 if (marginalResultsUnsorted != marginalResults)
490 delete[] marginalResultsUnsorted;
491 dealloc_table(marginalResults,
dimNumber);
492 if(marginalOrder !=
nullptr)
493 delete[] marginalOrder;
502 IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso,
int tabSize,
int hashSize,
bool reorder_marginals,
double t_prob_hint)
503 : IsoGenerator(std::move(iso))
507 currentLThreshold = nextafter(
getModeLProb(), -std::numeric_limits<double>::infinity());
508 lastLThreshold = std::numeric_limits<double>::min();
509 marginalResultsUnsorted =
new LayeredMarginal*[
dimNumber];
510 resetPositions =
new const double*[
dimNumber];
515 marginalResultsUnsorted[ii] =
new LayeredMarginal(std::move(*(
marginals[ii])), tabSize, hashSize);
520 double* marginal_priorities =
new double[
dimNumber];
534 double log_R2 = log(InverseChiSquareCDF2(K, t_prob_hint));
540 marginal_priorities[ii] = 0.0;
543 double k =
static_cast<double>(i - 1);
546 double sum_lprobs = 0.0;
547 for(
int jj = 0; jj < i; jj++)
548 sum_lprobs += marginalResultsUnsorted[ii]->get_lProbs()[jj];
550 double sum_rademacher = 0.0;
551 for(
int jj = 1; jj < i; jj++)
552 sum_rademacher += log1p((
static_cast<double>(jj)) /
static_cast<double>(n));
554 marginal_priorities[ii] = -(sum_lprobs/2.0 + sum_rademacher - lgamma((k+2.0)/2.0) + k/2.0 * (log_R2 + log2pluslogpi + log(n)));
558 int* tmpMarginalOrder =
new int[
dimNumber];
561 tmpMarginalOrder[ii] = ii;
563 TableOrder<double> TO(marginal_priorities);
565 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, TO);
566 marginalResults =
new LayeredMarginal*[
dimNumber];
569 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
573 marginalOrder[tmpMarginalOrder[ii]] = ii;
575 delete[] tmpMarginalOrder;
576 delete[] marginal_priorities;
580 marginalResults = marginalResultsUnsorted;
581 marginalOrder =
nullptr;
590 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->
getModeLProb();
592 lProbs_ptr = lProbs_ptr_start;
595 partialLProbs_second++;
599 lastLThreshold = 10.0;
603 bool IsoLayeredGenerator::nextLayer(
double offset)
605 size_t first_mrg_size = marginalResults[0]->
get_no_confs();
610 lastLThreshold = currentLThreshold;
611 currentLThreshold += offset;
621 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
624 resetPositions[ii] = lProbs_ptr;
631 bool IsoLayeredGenerator::carry()
637 int * cntr_ptr = counter;
646 if(
partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
651 lProbs_ptr = resetPositions[idx];
653 while(*lProbs_ptr <= last_lcfmsv)
656 for(
int ii=0; ii<idx; ii++)
657 resetPositions[ii] = lProbs_ptr;
672 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
675 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
678 IsoLayeredGenerator::~IsoLayeredGenerator()
681 delete[] maxConfsLPSum;
682 delete[] resetPositions;
683 if (marginalResultsUnsorted != marginalResults)
684 delete[] marginalResultsUnsorted;
685 dealloc_table(marginalResults,
dimNumber);
686 if(marginalOrder !=
nullptr)
687 delete[] marginalOrder;
697 IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
708 logProbs =
new const vector<double>*[
dimNumber];
709 masses =
new const vector<double>*[
dimNumber];
710 marginalConfs =
new const vector<int*>*[
dimNumber];
714 masses[i] = &marginalResults[i]->conf_masses();
715 logProbs[i] = &marginalResults[i]->conf_lprobs();
716 marginalConfs[i] = &marginalResults[i]->confs();
719 topConf = allocator.newConf();
721 reinterpret_cast<char*
>(topConf) +
sizeof(
double),
726 *(
reinterpret_cast<double*
>(topConf)) =
740 dealloc_table<MarginalTrek*>(marginalResults,
dimNumber);
743 delete[] marginalConfs;
759 int* topConfIsoCounts = getConf(topConf);
761 currentLProb = *(
reinterpret_cast<double*
>(topConf));
762 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
763 currentProb = exp(currentLProb);
768 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
772 topConfIsoCounts[j]++;
773 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(topConfIsoCounts, logProbs,
dimNumber);
775 topConfIsoCounts[j]--;
780 void* acceptedCandidate = allocator.newConf();
781 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
782 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts,
confSize);
784 acceptedCandidateIsoCounts[j]++;
786 *(
reinterpret_cast<double*
>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs,
dimNumber);
788 pq.push(acceptedCandidate);
791 if(topConfIsoCounts[j] > 0)
795 topConfIsoCounts[ccount]++;
809 #if !ISOSPEC_BUILDING_R
811 void printConfigurations(
812 const std::tuple<double*,double*,int*,int>& results,
818 for(
int i=0; i<std::get<3>(results); i++){
820 std::cout <<
"Mass = " << std::get<0>(results)[i] <<
821 "\tand log-prob = " << std::get<1>(results)[i] <<
822 "\tand prob = " << exp(std::get<1>(results)[i]) <<
823 "\tand configuration =\t";
826 for(
int j=0; j<dimNumber; j++){
827 for(
int k=0; k<isotopeNumbers[j]; k++ )
829 std::cout << std::get<2>(results)[m] <<
" ";
836 std::cout << std::endl;