10 #ifndef RGROUP_DECOMP_DATA
11 #define RGROUP_DECOMP_DATA
41 std::vector<std::vector<RGroupMatch>>
matches;
55 :
params(std::move(inputParams)) {
62 :
params(std::move(inputParams)) {
63 for (
size_t i = 0; i < inputCores.size(); ++i) {
70 for (
auto &core :
cores) {
71 RWMol *alignCore = core.first ?
cores[0].core.get() :
nullptr;
73 "Could not prepare at least one core");
75 core.second.labelledCore.reset(
new RWMol(*core.second.core));
100 "Scoring method is not fingerprint variance!");
103 "Illegal permutation prune length");
135 for (
size_t mol_idx = 0; mol_idx <
permutation.size(); ++mol_idx) {
136 std::vector<RGroupMatch> keepVector;
137 size_t mi = mol_idx + offset;
152 const bool removeAllHydrogenRGroups =
156 std::vector<RGroupMatch> results;
158 for (
size_t i = 0; i <
matches.size(); ++i) {
160 results.push_back(
matches[i].at(pi));
173 std::map<int, std::set<int>> labelCores;
174 std::set<int> coresVisited;
175 for (
auto &position : results) {
176 int core_idx = position.core_idx;
177 if (coresVisited.find(core_idx) == coresVisited.end()) {
178 coresVisited.insert(core_idx);
179 auto core =
cores.find(core_idx);
180 if (core !=
cores.end()) {
181 for (
auto rlabels :
getRlabels(*core->second.core)) {
182 int rlabel = rlabels.first;
183 labelCores[rlabel].insert(core_idx);
189 for (
int label :
labels) {
190 if (label > 0 && !removeAllHydrogenRGroups) {
194 for (
auto &position : results) {
195 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
196 bool labelHasCore = labelCores[label].find(position.core_idx) !=
197 labelCores[label].end();
198 if (labelHasCore && rgroup != position.rgroups.end() &&
199 !rgroup->second->is_hydrogen) {
206 for (
auto &position : results) {
207 position.rgroups.erase(label);
237 for (
const auto &p : atoms) {
245 const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
246 for (
const auto &i : atomsToAdd) {
247 mol.
addAtom(i.second,
false,
true);
257 UsedLabels &used_labels,
const std::set<int> &indexLabels,
258 const std::map<
int, std::vector<int>> &extraAtomRLabels) {
266 std::map<int, Atom *> atoms =
getRlabels(core);
274 std::map<int, std::vector<int>> bondsToCore;
275 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
278 for (
const auto &rlabels : atoms) {
279 int userLabel = rlabels.first;
283 Atom *atom = rlabels.second;
284 mappings[userLabel] = userLabel;
285 used_labels.
add(userLabel);
291 auto *newAt =
new Atom(0);
293 atomsToAdd.emplace_back(atom, newAt);
298 for (
auto newLabel : indexLabels) {
299 auto atm = atoms.find(newLabel);
300 if (atm == atoms.end()) {
304 Atom *atom = atm->second;
307 auto mapping = mappings.find(newLabel);
308 if (mapping == mappings.end()) {
309 rlabel = used_labels.
next();
310 mappings[newLabel] = rlabel;
312 rlabel = mapping->second;
320 auto *newAt =
new Atom(0);
322 atomsToAdd.emplace_back(atom, newAt);
327 for (
const auto &extraAtomRLabel : extraAtomRLabels) {
328 auto atm = atoms.find(extraAtomRLabel.first);
329 if (atm == atoms.end()) {
332 Atom *atom = atm->second;
334 for (
size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
335 int rlabel = used_labels.
next();
339 "Multiple attachments to a dummy (or hydrogen) is weird.");
340 auto *newAt =
new Atom(0);
342 atomsToAdd.emplace_back(atom, newAt);
347 for (
const auto &rlabels : atoms) {
348 auto atom = rlabels.second;
366 std::vector<std::pair<Atom *, Atom *>> atomsToAdd;
367 std::map<int, int> rLabelCoreIndexToAtomicWt;
374 const std::vector<int> &rlabels =
378 for (
int rlabel : rlabels) {
379 auto label = mappings.find(rlabel);
388 auto *newAt =
new Atom(0);
390 atomsToAdd.emplace_back(atom, newAt);
398 rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->
getAtomicNum();
407 bool implicitOnly =
false;
408 bool updateExplicitCount =
false;
409 bool sanitize =
false;
431 std::cerr <<
"Relabel Rgroup smiles " <<
MolToSmiles(mol) << std::endl;
442 std::set<int> indexLabels;
451 std::map<int, std::vector<int>> extraAtomRLabels;
453 for (
auto &it : best) {
454 for (
auto &rgroup : it.rgroups) {
455 if (rgroup.first > 0) {
459 indexLabels.insert(rgroup.first);
462 std::map<int, int> rlabelsUsedInRGroup =
463 rgroup.second->getNumBondsToRlabels();
464 for (
auto &numBondsUsed : rlabelsUsedInRGroup) {
466 if (numBondsUsed.second > 1) {
467 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
475 for (
auto &core :
cores) {
476 core.second.labelledCore.reset(
new RWMol(*core.second.core));
489 used_labels.
add(userLabel);
491 for (
auto &core :
cores) {
493 indexLabels, extraAtomRLabels);
496 for (
auto &it : best) {
497 for (
auto &rgroup : it.rgroups) {
502 std::set<int> uniqueMappedValues;
504 std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
505 [](
const std::pair<int, int> &p) { return p.second; });
507 "Error in uniqueness of final RLabel mapping");
509 uniqueMappedValues.size() ==
userLabels.size() + indexLabels.size(),
510 "Error in final RMapping size");
517 switch (scoreMethod) {
523 fingerprintVarianceScoreData);
531 bool finalize =
false) {
535 auto t0 = std::chrono::steady_clock::now();
536 std::unique_ptr<CartesianProduct> iterator;
546 auto best = max_element(results.begin(), results.end(),
548 return a.rGroupScorer.getBestScore() <
549 b.rGroupScorer.getBestScore();
553 auto result = ga.
run();
563 std::vector<size_t> permutations;
571 std::back_inserter(permutations),
572 [](
const std::vector<RGroupMatch> &m) { return m.size(); });
573 permutation = std::vector<size_t>(permutations.size(), 0);
579 std::cerr <<
"Processing" << std::endl;
582 iterator = std::move(it);
586 while (iterator->next()) {
587 if (count > iterator->maxPermutations) {
591 std::cerr <<
"**************************************************"
596 :
score(iterator->permutation);
603 std::cerr <<
" ===> current best:" << newscore <<
">"
621 if (pruneMatches || finalize) {
#define CHECK_INVARIANT(expr, mess)
#define PRECONDITION(expr, mess)
The class for representing atoms.
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
void setAtomicNum(int newNum)
sets our atomic number
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
unsigned int getDegree() const
void clearProp(const std::string &key) const
clears the value of a property
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
std::set< int > labels_used
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void breakTies(const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
double getBestScore() const
return the best score found so far
unsigned int getNumConformers() const
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
static std::string to_string(const Descriptor &desc)
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabels,atom mapping for the current molecule.
const std::string RLABEL_TYPE
const std::string RLABEL_CORE_INDEX
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
const std::string SIDECHAIN_RLABELS
const unsigned int EMPTY_CORE_LABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
iterate through all possible permutations of the rgroups
double fingerprintVarianceGroupScore()
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch >> &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
A single rgroup attached to a given core.
boost::shared_ptr< RWMol > combinedMol
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
size_t permutationProduct
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * >> &atomsToAdd)
std::vector< int > processedRlabels
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int >> &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
RGroupScorer rGroupScorer
bool removeAllHydrogenRGroupsAndLabels
unsigned int matchingStrategy
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
double timeout
timeout in seconds. <=0 indicates no timeout
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
unsigned int rgroupLabelling
bool prepareCore(RWMol &, const RWMol *alignCore)