RDKit
Open-source cheminformatics and machine learning.
RGroupDecomp.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2017-2021, Novartis Institutes for BioMedical Research Inc.
3 // and other RDKit contributors
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef RDKIT_RGROUPDECOMP_H
13 #define RDKIT_RGROUPDECOMP_H
14 
15 #include "../RDKitBase.h"
17 #include <chrono>
18 
19 namespace RDKit {
20 
21 //! Compute the isomorphic degenerative points in the
22 //! molecule. These are points that are symmetrically
23 //! equivalent.
24 /*!
25  \param mol Molecule to compute the degenerative points
26 
27  \return the set of degenerative points (set<unsigned int>)
28 */
29 
//! Flags describing which kinds of R-group labels are recognised.
/*!
  Every enumerator except AutoDetect occupies its own bit, so values can be
  OR-ed together and stored in an unsigned int. AutoDetect (0xFF) has all of
  the low eight bits set.

  NOTE(review): AtomIndexLabels, RelabelDuplicateLabels and MDLRGroupLabels
  were missing from the listing this block was recovered from. Their names and
  order come from the file's symbol index; their values fill the otherwise
  unexplained gap between 0x02 and 0x20. Confirm against the canonical header.
*/
typedef enum {
  IsotopeLabels = 0x01,
  AtomMapLabels = 0x02,
  AtomIndexLabels = 0x04,
  RelabelDuplicateLabels = 0x08,
  MDLRGroupLabels = 0x10,
  DummyAtomLabels = 0x20,  // These are rgroups but will get relabelled
  AutoDetect = 0xFF,
} RGroupLabels;
39 
//! Strategies for matching molecules during the decomposition.
/*!
  Each enumerator occupies its own bit.

  NOTE(review): NoSymmetrization and the closing line of this enum were
  missing from the listing this block was recovered from. The name and
  position come from the file's symbol index; the value fills the gap between
  Exhaustive (0x04) and GA (0x10). Confirm against the canonical header.
*/
typedef enum {
  Greedy = 0x01,
  GreedyChunks = 0x02,
  Exhaustive = 0x04,  // not really useful for large sets
  NoSymmetrization = 0x08,
  GA = 0x10,
} RGroupMatching;
47 
//! Flags selecting how R-group labels are written; each occupies its own bit
//! so several can be OR-ed together.
// NOTE(review): the closing line of this enum was missing from the listing
// this block was recovered from; the type name is taken from the file's
// symbol index.
typedef enum {
  AtomMap = 0x01,
  Isotope = 0x02,
  MDLRGroup = 0x04,
} RGroupLabelling;
53 
//! Core alignment choices.
// NOTE(review): the closing line of this enum was missing from the listing
// this block was recovered from; the type name is taken from the file's
// symbol index.
typedef enum {
  // DEPRECATED, remove the following line in release 2021.03
  None = 0x0,
  // Same value as None; this is the spelling that survives the deprecation.
  NoAlignment = 0x0,
  MCS = 0x01,
} RGroupCoreAlignment;
60 
//! Scoring methods available to the decomposition.
typedef enum {
  Match = 0x1,
  // NOTE(review): this enumerator was missing from the listing this block was
  // recovered from. The name comes from the file's symbol index; the value
  // 0x4 is taken from the published RDKit API and is not visible in this
  // file -- confirm against the canonical header.
  FingerprintVariance = 0x4,
} RGroupScore;
65 
//! Immutable pair of a success flag and a score.
// NOTE(review): the opening line of this struct was missing from the listing
// this block was recovered from. It is restored here without any export
// macro (all members are defined inline); if the canonical header decorates
// it with RDKIT_RGROUPDECOMPOSITION_EXPORT, put that back.
struct RGroupDecompositionProcessResult {
  const bool success;  //!< flag supplied at construction
  const double score;  //!< score supplied at construction
  //! Stores both values unchanged; they cannot be modified afterwards.
  RGroupDecompositionProcessResult(const bool success, const double score)
      : success(success), score(score) {}
};
72 
73 struct RGroupMatch;
74 
76  unsigned int labels = AutoDetect;
77  unsigned int matchingStrategy = GreedyChunks;
78  unsigned int scoreMethod = Match;
79  unsigned int rgroupLabelling = AtomMap | MDLRGroup;
80  unsigned int alignment = MCS;
81 
82  unsigned int chunkSize = 5;
83  //! only allow rgroup decomposition at the specified rgroups
84  bool onlyMatchAtRGroups = false;
85  //! remove all user-defined rgroups that only have hydrogens
86  bool removeAllHydrogenRGroups = true;
87  //! remove all user-defined rgroups that only have hydrogens,
88  //! and also remove the corresponding labels from the core
89  bool removeAllHydrogenRGroupsAndLabels = true;
90  //! remove all hydrogens from the output molecules
91  bool removeHydrogensPostMatch = true;
92  //! allow labelled Rgroups of degree 2 or more
93  bool allowNonTerminalRGroups = false;
94 
95  double timeout = -1.0; ///< timeout in seconds. <=0 indicates no timeout
96 
97  // Determine how to assign the rgroup labels from the given core
98  unsigned int autoGetLabels(const RWMol &);
99 
100  // Prepare the core for substructure searching and rgroup assignment
101  bool prepareCore(RWMol &, const RWMol *alignCore);
102 
103  // Parameters specific to GA
104 
105  // GA population size or -1 to use best guess
106  int gaPopulationSize = -1;
107  // GA maximum number of operations or -1 to use best guess
108  int gaMaximumOperations = -1;
109  // GA number of operations permitted without improvement before exiting (-1
110  // for best guess)
111  int gaNumberOperationsWithoutImprovement = -1;
112  // GA random number seed (-1 for default, -2 for random seed)
113  int gaRandomSeed = -1;
114  // Number of runs
115  int gaNumberRuns = 1;
116  // Sequential or parallel runs?
117 #ifdef RDK_THREADSAFE_SSS
118  bool gaParallelRuns = true;
119 #else
120  bool gaParallelRuns = false;
121 #endif
122  // Controls the way substructure matching with the core is done
124 
125  RGroupDecompositionParameters() { substructmatchParams.useChirality = true; }
126 
127  private:
128  int indexOffset{-1};
129  void checkNonTerminal(const Atom &atom) const;
130 };
131 
132 typedef std::map<std::string, ROMOL_SPTR> RGroupRow;
133 typedef std::vector<ROMOL_SPTR> RGroupColumn;
134 
135 typedef std::vector<RGroupRow> RGroupRows;
136 typedef std::map<std::string, RGroupColumn> RGroupColumns;
137 
//! Records, per label, whether the label has been used and whether it is
//! user defined.
// NOTE(review): the opening line of this class was missing from the listing
// this block was recovered from. It is restored here without any export
// macro (all members are defined inline); if the canonical header decorates
// it with RDKIT_RGROUPDECOMPOSITION_EXPORT, put that back.
class UsedLabelMap {
 public:
  //! Register every label found among the values of \c mapping.
  /*!
    \param mapping  each entry's value (second) becomes a tracked label. The
                    label starts out unused and is flagged as user defined
                    when the entry's key (first) is greater than zero. If two
                    entries share a value, the one visited last wins.
  */
  UsedLabelMap(const std::map<int, int> &mapping) {
    for (const auto &rl : mapping) {
      d_map[rl.second] = std::make_pair(false, (rl.first > 0));
    }
  }
  //! \return the "used" flag of \c label
  //! \throws std::out_of_range if \c label is not tracked
  bool getIsUsed(int label) const { return d_map.at(label).first; }
  //! Mark \c label as used. Unlike the getters this does not throw for an
  //! untracked label: operator[] inserts it, with "user defined" false.
  void setIsUsed(int label) { d_map[label].first = true; }
  //! \return the "user defined" flag of \c label
  //! \throws std::out_of_range if \c label is not tracked
  bool isUserDefined(int label) const { return d_map.at(label).second; }

 private:
  //! label -> (is used, is user defined)
  std::map<int, std::pair<bool, bool>> d_map;
};
152 
153 struct RGroupDecompData;
155  private:
156  RGroupDecompData *data; // implementation details
157  RGroupDecomposition(const RGroupDecomposition &); // no copy construct
158  RGroupDecomposition &operator=(
159  const RGroupDecomposition &); // Prevent assignment
160  RWMOL_SPTR outputCoreMolecule(const RGroupMatch &match,
161  const UsedLabelMap &usedRGroupMap) const;
162  std::map<int, bool> getBlankRGroupMap() const;
163 
164  public:
166  const RGroupDecompositionParameters &params =
168  RGroupDecomposition(const std::vector<ROMOL_SPTR> &cores,
169  const RGroupDecompositionParameters &params =
171 
173 
174  //! Returns the index of the added molecule in the RGroupDecomposition
175  /// or a negative error code
176  /// :param mol: Molecule to add to the decomposition
177  /// :result: index of the molecle or
178  /// -1 if none of the core matches
179  /// -2 if the matched molecule has no sidechains, i.e. is the
180  /// same as the scaffold
181  int add(const ROMol &mol);
183  bool process();
184 
186  //! return the current group labels
187  std::vector<std::string> getRGroupLabels() const;
188 
189  //! return rgroups in row order group[row][attachment_point] = ROMol
191  //! return rgroups in column order group[attachment_point][row] = ROMol
193 };
194 
196  const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
197  RGroupRows &rows, std::vector<unsigned int> *unmatched = nullptr,
198  const RGroupDecompositionParameters &options =
200 
202  const std::vector<ROMOL_SPTR> &cores, const std::vector<ROMOL_SPTR> &mols,
203  RGroupColumns &columns, std::vector<unsigned int> *unmatched = nullptr,
204  const RGroupDecompositionParameters &options =
206 
//! Check whether a time budget has been used up.
/*!
  \param t0              start of the timed operation (steady clock)
  \param timeout         budget in seconds; a value <= 0 disables the check
  \param throwOnTimeout  if true (the default) an exhausted budget is
                         reported by throwing instead of by return value

  \return false while the budget is not exhausted, or when the check is
          disabled; true when at least \c timeout seconds have elapsed since
          \c t0 and \c throwOnTimeout is false
  \throws std::runtime_error ("operation timed out") when the budget is
          exhausted and \c throwOnTimeout is true
*/
inline bool checkForTimeout(const std::chrono::steady_clock::time_point &t0,
                            double timeout, bool throwOnTimeout = true) {
  if (timeout <= 0) {
    return false;
  }
  const std::chrono::duration<double> elapsed =
      std::chrono::steady_clock::now() - t0;
  const bool expired = elapsed.count() >= timeout;
  if (expired && throwOnTimeout) {
    throw std::runtime_error("operation timed out");
  }
  return expired;
}
220 
221 } // namespace RDKit
222 
223 #endif
RGroupRows getRGroupsAsRows() const
return rgroups in row order group[row][attachment_point] = ROMol
RGroupDecomposition(const std::vector< ROMOL_SPTR > &cores, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
RGroupColumns getRGroupsAsColumns() const
return rgroups in column order group[attachment_point][row] = ROMol
const RGroupDecompositionParameters & params() const
RGroupDecomposition(const ROMol &core, const RGroupDecompositionParameters &params=RGroupDecompositionParameters())
int add(const ROMol &mol)
RGroupDecompositionProcessResult processAndScore()
std::vector< std::string > getRGroupLabels() const
return the current group labels
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
void setIsUsed(int label)
Definition: RGroupDecomp.h:146
bool getIsUsed(int label) const
Definition: RGroupDecomp.h:145
bool isUserDefined(int label) const
Definition: RGroupDecomp.h:147
UsedLabelMap(const std::map< int, int > &mapping)
Definition: RGroupDecomp.h:140
#define RDKIT_RGROUPDECOMPOSITION_EXPORT
Definition: export.h:385
Std stuff.
Definition: Abbreviations.h:18
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RGroupCoreAlignment
Definition: RGroupDecomp.h:54
@ NoAlignment
Definition: RGroupDecomp.h:57
RGroupMatching
Definition: RGroupDecomp.h:40
@ NoSymmetrization
Definition: RGroupDecomp.h:44
@ Greedy
Definition: RGroupDecomp.h:41
@ Exhaustive
Definition: RGroupDecomp.h:43
@ GreedyChunks
Definition: RGroupDecomp.h:42
std::map< std::string, ROMOL_SPTR > RGroupRow
Definition: RGroupDecomp.h:132
std::vector< ROMOL_SPTR > RGroupColumn
Definition: RGroupDecomp.h:133
std::map< std::string, RGroupColumn > RGroupColumns
Definition: RGroupDecomp.h:136
RGroupLabels
Definition: RGroupDecomp.h:30
@ MDLRGroupLabels
Definition: RGroupDecomp.h:35
@ AtomMapLabels
Definition: RGroupDecomp.h:32
@ AtomIndexLabels
Definition: RGroupDecomp.h:33
@ RelabelDuplicateLabels
Definition: RGroupDecomp.h:34
@ AutoDetect
Definition: RGroupDecomp.h:37
@ DummyAtomLabels
Definition: RGroupDecomp.h:36
@ IsotopeLabels
Definition: RGroupDecomp.h:31
RGroupLabelling
Definition: RGroupDecomp.h:48
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:207
RDKIT_RGROUPDECOMPOSITION_EXPORT unsigned int RGroupDecompose(const std::vector< ROMOL_SPTR > &cores, const std::vector< ROMOL_SPTR > &mols, RGroupRows &rows, std::vector< unsigned int > *unmatched=nullptr, const RGroupDecompositionParameters &options=RGroupDecompositionParameters())
std::vector< RGroupRow > RGroupRows
Definition: RGroupDecomp.h:135
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition: RWMol.h:222
unsigned int autoGetLabels(const RWMol &)
bool prepareCore(RWMol &, const RWMol *alignCore)
SubstructMatchParameters substructmatchParams
Definition: RGroupDecomp.h:123
RGroupDecompositionProcessResult(const bool success, const double score)
Definition: RGroupDecomp.h:69
RGroupMatch is the decomposition for a single molecule.
Definition: RGroupMatch.h:19