RDKit
Open-source cheminformatics and machine learning.
Embedder.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2012 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #ifndef _RD_EMBEDDER_H_
12 #define _RD_EMBEDDER_H_
13 
14 #include <map>
15 #include <Geometry/point.h>
16 #include <GraphMol/ROMol.h>
17 
18 namespace RDKit {
19 namespace DGeomHelpers {
20 
21 //! Compute an embedding (in 3D) for the specified molecule using Distance
22 // Geometry
23 /*!
24  The following operations are performed (in order) here:
25  -# Build a distance bounds matrix based on the topology, including 1-5
26  distances but not vdW scaling
27  -# Triangle smooth this bounds matrix
28  -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
29  scaling, and repeat step 2
30  -# Pick a distance matrix at random using the bounds matrix
31  -# Compute initial coordinates from the distance matrix
32  -# Repeat steps 4 and 5 until maxIterations is reached or embedding is
33  successful
34  -# Adjust initial coordinates by minimizing a Distance Violation error
35  function
36 
37  **NOTE**: if the molecule has multiple fragments, they will be embedded
38  separately,
39  this means that they will likely occupy the same region of space.
40 
41  \param mol Molecule of interest
42  \param maxIterations Max. number of times the embedding will be tried if
43  coordinates are
44  not obtained successfully. The default value is 10x the
45  number of atoms.
46  \param seed provides a seed for the random number generator (so that
47  the same
48  coordinates can be obtained for a molecule on multiple
49  runs)
50  If negative, the RNG will not be seeded.
51  \param clearConfs Clear all existing conformations on the molecule
52  \param useRandomCoords Start the embedding from random coordinates instead of
53  using eigenvalues of the distance matrix.
54  \param boxSizeMult Determines the size of the box that is used for
55  random coordinates. If this is a positive number, the
56  side length will equal the largest element of the
57  distance
58  matrix times \c boxSizeMult. If this is a negative
59  number,
60  the side length will equal \c -boxSizeMult (i.e.
61  independent
62  of the elements of the distance matrix).
63  \param randNegEig Picks coordinates at random when an embedding process
64  produces
65  negative eigenvalues
66  \param numZeroFail Fail embedding if we find this many or more zero
67  eigenvalues
68  (within a tolerance)
69  \param coordMap a map of int to Point3D, between atom IDs and their locations.
70  If this container is provided, the
71  coordinates
72  are used to set distance constraints on the embedding. The
73  resulting
74  conformer(s) should have distances between the specified
75  atoms that
76  reproduce those between the points in \c coordMap. Because
77  the embedding
78  produces a molecule in an arbitrary reference frame, an
79  alignment step
80  is required to actually reproduce the provided coordinates.
81  \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
82  (this shouldn't normally be altered in client code).
83  \param ignoreSmoothingFailures try to embed the molecule even if triangle
84  bounds
85  smoothing fails
86  \param enforceChirality enforce the correct chirality if chiral centers are
87  present
88 
89  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
90  \param useBasicKnowledge impose "basic knowledge" terms such as flat
91  aromatic rings, ketones, etc.
92  \param verbose print output of experimental torsion-angle preferences
93 
94  \param basinThresh set the basin threshold for the DGeom force field,
95  (this shouldn't normally be altered in client code).
96 
97  \return ID of the conformation added to the molecule, -1 if the embedding
98  failed
99 */
100 int EmbedMolecule(ROMol &mol, unsigned int maxIterations = 0, int seed = -1,
101  bool clearConfs = true, bool useRandomCoords = false,
102  double boxSizeMult = 2.0, bool randNegEig = true,
103  unsigned int numZeroFail = 1,
104  const std::map<int, RDGeom::Point3D> *coordMap = 0,
105  double optimizerForceTol = 1e-3,
106  bool ignoreSmoothingFailures = false,
107  bool enforceChirality = true,
108  bool useExpTorsionAnglePrefs = false,
109  bool useBasicKnowledge = false, bool verbose = false,
110  double basinThresh = 5.0);
111 
112 //! Embed multiple conformations for a molecule
113 /*!
114  This is kind of equivalent to calling EmbedMolecule multiple times - just that
115  the bounds
116  matrix is computed only once from the topology
117 
118  **NOTE**: if the molecule has multiple fragments, they will be embedded
119  separately,
120  this means that they will likely occupy the same region of space.
121 
122 
123  \param mol Molecule of interest
124  \param res Used to return the resulting conformer ids
125  \param numConfs Number of conformations to be generated
126  \param numThreads Sets the number of threads to use (more than one thread
127  will only
128  be used if the RDKit was built with multithread support)
129  If set to zero, the max supported by the system will be
130  used.
131  \param maxIterations Max. number of times the embedding will be tried if
132  coordinates are
133  not obtained successfully. The default declared below is 30
134  (TODO confirm whether 0 selects 10x the number of atoms).
135  \param seed provides a seed for the random number generator (so that
136  the same
137  coordinates can be obtained for a molecule on multiple
138  runs).
139  If negative, the RNG will not be seeded.
140  \param clearConfs Clear all existing conformations on the molecule
141  \param useRandomCoords Start the embedding from random coordinates instead of
142  using eigenvalues of the distance matrix.
143  \param boxSizeMult Determines the size of the box that is used for
144  random coordinates. If this is a positive number, the
145  side length will equal the largest element of the
146  distance
147  matrix times \c boxSizeMult. If this is a negative
148  number,
149  the side length will equal \c -boxSizeMult (i.e.
150  independent
151  of the elements of the distance matrix).
152  \param randNegEig Picks coordinates at random when an embedding process
153  produces
154  negative eigenvalues
155  \param numZeroFail Fail embedding if we find this many or more zero
156  eigenvalues
157  (within a tolerance)
158  \param pruneRmsThresh Retain only the conformations out of 'numConfs' after
159  embedding that are
160  at least this far apart from each other. RMSD is
161  computed on the heavy atoms.
162  Pruning is greedy; i.e. the first embedded
163  conformation is retained and from
164  then on only those that are at least pruneRmsThresh away
165  from already
166  retained conformations are kept. The pruning is done
167  after embedding and
168  bounds violation minimization. No pruning by default.
169  \param coordMap a map of int to Point3D, between atom IDs and their locations.
170  If this container is provided, the
171  coordinates
172  are used to set distance constraints on the embedding. The
173  resulting
174  conformer(s) should have distances between the specified
175  atoms that
176  reproduce those between the points in \c coordMap. Because
177  the embedding
178  produces a molecule in an arbitrary reference frame, an
179  alignment step
180  is required to actually reproduce the provided coordinates.
181 
182  \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
183  (this shouldn't normally be altered in client code).
184 
185  \param ignoreSmoothingFailures try to embed the molecule even if triangle
186  bounds
187  smoothing fails
188  \param enforceChirality enforce the correct chirality if chiral centers are
189  present
190 
191  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
192  \param useBasicKnowledge impose "basic knowledge" terms such as flat
193  aromatic rings, ketones, etc.
194  \param verbose print output of experimental torsion-angle preferences
195 
196  \param basinThresh set the basin threshold for the DGeom force field,
197  (this shouldn't normally be altered in client code).
198 
199 */
200 void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs = 10,
201  int numThreads = 1, unsigned int maxIterations = 30,
202  int seed = -1, bool clearConfs = true,
203  bool useRandomCoords = false, double boxSizeMult = 2.0,
204  bool randNegEig = true, unsigned int numZeroFail = 1,
205  double pruneRmsThresh = -1.0,
206  const std::map<int, RDGeom::Point3D> *coordMap = 0,
207  double optimizerForceTol = 1e-3,
208  bool ignoreSmoothingFailures = false,
209  bool enforceChirality = true,
210  bool useExpTorsionAnglePrefs = false,
211  bool useBasicKnowledge = false, bool verbose = false,
212  double basinThresh = 5.0);
213 //! \overload
215  ROMol &mol, unsigned int numConfs = 10, unsigned int maxIterations = 30,
216  int seed = -1, bool clearConfs = true, bool useRandomCoords = false,
217  double boxSizeMult = 2.0, bool randNegEig = true,
218  unsigned int numZeroFail = 1, double pruneRmsThresh = -1.0,
219  const std::map<int, RDGeom::Point3D> *coordMap = 0,
220  double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
221  bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
222  bool useBasicKnowledge = false, bool verbose = false,
223  double basinThresh = 5.0);
224 }
225 }
226 
227 #endif
int EmbedMolecule(ROMol &mol, unsigned int maxIterations=0, int seed=-1, bool clearConfs=true, bool useRandomCoords=false, double boxSizeMult=2.0, bool randNegEig=true, unsigned int numZeroFail=1, const std::map< int, RDGeom::Point3D > *coordMap=0, double optimizerForceTol=1e-3, bool ignoreSmoothingFailures=false, bool enforceChirality=true, bool useExpTorsionAnglePrefs=false, bool useBasicKnowledge=false, bool verbose=false, double basinThresh=5.0)
Compute an embedding (in 3D) for the specified molecule using Distance Geometry.
Defines the primary molecule class ROMol as well as associated typedefs.
std::vector< int > INT_VECT
Definition: types.h:146
Includes a bunch of functionality for handling Atom and Bond queries.
Definition: Atom.h:28
void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs=10, int numThreads=1, unsigned int maxIterations=30, int seed=-1, bool clearConfs=true, bool useRandomCoords=false, double boxSizeMult=2.0, bool randNegEig=true, unsigned int numZeroFail=1, double pruneRmsThresh=-1.0, const std::map< int, RDGeom::Point3D > *coordMap=0, double optimizerForceTol=1e-3, bool ignoreSmoothingFailures=false, bool enforceChirality=true, bool useExpTorsionAnglePrefs=false, bool useBasicKnowledge=false, bool verbose=false, double basinThresh=5.0)