RDKit
Open-source cheminformatics and machine learning.
SubstructMatch.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SUBSTRUCTMATCH_H
12 #define RD_SUBSTRUCTMATCH_H
13 
14 // std bits
15 #include <vector>
16 #include <functional>
17 #include <unordered_map>
18 #include <cstdint>
19 #include "GraphMol/StereoGroup.h"
20 #include <string>
21 
22 namespace RDKit {
23 class ROMol;
24 class Atom;
25 class Bond;
26 class ResonanceMolSupplier;
27 class MolBundle;
28 
//! \brief used to return matches from substructure searching.
//! Each entry is a (queryAtomIdx, molAtomIdx) pair.
using MatchVectType = std::vector<std::pair<int, int>>;
32 
34  bool useChirality = false; //!< Use chirality in determining whether or not
35  //!< atoms/bonds match
36  bool useEnhancedStereo = false; //!< Use enhanced stereochemistry in
37  //!< determining whether atoms/bonds match
38  bool aromaticMatchesConjugated = false; //!< Aromatic and conjugated bonds
39  //!< match each other
40  bool useQueryQueryMatches = false; //!< Consider query-query matches, not
41  //!< just simple matches
42  bool recursionPossible = true; //!< Allow recursive queries
43  bool uniquify = true; //!< uniquify (by atom index) match results
44  unsigned int maxMatches = 1000; //!< maximum number of matches to return
45  int numThreads = 1; //!< number of threads to use when multi-threading
46  //!< is possible. 0 selects the number of
47  //!< concurrent threads supported by the hardware
48  //!< negative values are added to the number of
49  //!< concurrent threads supported by the hardware
50  std::function<bool(const ROMol &mol,
51  const std::vector<unsigned int> &match)>
52  extraFinalCheck; //!< a function to be called at the end to validate a
53  //!< match
54 
56 };
57 
59  SubstructMatchParameters &params, const std::string &json);
60 
61 //! Find a substructure match for a query in a molecule
62 /*!
63  \param mol The ROMol to be searched
64  \param query The query ROMol
65  \param matchParams Parameters controlling the matching
66 
67  \return The matches, if any
68 
69 */
70 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
71  const ROMol &mol, const ROMol &query,
73 
74 //! Find all substructure matches for a query in a ResonanceMolSupplier object
75 /*!
76  \param resMolSuppl The ResonanceMolSupplier object to be searched
77  \param query The query ROMol
78  \param matchParams Parameters controlling the matching
79 
80  \return The matches, if any
81 
82 */
83 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
84  ResonanceMolSupplier &resMolSuppl, const ROMol &query,
86 
87 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
88  const MolBundle &bundle, const ROMol &query,
90 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
91  const ROMol &mol, const MolBundle &query,
93 RDKIT_SUBSTRUCTMATCH_EXPORT std::vector<MatchVectType> SubstructMatch(
94  const MolBundle &bundle, const MolBundle &query,
96 
97 //! Find a substructure match for a query
98 /*!
99  \param mol The object to be searched
100  \param query The query
101  \param matchVect Used to return the match
102  (pre-existing contents will be deleted)
103  \param recursionPossible flags whether or not recursive matches are allowed
104  \param useChirality use atomic CIP codes as part of the comparison
105  \param useQueryQueryMatches if set, the contents of atom and bond queries
106  will be used as part of the matching
107 
108  \return whether or not a match was found
109 
110 */
111 template <typename T1, typename T2>
112 bool SubstructMatch(T1 &mol, const T2 &query, MatchVectType &matchVect,
113  bool recursionPossible = true, bool useChirality = false,
114  bool useQueryQueryMatches = false) {
116  params.recursionPossible = recursionPossible;
117  params.useChirality = useChirality;
118  params.useQueryQueryMatches = useQueryQueryMatches;
119  params.maxMatches = 1;
120  std::vector<MatchVectType> matchVects = SubstructMatch(mol, query, params);
121  if (matchVects.size()) {
122  matchVect = matchVects.front();
123  } else {
124  matchVect.clear();
125  }
126  return matchVect.size() != 0;
127 };
128 
129 //! Find all substructure matches for a query
130 /*!
131  \param mol The object to be searched
132  \param query The query
133  \param matchVect Used to return the matches
134  (pre-existing contents will be deleted)
135  \param uniquify Toggles uniquification (by atom index) of the results
136  \param recursionPossible flags whether or not recursive matches are allowed
137  \param useChirality use atomic CIP codes as part of the comparison
138  \param useQueryQueryMatches if set, the contents of atom and bond queries
139  will be used as part of the matching
140  \param maxMatches The maximum number of matches that will be returned.
141  In high-symmetry cases with medium-sized molecules, it is
142  very
143  easy to end up with a combinatorial explosion in the
144  number of
145  possible matches. This argument prevents that from having
146  unintended consequences
147 
148  \return the number of matches found
149 
150 */
151 template <typename T1, typename T2>
152 unsigned int SubstructMatch(T1 &mol, const T2 &query,
153  std::vector<MatchVectType> &matchVect,
154  bool uniquify = true, bool recursionPossible = true,
155  bool useChirality = false,
156  bool useQueryQueryMatches = false,
157  unsigned int maxMatches = 1000,
158  int numThreads = 1) {
160  params.uniquify = uniquify;
161  params.recursionPossible = recursionPossible;
162  params.useChirality = useChirality;
163  params.useQueryQueryMatches = useQueryQueryMatches;
164  params.maxMatches = maxMatches;
165  params.numThreads = numThreads;
166  matchVect = SubstructMatch(mol, query, params);
167  return matchVect.size();
168 };
169 
170 // ----------------------------------------------
171 //
172 // find one match in ResonanceMolSupplier object
173 //
174 template <>
175 inline bool SubstructMatch(ResonanceMolSupplier &resMolSupplier,
176  const ROMol &query, MatchVectType &matchVect,
177  bool recursionPossible, bool useChirality,
178  bool useQueryQueryMatches) {
180  params.recursionPossible = recursionPossible;
181  params.useChirality = useChirality;
182  params.useQueryQueryMatches = useQueryQueryMatches;
183  params.maxMatches = 1;
184  std::vector<MatchVectType> matchVects =
185  SubstructMatch(resMolSupplier, query, params);
186  if (matchVects.size()) {
187  matchVect = matchVects.front();
188  } else {
189  matchVect.clear();
190  }
191  return matchVect.size() != 0;
192 }
193 
194 template <>
195 inline unsigned int SubstructMatch(ResonanceMolSupplier &resMolSupplier,
196  const ROMol &query,
197  std::vector<MatchVectType> &matchVect,
198  bool uniquify, bool recursionPossible,
199  bool useChirality, bool useQueryQueryMatches,
200  unsigned int maxMatches, int numThreads) {
202  params.uniquify = uniquify;
203  params.recursionPossible = recursionPossible;
204  params.useChirality = useChirality;
205  params.useQueryQueryMatches = useQueryQueryMatches;
206  params.maxMatches = maxMatches;
207  params.numThreads = numThreads;
208  matchVect = SubstructMatch(resMolSupplier, query, params);
209  return matchVect.size();
210 };
211 
212 //! Class used as a final step to confirm whether or not a given atom->atom
213 //! mapping is a valid substructure match.
215  public:
216  MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol,
217  const SubstructMatchParameters &ps);
218 
219  bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const;
220 
221  private:
222  const ROMol &d_query;
223  const ROMol &d_mol;
224  const SubstructMatchParameters &d_params;
225  std::unordered_map<unsigned int, StereoGroup const *> d_molStereoGroups;
226 };
227 
228 } // namespace RDKit
229 
230 #endif
Defines the class StereoGroup which stores relationships between the absolute configurations of atoms...
MolBundle contains a collection of related ROMols.
Definition: MolBundle.h:39
MolMatchFinalCheckFunctor(const ROMol &query, const ROMol &mol, const SubstructMatchParameters &ps)
bool operator()(const std::uint32_t q_c[], const std::uint32_t m_c[]) const
#define RDKIT_SUBSTRUCTMATCH_EXPORT
Definition: export.h:473
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching, The format is (queryAtomIdx, molAtomIdx)
RDKIT_SUBSTRUCTMATCH_EXPORT std::vector< MatchVectType > SubstructMatch(const ROMol &mol, const ROMol &query, const SubstructMatchParameters &params=SubstructMatchParameters())
Find a substructure match for a query in a molecule.
RDKIT_SUBSTRUCTMATCH_EXPORT void updateSubstructMatchParamsFromJSON(SubstructMatchParameters &params, const std::string &json)
unsigned int maxMatches
maximum number of matches to return
bool uniquify
uniquify (by atom index) match results
bool recursionPossible
Allow recursive queries.
std::function< bool(const ROMol &mol, const std::vector< unsigned int > &match)> extraFinalCheck