RDKit
Open-source cheminformatics and machine learning.
SubstructLibrary.h
Go to the documentation of this file.
1 // Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDK_SUBSTRUCT_LIBRARY
32 #define RDK_SUBSTRUCT_LIBRARY
33 #include <RDGeneral/export.h>
34 #include <GraphMol/RDKitBase.h>
35 #include <GraphMol/MolPickler.h>
40 #include <DataStructs/BitOps.h>
41 #include <GraphMol/MolOps.h>
42 
43 namespace RDKit {
44 
46 
47 //! Base class API for holding molecules to substructure search.
48 /*!
49  This is an API that hides the implementation details used for
50  indexing molecules for substructure searching. It simply
51  provides an API for adding and getting molecules from a set.
52  */
54  public:
55  virtual ~MolHolderBase() {}
56 
57  //! Add a new molecule to the substructure search library
58  //! Returns the molecules index in the library
59  virtual unsigned int addMol(const ROMol &m) = 0;
60 
61  // implementations should throw IndexError on out of range
62  virtual boost::shared_ptr<ROMol> getMol(unsigned int) const = 0;
63 
64  //! Get the current library size
65  virtual unsigned int size() const = 0;
66 };
67 
68 //! Concrete class that holds molecules in memory
69 /*!
70  This is currently one of the faster implementations.
71  However it is very memory intensive.
72 */
74  std::vector<boost::shared_ptr<ROMol>> mols;
75 
76  public:
77  MolHolder() : MolHolderBase(), mols() {}
78 
79  virtual unsigned int addMol(const ROMol &m) {
80  mols.push_back(boost::make_shared<ROMol>(m));
81  return size() - 1;
82  }
83 
84  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
85  if (idx >= mols.size()) throw IndexErrorException(idx);
86  return mols[idx];
87  }
88 
89  virtual unsigned int size() const {
90  return rdcast<unsigned int>(mols.size());
91  }
92 
93  std::vector<boost::shared_ptr<ROMol>> &getMols() { return mols; }
94  const std::vector<boost::shared_ptr<ROMol>> &getMols() const { return mols; }
95 };
96 
97 //! Concrete class that holds binary cached molecules in memory
98 /*!
99  This implementation uses quite a bit less memory than the
100  non cached implementation. However, due to the reduced speed
101  it should be used in conjunction with a pattern fingerprinter.
102 
103  See RDKit::FPHolder
104 */
106  std::vector<std::string> mols;
107 
108  public:
110 
111  virtual unsigned int addMol(const ROMol &m) {
112  mols.emplace_back();
113  MolPickler::pickleMol(m, mols.back());
114  return size() - 1;
115  }
116 
117  //! Adds a pickled binary molecule, no validity checking of the input
118  //! is done.
119  unsigned int addBinary(const std::string &pickle) {
120  mols.push_back(pickle);
121  return size() - 1;
122  }
123 
124  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
125  if (idx >= mols.size()) throw IndexErrorException(idx);
126  boost::shared_ptr<ROMol> mol(new ROMol);
127  MolPickler::molFromPickle(mols[idx], mol.get());
128  return mol;
129  }
130 
131  virtual unsigned int size() const {
132  return rdcast<unsigned int>(mols.size());
133  }
134 
135  std::vector<std::string> &getMols() { return mols; }
136  const std::vector<std::string> &getMols() const { return mols; }
137 };
138 
139 //! Concrete class that holds smiles strings in memory
140 /*!
141  This implementation uses quite a bit less memory than the
142  cached binary or uncached implementation. However, due to the
143  reduced speed it should be used in conjunction with a pattern
144  fingerprinter.
145 
146  See RDKit::FPHolder
147 */
149  : public MolHolderBase {
150  std::vector<std::string> mols;
151 
152  public:
154 
155  virtual unsigned int addMol(const ROMol &m) {
156  bool doIsomericSmiles = true;
157  mols.push_back(MolToSmiles(m, doIsomericSmiles));
158  return size() - 1;
159  }
160 
161  //! Add a smiles to the dataset, no validation is done
162  //! to the inputs.
163  unsigned int addSmiles(const std::string &smiles) {
164  mols.push_back(smiles);
165  return size() - 1;
166  }
167 
168  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
169  if (idx >= mols.size()) throw IndexErrorException(idx);
170 
171  boost::shared_ptr<ROMol> mol(SmilesToMol(mols[idx]));
172  return mol;
173  }
174 
175  virtual unsigned int size() const {
176  return rdcast<unsigned int>(mols.size());
177  }
178 
179  std::vector<std::string> &getMols() { return mols; }
180  const std::vector<std::string> &getMols() const { return mols; }
181 };
182 
183 //! Concrete class that holds trusted smiles strings in memory
184 /*!
185  A trusted smiles is essentially a smiles string that
186  RDKit has generated. This indicates that fewer
187  sanitization steps are required. See
188  http://rdkit.blogspot.com/2016/09/avoiding-unnecessary-work-and.html
189 
190  This implementation uses quite a bit less memory than the
191  cached binary or uncached implementation. However, due to the
192  reduced speed it should be used in conjunction with a pattern
193  fingerprinter.
194 
195  See RDKit::FPHolder
196 */
198  : public MolHolderBase {
199  std::vector<std::string> mols;
200 
201  public:
203 
204  virtual unsigned int addMol(const ROMol &m) {
205  bool doIsomericSmiles = true;
206  mols.push_back(MolToSmiles(m, doIsomericSmiles));
207  return size() - 1;
208  }
209 
210  //! Add a smiles to the dataset, no validation is done
211  //! to the inputs.
212  unsigned int addSmiles(const std::string &smiles) {
213  mols.push_back(smiles);
214  return size() - 1;
215  }
216 
217  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
218  if (idx >= mols.size()) throw IndexErrorException(idx);
219 
220  RWMol *m = SmilesToMol(mols[idx], 0, false);
221  if(m) {
222  m->updatePropertyCache();
223  }
224  return boost::shared_ptr<ROMol>(m);
225  }
226 
227  virtual unsigned int size() const {
228  return rdcast<unsigned int>(mols.size());
229  }
230 
231  std::vector<std::string> &getMols() { return mols; }
232  const std::vector<std::string> &getMols() const { return mols; }
233 };
234 
235 //! Base FPI for the fingerprinter used to rule out impossible matches
237  std::vector<ExplicitBitVect *> fps;
238 
239  public:
240  virtual ~FPHolderBase() {
241  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
242  }
243 
244  virtual unsigned int size() const {
245  return rdcast<unsigned int>(fps.size());
246  }
247 
248  //! Adds a molecule to the fingerprinter
249  unsigned int addMol(const ROMol &m) {
250  fps.push_back(makeFingerprint(m));
251  return rdcast<unsigned int>(fps.size() - 1);
252  }
253 
254  //! Adds a raw bit vector to the fingerprinter
255  unsigned int addFingerprint(const ExplicitBitVect &v) {
256  fps.push_back(new ExplicitBitVect(v));
257  return rdcast<unsigned int>(fps.size() - 1);
258  }
259 
260  //! Return false if a substructure search can never match the molecule
261  bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const {
262  if (idx >= fps.size()) throw IndexErrorException(idx);
263 
264  return AllProbeBitsMatch(query, *fps[idx]);
265  }
266 
267  //! Get the bit vector at the specified index (throws IndexError if out of
268  //! range)
269  const ExplicitBitVect &getFingerprint(unsigned int idx) const {
270  if (idx >= fps.size()) throw IndexErrorException(idx);
271  return *fps[idx];
272  }
273 
274  //! make the query vector
275  //! Caller owns the vector!
276  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const = 0;
277 
278  std::vector<ExplicitBitVect *> &getFingerprints() { return fps; }
279  const std::vector<ExplicitBitVect *> &getFingerprints() const { return fps; }
280 };
281 
282 //! Uses the pattern fingerprinter to rule out matches
284  public:
285  //! Caller owns the vector!
286  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const {
287  return PatternFingerprintMol(m, 2048);
288  }
289 };
290 
291 //! Substructure Search a library of molecules
292 /*! This class allows for multithreaded substructure searches of
293  large datasets.
294 
295  The implementations can use fingerprints to speed up searches
296  and have molecules cached as binary forms to reduce memory
297  usage.
298 
299  basic usage:
300  \code
301  SubstructLibrary lib;
302  lib.addMol(mol);
303  std::vector<unsigned int> results = lib.getMatches(query);
304  for(std::vector<unsigned int>::const_iterator matchIndex=results.begin();
305  matchIndex != results.end();
306  ++matchIndex) {
307  boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
308  }
309  \endcode
310 
311  Using different mol holders and pattern fingerprints.
312 
313  \code
314  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
315  boost::make_shared<CachedTrustedSmilesMolHolder>();
316  boost::shared_ptr<PatternHolder> patternHolder = \
317  boost::make_shared<PatternHolder>();
318 
319  SubstructLibrary lib(molHolder, patternHolder);
320  lib.addMol(mol);
321  \endcode
322 
323  Cached molecule holders create molecules on demand. There are currently
324  three styles of cached molecules.
325 
326  CachedMolHolder: stores molecules in the rdkit binary format.
327  CachedSmilesMolHolder: stores molecules in smiles format.
328  CachedTrustedSmilesMolHolder: stores molecules as trusted (RDKit-generated) smiles.
329 
330  The CachedTrustedSmilesMolHolder is made to add molecules from
331  a trusted source. This makes the basic assumption that RDKit was
332  used to sanitize and canonicalize the smiles string. In practice
333  this is considerably faster than using arbitrary smiles strings since
334  certain assumptions can be made. Molecules generated from trusted
335  smiles do not have ring information (although this is created
336  in the molecule being searched if necessary).
337 
338  When loading from external data, as opposed to using the "addMol" API,
339  care must be taken to ensure that the pattern fingerprints and smiles
340  are synchronized.
341 
342  Each pattern holder has an API point for making its fingerprint. This
343  is useful to ensure that the pattern stored in the database will be
344  compatible with the patterns made when analyzing queries.
345 
346  \code
347  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
348  boost::make_shared<CachedTrustedSmilesMolHolder>();
349  boost::shared_ptr<PatternHolder> patternHolder = \
350  boost::make_shared<PatternHolder>();
351 
352  // the PatternHolder instance is able to make fingerprints.
353  // These, of course, can be read from a file. For demonstration
354  // purposes we construct them here.
355  const std::string trustedSmiles = "c1ccccc1";
356  ROMol *m = SmilesToMol(trustedSmiles);
357  const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
358 
359  // The trusted smiles and bitVector can be read from any source.
360  // This is the fastest way to load a substruct library.
361  molHolder->addSmiles( trustedSmiles );
362  patternHolder->addFingerprint( *bitVector );
363  SubstructLibrary lib(molHolder, patternHolder);
364  delete m;
365  delete bitVector;
366  \endcode
367 
368 */
370  boost::shared_ptr<MolHolderBase> molholder;
371  boost::shared_ptr<FPHolderBase> fpholder;
372  MolHolderBase *mols; // used for a small optimization
373  FPHolderBase *fps{nullptr};
374 
375  public:
377  : molholder(new MolHolder),
378  fpholder(),
379  mols(molholder.get())
380  {}
381 
382  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules)
383  : molholder(molecules), fpholder(), mols(molholder.get()), fps(nullptr) {}
384 
385  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules,
386  boost::shared_ptr<FPHolderBase> fingerprints)
387  : molholder(molecules),
388  fpholder(fingerprints),
389  mols(molholder.get()),
390  fps(fpholder.get()) {}
391 
392  SubstructLibrary(const std::string &pickle)
393  : molholder(new MolHolder),
394  fpholder(),
395  mols(molholder.get()),
396  fps(nullptr) {
397  initFromString(pickle);
398  }
399 
400  //! Get the underlying molecule holder implementation
401  boost::shared_ptr<MolHolderBase> &getMolHolder() { return molholder; }
402 
403  const boost::shared_ptr<MolHolderBase> &getMolHolder() const {
404  return molholder;
405  }
406 
407  //! Get the underlying molecule holder implementation
408  boost::shared_ptr<FPHolderBase> &getFpHolder() { return fpholder; }
409 
410  //! Get the underlying molecule holder implementation
411  const boost::shared_ptr<FPHolderBase> &getFpHolder() const {
412  return fpholder;
413  }
414 
415  const MolHolderBase &getMolecules() const {
416  PRECONDITION(mols, "Molecule holder NULL in SubstructLibrary");
417  return *mols;
418  }
419 
420  //! Get the underlying fingerprint implementation.
421  /*! Throws a value error if no fingerprints have been set */
423  if (!fps)
424  throw ValueErrorException("Substruct Library does not have fingerprints");
425  return *fps;
426  }
427 
428  const FPHolderBase &getFingerprints() const {
429  if (!fps)
430  throw ValueErrorException("Substruct Library does not have fingerprints");
431  return *fps;
432  }
433 
434  //! Add a molecule to the library
435  /*!
436  \param mol Molecule to add
437 
438  returns index for the molecule in the library
439  */
440  unsigned int addMol(const ROMol &mol);
441 
442  //! Get the matching indices for the query
443  /*!
444  \param query Query to match against molecules
445  \param recursionPossible flags whether or not recursive matches are allowed
446  [ default true ]
447  \param useChirality use atomic CIP codes as part of the comparison [
448  default true ]
449  \param useQueryQueryMatches if set, the contents of atom and bond queries [
450  default false ]
451  will be used as part of the matching
452  \param numThreads If -1 use all available processors [default -1]
453  \param maxResults Maximum results to return, -1 means return all [default
454  -1]
455  */
456  std::vector<unsigned int> getMatches(const ROMol &query,
457  bool recursionPossible = true,
458  bool useChirality = true,
459  bool useQueryQueryMatches = false,
460  int numThreads = -1,
461  int maxResults = -1);
462  //! Get the matching indices for the query between the given indices
463  /*!
464  \param query Query to match against molecules
465  \param startIdx Start index of the search
466  \param endIdx Ending idx (non-inclusive) of the search.
467  \param recursionPossible flags whether or not recursive matches are allowed
468  [ default true ]
469  \param useChirality use atomic CIP codes as part of the comparison [
470  default true ]
471  \param useQueryQueryMatches if set, the contents of atom and bond queries [
472  default false ]
473  will be used as part of the matching
474  \param numThreads If -1 use all available processors [default -1]
475  \param maxResults Maximum results to return, -1 means return all [default
476  -1]
477  */
478  std::vector<unsigned int> getMatches(
479  const ROMol &query, unsigned int startIdx, unsigned int endIdx,
480  bool recursionPossible = true, bool useChirality = true,
481  bool useQueryQueryMatches = false, int numThreads = -1,
482  int maxResults = -1);
483 
484  //! Return the number of matches for the query
485  /*!
486  \param query Query to match against molecules
487  \param recursionPossible flags whether or not recursive matches are allowed
488  [ default true ]
489  \param useChirality use atomic CIP codes as part of the comparison [
490  default true ]
491  \param useQueryQueryMatches if set, the contents of atom and bond queries [
492  default false ]
493  will be used as part of the matching
494  \param numThreads If -1 use all available processors [default -1]
495  */
496  unsigned int countMatches(const ROMol &query, bool recursionPossible = true,
497  bool useChirality = true,
498  bool useQueryQueryMatches = false,
499  int numThreads = -1);
500  //! Return the number of matches for the query between the given indices
501  /*!
502  \param query Query to match against molecules
503  \param startIdx Start index of the search
504  \param endIdx Ending idx (non-inclusive) of the search.
505  \param recursionPossible flags whether or not recursive matches are allowed
506  [ default true ]
507  \param useChirality use atomic CIP codes as part of the comparison [
508  default true ]
509  \param useQueryQueryMatches if set, the contents of atom and bond queries [
510  default false ]
511  will be used as part of the matching
512  \param numThreads If -1 use all available processors [default -1]
513  */
514  unsigned int countMatches(const ROMol &query, unsigned int startIdx,
515  unsigned int endIdx, bool recursionPossible = true,
516  bool useChirality = true,
517  bool useQueryQueryMatches = false,
518  int numThreads = -1);
519 
520  //! Returns true if any match exists for the query
521  /*!
522  \param query Query to match against molecules
523  \param recursionPossible flags whether or not recursive matches are allowed
524  [ default true ]
525  \param useChirality use atomic CIP codes as part of the comparison [
526  default true ]
527  \param useQueryQueryMatches if set, the contents of atom and bond queries [
528  default false ]
529  will be used as part of the matching
530  \param numThreads If -1 use all available processors [default -1]
531  */
532  bool hasMatch(const ROMol &query, bool recursionPossible = true,
533  bool useChirality = true, bool useQueryQueryMatches = false,
534  int numThreads = -1);
535  //! Returns true if any match exists for the query between the specified
536  //! indices
537  /*!
538  \param query Query to match against molecules
539  \param startIdx Start index of the search
540  \param endIdx Ending idx (inclusive) of the search.
541  \param recursionPossible flags whether or not recursive matches are allowed
542  [ default true ]
543  \param useChirality use atomic CIP codes as part of the comparison [
544  default true ]
545  \param useQueryQueryMatches if set, the contents of atom and bond queries [
546  default false ]
547  will be used as part of the matching
548  \param numThreads If -1 use all available processors [default -1]
549  */
550  bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx,
551  bool recursionPossible = true, bool useChirality = true,
552  bool useQueryQueryMatches = false, int numThreads = -1);
553 
554  //! Returns the molecule at the given index
555  /*!
556  \param idx Index of the molecule in the library (n.b. could contain null)
557  */
558  boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
559  // expects implementation to throw IndexError if out of range
560  PRECONDITION(mols, "molholder is null in SubstructLibrary");
561  return mols->getMol(idx);
562  }
563 
564  //! Returns the molecule at the given index
565  /*!
566  \param idx Index of the molecule in the library (n.b. could contain null)
567  */
568  boost::shared_ptr<ROMol> operator[](unsigned int idx) {
569  // expects implementation to throw IndexError if out of range
570  PRECONDITION(mols, "molholder is null in SubstructLibrary");
571  return mols->getMol(idx);
572  }
573 
574  //! return the number of molecules in the library
575  unsigned int size() const {
576  PRECONDITION(mols, "molholder is null in SubstructLibrary");
577  return rdcast<unsigned int>(molholder->size());
578  }
579 
580  //! access required for serialization
581  void resetHolders() {
582  mols = molholder.get();
583  fps = fpholder.get();
584  }
585 
586  //! serializes (pickles) to a stream
587  void toStream(std::ostream &ss) const;
588  //! returns a string with a serialized (pickled) representation
589  std::string Serialize() const;
590  //! initializes from a stream pickle
591  void initFromStream(std::istream &ss);
592  //! initializes from a string pickle
593  void initFromString(const std::string &text);
594 };
595 } // namespace RDKit
596 
598 #endif
Contains general bit-comparison and similarity operations.
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
pulls in the core RDKit functionality
a class for bit vectors that are densely occupied
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Definition: Exceptions.h:19
Concrete class that holds binary cached molecules in memory.
virtual unsigned int addMol(const ROMol &m)
const std::vector< std::string > & getMols() const
virtual unsigned int size() const
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
std::vector< std::string > & getMols()
unsigned int addBinary(const std::string &pickle)
Concrete class that holds smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
const std::vector< std::string > & getMols() const
virtual unsigned int addMol(const ROMol &m)
virtual unsigned int size() const
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Concrete class that holds trusted smiles strings in memory.
virtual unsigned int addMol(const ROMol &m)
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
virtual unsigned int size() const
Get the current library size.
const std::vector< std::string > & getMols() const
Base FPI for the fingerprinter used to rule out impossible matches.
const ExplicitBitVect & getFingerprint(unsigned int idx) const
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
virtual unsigned int size() const
std::vector< ExplicitBitVect * > & getFingerprints()
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const =0
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
const std::vector< ExplicitBitVect * > & getFingerprints() const
unsigned int addFingerprint(const ExplicitBitVect &v)
Adds a raw bit vector to the fingerprinter.
Base class API for holding molecules to substructure search.
virtual unsigned int addMol(const ROMol &m)=0
virtual unsigned int size() const =0
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
Concrete class that holds molecules in memory.
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
std::vector< boost::shared_ptr< ROMol > > & getMols()
virtual unsigned int addMol(const ROMol &m)
virtual unsigned int size() const
Get the current library size.
static void molFromPickle(const std::string &pickle, ROMol *mol)
constructs a molecule from a pickle stored in a string
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
Uses the pattern fingerprinter to rule out matches.
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const
Caller owns the vector!
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
Substructure Search a library of molecules.
unsigned int addMol(const ROMol &mol)
Add a molecule to the library.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
void initFromStream(std::istream &ss)
initializes from a stream pickle
const MolHolderBase & getMolecules() const
const FPHolderBase & getFingerprints() const
bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
void initFromString(const std::string &text)
initializes from a string pickle
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying fingerprint holder implementation.
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
bool hasMatch(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Returns true if any match exists for the query.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
unsigned int countMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Return the number of matches for the query between the given indices.
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
std::vector< unsigned int > getMatches(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1)
Get the matching indices for the query.
std::vector< unsigned int > getMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1)
Get the matching indices for the query between the given indices.
unsigned int countMatches(const ROMol &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1)
Return the number of matches for the query.
void resetHolders()
access required for serialization
unsigned int size() const
return the number of molecules in the library
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
SubstructLibrary(const std::string &pickle)
std::string Serialize() const
returns a string with a serialized (pickled) representation
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:39
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
Definition: export.h:749
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
Std stuff.
Definition: Abbreviations.h:17
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)