33 #ifndef RDK_SUBSTRUCT_LIBRARY
34 #define RDK_SUBSTRUCT_LIBRARY
52 #include <boost/lexical_cast.hpp>
73 virtual boost::shared_ptr<ROMol>
getMol(
unsigned int)
const = 0;
76 virtual unsigned int size()
const = 0;
85 std::vector<boost::shared_ptr<ROMol>> mols;
91 mols.push_back(boost::make_shared<ROMol>(m));
95 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
100 unsigned int size()
const override {
101 return rdcast<unsigned int>(mols.size());
104 std::vector<boost::shared_ptr<ROMol>> &
getMols() {
return mols; }
105 const std::vector<boost::shared_ptr<ROMol>> &
getMols()
const {
return mols; }
117 std::vector<std::string> mols;
135 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
137 boost::shared_ptr<ROMol> mol(
new ROMol);
142 unsigned int size()
const override {
143 return rdcast<unsigned int>(mols.size());
146 std::vector<std::string> &
getMols() {
return mols; }
147 const std::vector<std::string> &
getMols()
const {
return mols; }
161 std::vector<std::string> mols;
167 bool doIsomericSmiles =
true;
175 mols.push_back(smiles);
179 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
182 boost::shared_ptr<ROMol> mol(
SmilesToMol(mols[idx]));
186 unsigned int size()
const override {
187 return rdcast<unsigned int>(mols.size());
190 std::vector<std::string> &
getMols() {
return mols; }
191 const std::vector<std::string> &
getMols()
const {
return mols; }
210 std::vector<std::string> mols;
216 bool doIsomericSmiles =
true;
224 mols.push_back(smiles);
228 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const override {
233 m->updatePropertyCache();
235 return boost::shared_ptr<ROMol>(m);
238 unsigned int size()
const override {
239 return rdcast<unsigned int>(mols.size());
242 std::vector<std::string> &
getMols() {
return mols; }
243 const std::vector<std::string> &
getMols()
const {
return mols; }
248 std::vector<ExplicitBitVect *> fps;
252 for (
size_t i = 0; i < fps.size(); ++i)
delete fps[i];
255 virtual unsigned int size()
const {
return rdcast<unsigned int>(fps.size()); }
259 fps.push_back(makeFingerprint(m));
260 return rdcast<unsigned int>(fps.size() - 1);
268 return rdcast<unsigned int>(fps.size() - 1);
303 unsigned int numBits;
315 static const unsigned int DEFAULT_NUM_BITS = 2048;
316 return DEFAULT_NUM_BITS;
326 std::vector<unsigned int> *atomCounts =
nullptr;
328 const bool tautomericFingerprint =
true;
330 tautomericFingerprint);
343 virtual unsigned int addKey(
const std::string &) = 0;
347 virtual const std::string &
getKey(
unsigned int)
const = 0;
350 virtual std::vector<std::string>
getKeys(
const std::vector<unsigned int> &indices)
const = 0;
352 virtual unsigned int size()
const = 0;
356 std::string propname;
357 std::vector<std::string> keys;
358 const std::string empty_string = {};
367 std::vector<std::string> &
getKeys() {
return keys; }
368 const std::vector<std::string> &
getKeys()
const {
return keys; }
372 if (m.getPropIfPresent(propname, key)) {
373 keys.push_back(std::move(key));
377 const static std::string prefix(
"LIBIDX-");
378 keys.emplace_back(prefix + boost::lexical_cast<std::string>(keys.size()));
380 return keys.size() - 1u;
383 unsigned int addKey(
const std::string &key)
override {
385 return keys.size() - 1u;
388 const std::string &
getKey(
unsigned int idx)
const override {
393 std::vector<std::string>
getKeys(
const std::vector<unsigned int> &indices)
const override{
394 std::vector<std::string> res;
395 std::transform(indices.begin(), indices.end(), std::back_inserter(res),
396 [=](
unsigned idx){return keys.at(idx);});
399 unsigned int size()
const override {
501 boost::shared_ptr<MolHolderBase> molholder;
502 boost::shared_ptr<FPHolderBase> fpholder;
503 boost::shared_ptr<KeyHolderBase> keyholder;
507 bool is_tautomerquery =
false;
508 std::vector<unsigned int> searchOrder;
512 : molholder(new
MolHolder), fpholder(), keyholder(), mols(molholder.get()) {}
515 : molholder(std::move(molecules)),
518 mols(molholder.get()),
522 boost::shared_ptr<FPHolderBase> fingerprints)
523 : molholder(std::move(molecules)),
524 fpholder(std::move(fingerprints)),
526 mols(molholder.get()),
527 fps(fpholder.get()) {
528 if (fpholder.get() &&
530 is_tautomerquery =
true;
535 boost::shared_ptr<KeyHolderBase> keys)
536 : molholder(std::move(molecules)),
538 keyholder(std::move(keys)),
539 mols(molholder.get()),
541 if (fpholder.get() &&
543 is_tautomerquery =
true;
548 boost::shared_ptr<FPHolderBase> fingerprints,
549 boost::shared_ptr<KeyHolderBase> keys)
550 : molholder(std::move(molecules)),
551 fpholder(std::move(fingerprints)),
552 keyholder(std::move(keys)),
553 mols(molholder.get()),
554 fps(fpholder.get()) {
555 if (fpholder.get() &&
557 is_tautomerquery =
true;
564 mols(molholder.get()),
567 if (fpholder.get() &&
569 is_tautomerquery =
true;
581 boost::shared_ptr<FPHolderBase> &
getFpHolder() {
return fpholder; }
597 PRECONDITION(mols,
"Molecule holder NULL in SubstructLibrary");
620 return *keyholder.get();
628 return *keyholder.get();
653 template <
class Query>
655 bool recursionPossible =
true,
656 bool useChirality =
true,
657 bool useQueryQueryMatches =
false,
659 int maxResults = -1)
const {
664 return getMatches(query, 0, size(), params, numThreads, maxResults);
667 template <
class Query>
671 int maxResults = -1)
const {
672 return getMatches(query, 0, size(), params, numThreads, maxResults);
690 template <
class Query>
692 const Query &query,
unsigned int startIdx,
unsigned int endIdx,
693 bool recursionPossible =
true,
bool useChirality =
true,
694 bool useQueryQueryMatches =
false,
int numThreads = -1,
695 int maxResults = -1)
const {
700 return getMatches(query, startIdx, endIdx, params, numThreads, maxResults);
704 unsigned int startIdx,
708 int maxResults = -1)
const;
711 unsigned int startIdx,
715 int maxResults = -1)
const;
718 unsigned int startIdx,
722 int maxResults = -1)
const;
736 template <
class Query>
737 unsigned int countMatches(
const Query &query,
bool recursionPossible =
true,
738 bool useChirality =
true,
739 bool useQueryQueryMatches =
false,
740 int numThreads = -1)
const {
745 return countMatches(query, 0, size(), params, numThreads);
748 template <
class Query>
751 int numThreads = -1)
const {
752 return countMatches(query, 0, size(), params, numThreads);
771 template <
class Query>
773 unsigned int endIdx,
bool recursionPossible =
true,
774 bool useChirality =
true,
775 bool useQueryQueryMatches =
false,
776 int numThreads = -1)
const {
781 return countMatches(query, startIdx, endIdx, params, numThreads);
788 int numThreads = -1)
const;
793 int numThreads = -1)
const;
798 int numThreads = -1)
const;
812 template <
class Query>
813 bool hasMatch(
const Query &query,
bool recursionPossible =
true,
814 bool useChirality =
true,
bool useQueryQueryMatches =
false,
815 int numThreads = -1)
const {
820 return hasMatch(query, 0, size(), params, numThreads);
823 template <
class Query>
825 int numThreads = -1)
const {
826 return hasMatch(query, 0, size(), params, numThreads);
841 template <
class Query>
842 bool hasMatch(
const Query &query,
unsigned int startIdx,
unsigned int endIdx,
843 bool recursionPossible =
true,
bool useChirality =
true,
844 bool useQueryQueryMatches =
false,
int numThreads = -1)
const {
849 return hasMatch(query, startIdx, endIdx, params, numThreads);
852 bool hasMatch(
const ROMol &query,
unsigned int startIdx,
unsigned int endIdx,
854 int numThreads = -1)
const;
858 int numThreads = -1)
const;
862 int numThreads = -1)
const;
868 boost::shared_ptr<ROMol>
getMol(
unsigned int idx)
const {
870 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
881 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
887 PRECONDITION(mols,
"molholder is null in SubstructLibrary");
888 return rdcast<unsigned int>(molholder->size());
893 for (
const auto idx : order) {
894 if (idx >= mols->
size()) {
908 is_tautomerquery =
false;
909 mols = molholder.get();
910 fps = fpholder.get();
912 is_tautomerquery =
true;
Contains general bit-comparison and similarity operations.
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
#define PRECONDITION(expr, mess)
Defines a class for managing bundles of molecules.
pulls in the core RDKit functionality
a class for bit vectors that are densely occupied
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Concrete class that holds binary cached molecules in memory.
unsigned int size() const override
Get the current library size.
const std::vector< std::string > & getMols() const
unsigned int addMol(const ROMol &m) override
std::vector< std::string > & getMols()
unsigned int addBinary(const std::string &pickle)
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
Concrete class that holds smiles strings in memory.
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
const std::vector< std::string > & getMols() const
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
unsigned int addMol(const ROMol &m) override
unsigned int size() const override
Get the current library size.
Concrete class that holds trusted smiles strings in memory.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
std::vector< std::string > & getMols()
unsigned int addSmiles(const std::string &smiles)
unsigned int addMol(const ROMol &m) override
unsigned int size() const override
Get the current library size.
CachedTrustedSmilesMolHolder()
const std::vector< std::string > & getMols() const
Base FPI for the fingerprinter used to rule out impossible matches.
const ExplicitBitVect & getFingerprint(unsigned int idx) const
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
virtual unsigned int size() const
std::vector< ExplicitBitVect * > & getFingerprints()
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const =0
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
unsigned int addFingerprint(ExplicitBitVect *v)
const std::vector< ExplicitBitVect * > & getFingerprints() const
unsigned int addFingerprint(const ExplicitBitVect &v)
const std::vector< std::string > & getKeys() const
KeyFromPropHolder(const std::string &propname="_Name")
std::vector< std::string > & getKeys()
unsigned int addKey(const std::string &key) override
unsigned int size() const override
Get the current key holder size.
std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const override
unsigned int addMol(const ROMol &m) override
Add a key to the database getting it from the molecule.
const std::string & getKey(unsigned int idx) const override
std::string & getPropName()
const std::string & getPropName() const
virtual const std::string & getKey(unsigned int) const =0
virtual std::vector< std::string > getKeys(const std::vector< unsigned int > &indices) const =0
virtual unsigned int addMol(const ROMol &m)=0
Add a key to the database getting it from the molecule.
virtual unsigned int size() const =0
Get the current key holder size.
virtual unsigned int addKey(const std::string &)=0
MolBundle contains a collection of related ROMols.
Base class API for holding molecules to substructure search.
virtual unsigned int addMol(const ROMol &m)=0
virtual unsigned int size() const =0
Get the current library size.
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
Concrete class that holds molecules in memory.
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
unsigned int addMol(const ROMol &m) override
std::vector< boost::shared_ptr< ROMol > > & getMols()
unsigned int size() const override
Get the current library size.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const override
static void molFromPickle(const std::string &pickle, ROMol *mol, unsigned int propertyFlags)
constructs a molecule from a pickle stored in a string
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
PatternHolder(unsigned int numBits)
unsigned int & getNumBits()
static unsigned int defaultNumBits()
const unsigned int & getNumBits() const
RWMol is a molecule class that is intended to be edited.
Substructure Search a library of molecules.
unsigned int countMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
unsigned int addMol(const ROMol &mol)
Add a molecule to the library.
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
void initFromStream(std::istream &ss)
initializes from a stream pickle
const MolHolderBase & getMolecules() const
const FPHolderBase & getFingerprints() const
bool hasMatch(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Returns true if any match exists for the query.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints, boost::shared_ptr< KeyHolderBase > keys)
unsigned int countMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
Return the number of matches for the query.
void initFromString(const std::string &text)
initializes from a string pickle
unsigned int countMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying fingerprint holder implementation.
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
std::vector< unsigned int > getMatches(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query between the given indices.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
unsigned int countMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
std::vector< unsigned int > getMatches(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
void setSearchOrder(const std::vector< unsigned int > &order)
does error checking
bool hasMatch(const Query &query, unsigned int startIdx, unsigned int endIdx, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1) const
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
const std::vector< unsigned int > & getSearchOrder() const
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< KeyHolderBase > keys)
const KeyHolderBase & getKeys() const
Get the underlying key holder implementation.
bool hasMatch(const MolBundle &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
KeyHolderBase & getKeys()
Get the underlying key holder implementation.
unsigned int countMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
bool hasMatch(const Query &query, const SubstructMatchParameters &params, int numThreads=-1) const
overload
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
std::vector< unsigned int > getMatches(const ROMol &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
std::vector< unsigned int > getMatches(const Query &query, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
std::vector< unsigned int > & getSearchOrder()
bool hasMatch(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
void resetHolders()
access required for serialization
unsigned int size() const
return the number of molecules in the library
std::vector< unsigned int > getMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1, int maxResults=-1) const
overload
const boost::shared_ptr< KeyHolderBase > & getKeyHolder() const
Get the underlying key holder implementation.
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
SubstructLibrary(const std::string &pickle)
std::string Serialize() const
returns a string with a serialized (pickled) representation
unsigned int countMatches(const TautomerQuery &query, unsigned int startIdx, unsigned int endIdx, const SubstructMatchParameters &params, int numThreads=-1) const
overload
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
void toStream(std::ostream &ss) const
serializes (pickles) to a stream
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
std::vector< unsigned int > getMatches(const Query &query, bool recursionPossible=true, bool useChirality=true, bool useQueryQueryMatches=false, int numThreads=-1, int maxResults=-1) const
Get the matching indices for the query.
ExplicitBitVect * makeFingerprint(const ROMol &m) const override
Caller owns the vector!
TautomerPatternHolder(unsigned int numBits)
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles an EnumerationStrategy and adds the results to a stream ss
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=nullptr, ExplicitBitVect *setOnlyBits=nullptr, bool tautomericFingerprint=false)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
bool recursionPossible
Allow recursive queries.
bool useQueryQueryMatches