RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <RDGeneral/export.h>
33 #ifndef ENUMERATION_STRATEGY_H
34 #define ENUMERATION_STRATEGY_H
35 
36 #include "EnumerateTypes.h"
37 #include "../Reaction.h"
38 #include <utility>
39 #include <vector>
41 #include <cstdint>
42 #ifdef RDK_USE_BOOST_SERIALIZATION
43 #include <boost/serialization/assume_abstract.hpp>
44 #include <boost/serialization/vector.hpp>
45 // the next two includes need to be there for boost 1.56
46 #include <boost/serialization/singleton.hpp>
47 #include <boost/serialization/extended_type_info.hpp>
48 #include <boost/serialization/shared_ptr.hpp>
49 #endif
51 
52 #include <GraphMol/RDKitBase.h>
53 
54 namespace RDKit {
55 
56 //! class for flagging enumeration strategy errors
58  : public std::exception {
59  public:
60  EnumerationStrategyException(const char *msg) : _msg(msg) {}
61  EnumerationStrategyException(std::string msg) : _msg(std::move(msg)) {}
62  const char *what() const noexcept override { return _msg.c_str(); }
63  ~EnumerationStrategyException() noexcept override = default;
64 
65  private:
66  std::string _msg;
67 };
68 
69 //! Return the number of elements per input vector
70 /*! \param bbs vector<vector<T> >
71 
72  \result vector<unint64_t> number of elements in each vector
73  */
74 template <class T>
75 EnumerationTypes::RGROUPS getSizesFromBBs(
76  const std::vector<std::vector<T>> &bbs) {
78  for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
79  return sizes;
80 }
81 
82 //! getSizesFromReactants
83 //! Helper function for enumeration, bbs are stored in a
84 //! std::vector< std::vector<boost:shared_ptr<ROMol> >
85 //
87  const std::vector<MOL_SPTR_VECT> &bbs);
88 
89 //! getReactantsFromRGroups
90 //! Helper function for enumeration, bbs are stored in a
91 //! std::vector< std::vector<boost:shared_ptr<ROMol> >
92 //
94 getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
95  const EnumerationTypes::RGROUPS &rgroups);
96 
97 //! computeNumProducts
98 //! Returns the number of possible product combination from
99 //! The given numbers of building blocks for each rgroup
100 //! or EnumerationStrategyBase::EnumerationOverflow if the
101 //! number will not fit into the machines integer type.
102 //! n.b. An overflow simply means there are a lot of products
103 //! not that they cannot be enumerated
105  const EnumerationTypes::RGROUPS &sizes);
106 
107 //! Base Class for enumeration strategies
108 //! Usage:
109 //! EnumerationStrategyBase must be initialized with both a reaction
110 //! and the building block (molecule) vector to be sampled.
111 //!
112 //! \verbatim
113 //! EnumerationStrategyBase &eb = ...
114 //! if(eb) { // can we get another entry
115 //! const std::vector<int> &v = eb.next();
116 //! v[0] // RGroup 0 position
117 //! v[1] // RGroup 1 position...
118 //! }
119 //! \endverbatim
120 
122  protected:
123  EnumerationTypes::RGROUPS m_permutation; // where are we currently?
125  m_permutationSizes; // m_permutationSizes num bbs per group
126  boost::uint64_t
127  m_numPermutations{}; // total number of permutations for this group
128  // -1 if > ssize_t::max
129  public:
130  static const boost::uint64_t EnumerationOverflow =
131  static_cast<boost::uint64_t>(-1);
132  EnumerationStrategyBase() : m_permutation(), m_permutationSizes() {}
133 
135 
136  virtual const char *type() const { return "EnumerationStrategyBase"; }
137 
138  //! Initialize the enumerator based on the reaction and the
139  //! supplied building blocks
140  //! This is the standard API point.
141  //! This calls the derived class's initializeStrategy method which must be
142  //! implemented
143  void initialize(const ChemicalReaction &reaction,
144  const EnumerationTypes::BBS &building_blocks) {
145  // default initialization, may be overridden (sets the # reactants
146  // and computes the default # of permutations)
147  m_permutationSizes = getSizesFromBBs(building_blocks);
148  m_permutation.resize(m_permutationSizes.size());
149 
150  m_numPermutations = computeNumProducts(m_permutationSizes);
151  std::fill(m_permutation.begin(), m_permutation.end(), 0);
152 
153  initializeStrategy(reaction, building_blocks);
154  }
155 
156  // ! Initialize derived class. Must exist.
157  // ! EnumerationStrategyBase structures are already initialized:
158  // ! m_permutationSizes - [ length of building blocks for each reactant set ]
159  // ! m_numPermutations - number of possible permutations
160  // ! ( -1 if not computable )
161  // ! m_permutation - the first permutation, always the first supplied
162  // ! reactants
163  virtual void initializeStrategy(
164  const ChemicalReaction &reaction,
165  const EnumerationTypes::BBS &building_blocks) = 0;
166 
167  //! returns true if there are more permutations left
168  //! random enumerators may always return true...
169  virtual operator bool() const = 0;
170 
171  //! The current permutation {r1, r2, ...}
172  virtual const EnumerationTypes::RGROUPS &next() = 0;
173 
174  //! copy the enumeration strategy complete with current state
175  virtual EnumerationStrategyBase *copy() const = 0;
176 
177  //! The current position in the enumeration
178  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
179 
180  //! a result of EnumerationOverflow indicates that the number of
181  //! permutations is not computable with the current
182  //! rdlonglong size.
183  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
184 
185  //! Returns how many permutations have been processed by this strategy
186  virtual boost::uint64_t getPermutationIdx() const = 0;
187 
188  //! Skip the specified number of permutations (useful for
189  //! resetting state to a known position)
190  bool skip(boost::uint64_t skipCount) {
191  for (boost::uint64_t i = 0; i < skipCount; ++i) next();
192  return true;
193  }
194 
195  protected:
196  //! Initialize the internal data structures
197  //! i.e. RGROUPS = {10,40,50};
199  m_permutation.resize(rgroups.size());
200  m_permutationSizes = rgroups;
201  m_numPermutations = computeNumProducts(m_permutationSizes);
202  std::fill(m_permutation.begin(), m_permutation.end(), 0);
203  }
204 
205  private:
206  friend class boost::serialization::access;
207  template <class Archive>
208  void serialize(Archive &ar, const unsigned int /*version*/) {
209  ar &m_permutation;
210  ar &m_permutationSizes;
211  ar &m_numPermutations;
212  }
213 };
214 #ifdef RDK_USE_BOOST_SERIALIZATION
215 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
216 #endif
217 } // namespace RDKit
218 
219 #ifdef RDK_USE_BOOST_SERIALIZATION
220 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
221 #endif
222 
223 #endif
pulls in the core RDKit functionality
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:121
virtual EnumerationStrategyBase * copy() const =0
copy the enumeration strategy complete with current state
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
bool skip(boost::uint64_t skipCount)
virtual const EnumerationTypes::RGROUPS & next()=0
The current permutation {r1, r2, ...}.
virtual const char * type() const
virtual boost::uint64_t getPermutationIdx() const =0
Returns how many permutations have been processed by this strategy.
virtual void initializeStrategy(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)=0
EnumerationTypes::RGROUPS m_permutation
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
boost::uint64_t getNumPermutations() const
EnumerationTypes::RGROUPS m_permutationSizes
class for flagging enumeration strategy errors
~EnumerationStrategyException() noexcept override=default
const char * what() const noexcept override
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:49
std::vector< boost::uint64_t > RGROUPS
std::vector< MOL_SPTR_VECT > BBS
Std stuff.
Definition: Abbreviations.h:18
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T >> &bbs)
Return the number of elements per input vector.
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)