RDKit
Open-source cheminformatics and machine learning.
MorganGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_MORGANGEN_H_2018_07
13 #define RD_MORGANGEN_H_2018_07
14 
16 #include <cstdint>
17 
18 namespace RDKit {
19 
20 namespace MorganFingerprint {
21 
22 /**
23  \brief Default atom invariants generator for Morgan fingerprint, generates
24  ECFP-type invariants
25 
26  */
28  : public AtomInvariantsGenerator {
29  const bool df_includeRingMembership;
30 
31  public:
32  /**
33  \brief Construct a new MorganAtomInvGenerator object
34 
35  \param includeRingMembership : if set, whether or not the atom is in a ring
36  will be used in the invariant list.
37  */
38  MorganAtomInvGenerator(const bool includeRingMembership = true);
39 
40  std::vector<std::uint32_t> *getAtomInvariants(
41  const ROMol &mol) const override;
42 
43  std::string infoString() const override;
44  MorganAtomInvGenerator *clone() const override;
45 };
46 
47 /**
48  \brief Alternative atom invariants generator for Morgan fingerprint, generates
49  FCFP-type invariants
50 
51  */
53  : public AtomInvariantsGenerator {
54  std::vector<const ROMol *> *dp_patterns;
55 
56  public:
57  /**
58  \brief Construct a new MorganFeatureAtomInvGenerator object
59 
60  \param patterns : if provided, should contain the queries used to assign
61  atom-types. If not provided, feature definitions adapted from reference:
62  Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
63  Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
64  */
65  MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
66 
67  std::vector<std::uint32_t> *getAtomInvariants(
68  const ROMol &mol) const override;
69 
70  std::string infoString() const override;
72 };
73 
74 /**
75  \brief Bond invariants generator for Morgan fingerprint
76 
77  */
79  : public BondInvariantsGenerator {
80  const bool df_useBondTypes;
81  const bool df_useChirality;
82 
83  public:
84  /**
85  \brief Construct a new MorganBondInvGenerator object
86 
87  \param useBondTypes : if set, bond types will be included as a part of the
88  bond invariants
89  \param useChirality : if set, chirality information will be included as a
90  part of the bond invariants
91  */
92  MorganBondInvGenerator(const bool useBondTypes = true,
93  const bool useChirality = false);
94 
95  std::vector<std::uint32_t> *getBondInvariants(
96  const ROMol &mol) const override;
97 
98  std::string infoString() const override;
99  MorganBondInvGenerator *clone() const override;
100  ~MorganBondInvGenerator() override = default;
101 };
102 
103 /**
104  \brief Class for holding Morgan fingerprint specific arguments
105 
106  */
107 template <typename OutputType>
109  : public FingerprintArguments<OutputType> {
110  public:
113  const unsigned int d_radius;
114 
115  OutputType getResultSize() const override;
116 
117  std::string infoString() const override;
118 
119  /**
120  \brief Construct a new MorganArguments object
121 
122  \param radius the number of iterations to grow the fingerprint
123  \param countSimulation if set, use count simulation while generating the
124  fingerprint
125  \param includeChirality if set, chirality information will be added to the
126  generated bit id, independently from bond invariants
127  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
128  have a nonzero invariant
129  \param countBounds boundaries for count simulation, corresponding bit will
130  be set if the count is higher than the number provided for that spot
131  \param fpSize size of the generated fingerprint, does not affect the sparse
132  versions
133  */
134  MorganArguments(const unsigned int radius, const bool countSimulation = false,
135  const bool includeChirality = false,
136  const bool onlyNonzeroInvariants = false,
137  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
138  const std::uint32_t fpSize = 2048);
139 };
140 
141 /**
142  \brief Class for holding the bit-id created from Morgan fingerprint
143  environments and the additional data necessary for extra outputs
144 
145  */
146 template <typename OutputType>
148  : public AtomEnvironment<OutputType> {
149  const OutputType d_code;
150  const unsigned int d_atomId;
151  const unsigned int d_layer;
152 
153  public:
155  const std::vector<std::uint32_t> *atomInvariants,
156  const std::vector<std::uint32_t> *bondInvariants,
157  const AdditionalOutput *additionalOutput,
158  const bool hashResults = false,
159  const std::uint64_t fpSize = 0) const override;
160 
161  /**
162  \brief Construct a new MorganAtomEnv object
163 
164  \param code bit id generated from this environment
165  \param atomId atom id of the atom at the center of this environment
166  \param layer radius of this environment
167  */
168  MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
169  const unsigned int layer);
170 };
171 
172 /**
173  \brief Class that generates atom environments for Morgan fingerprint
174 
175  */
176 template <typename OutputType>
178  : public AtomEnvironmentGenerator<OutputType> {
179  public:
180  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
181  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
182  const std::vector<std::uint32_t> *fromAtoms,
183  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
184  const AdditionalOutput *additionalOutput,
185  const std::vector<std::uint32_t> *atomInvariants,
186  const std::vector<std::uint32_t> *bondInvariants,
187  const bool hashResults = false) const override;
188 
189  std::string infoString() const override;
190 };
191 
192 /**
193  \brief Get a fingerprint generator for Morgan fingerprint
194 
195  \tparam OutputType determines the size of the bitIds and the result, can be 32
196  or 64 bit unsigned integer
197 
198  \param radius the number of iterations to grow the fingerprint
199 
200  \param countSimulation if set, use count simulation while generating the
201  fingerprint
202 
203  \param includeChirality if set, chirality information will be added to the
204  generated bit id; sets the includeChirality flag for both MorganArguments
205  and the default bond generator MorganBondInvGenerator
206 
207  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
208  have a nonzero invariant
209 
210  \param countBounds boundaries for count simulation, corresponding bit will be
211  set if the count is higher than the number provided for that spot
212 
213  \param fpSize size of the generated fingerprint, does not affect the sparse
214  versions
215 
216  \param useBondTypes if set, bond types will be included as a part of the
217  default bond invariants
218 
222  \param atomInvariantsGenerator custom atom invariants generator to use
223  \param bondInvariantsGenerator custom bond invariants generator to use
224  \param ownsAtomInvGen if set, the atom invariants generator is destroyed
225  with the fingerprint generator
226  \param ownsBondInvGen if set, the bond invariants generator is destroyed
227  with the fingerprint generator
228 
229  \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
230 
231 This generator supports the following \c AdditionalOutput types:
232  - \c atomToBits : which bits each atom is the central atom for
233  - \c atomCounts : how many bits each atom sets
234  - \c bitInfoMap : map from bitId to (atomId, radius) pairs
235 
236  */
237 template <typename OutputType>
239  const unsigned int radius, const bool countSimulation = false,
240  const bool includeChirality = false, const bool useBondTypes = true,
241  const bool onlyNonzeroInvariants = false,
242  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
243  BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
244  const std::uint32_t fpSize = 2048,
245  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
246  const bool ownsAtomInvGen = false, const bool ownsBondInvGen = false);
247 
248 } // namespace MorganFingerprint
249 } // namespace RDKit
250 
251 #endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
MorganArguments(const unsigned int radius, const bool countSimulation=false, const bool includeChirality=false, const bool onlyNonzeroInvariants=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048)
Construct a new MorganArguments object.
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this \c AtomInvariantsGenerator and its arguments
MorganAtomInvGenerator * clone() const override
Bond invariants generator for Morgan fingerprint.
std::string infoString() const override
method that returns information about this \c BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
Class that generates atom environments for Morgan fingerprint.
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this \c AtomEnvironmentGenerator and its arguments if any
Alternative atom invariants generator for Morgan fingerprint, generate FCFP-type invariants.
MorganFeatureAtomInvGenerator * clone() const override
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
std::string infoString() const override
method that returns information about this \c AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:169
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(const unsigned int radius, const bool countSimulation=false, const bool includeChirality=false, const bool useBondTypes=true, const bool onlyNonzeroInvariants=false, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false, const bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.
Definition: Abbreviations.h:18