RDKit
Open-source cheminformatics and machine learning.
MolStandardize.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Susan H. Leung
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 /*! \file MolStandardize.h
11 
12  \brief Defines the CleanupParameters and some convenience functions.
13 
14 */
15 #include <RDGeneral/export.h>
16 #ifndef __RD_MOLSTANDARDIZE_H__
17 #define __RD_MOLSTANDARDIZE_H__
18 
19 #include <string>
20 #include <GraphMol/RDKitBase.h>
21 
22 namespace RDKit {
23 class RWMol;
24 class ROMol;
25 
26 namespace MolStandardize {
27 
28 //! The CleanupParameters structure defines the default parameters for the
29 //! cleanup process and also allows the user to customize the process by
30 //! changing the parameters.
31 /*!
32 
33  <b>Notes:</b>
34  - To customize the parameters, the structure must be initialized first.
35  (This is another item on the TODO list.)
36  - For this project, not all the parameters have been revealed.
37  (TODO)
38 
39 */
41  // TODO reveal all parameters
42  private:
43  const char *rdbase_cstr = std::getenv("RDBASE");
44 
45  public:
46  std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
47  std::string normalizations;
48  std::string acidbaseFile;
49  std::string fragmentFile;
50  // std::vector<std::string> chargeCorrections;
51  std::string tautomerTransforms;
52  // std::vector<std::string> TautomerScores;
53  int maxRestarts{200}; //! The maximum number of times to attempt to apply the
54  //! series of normalizations (default 200).
55  bool preferOrganic{false}; //! Whether to prioritize organic fragments when
56  //! choosing fragment parent (default False).
57  bool doCanonical{true}; //! Whether to apply normalizations in a
58  //! canonical order
59  int maxTautomers{1000}; //! The maximum number of tautomers to enumerate
60  //! (default 1000).
61  int maxTransforms{1000}; //! The maximum number of tautomer transformations
62  //! to apply (default 1000).
63  bool tautomerRemoveSp3Stereo{
64  true}; //! Whether to remove stereochemistry from sp3
65  //! centers involved in tautomerism (defaults to true)
66  bool tautomerRemoveBondStereo{
67  true}; //! Whether to remove stereochemistry from double
68  //! bonds involved in tautomerism (defaults to true)
69  bool tautomerRemoveIsotopicHs{
70  true}; //! Whether to remove isotopic Hs from centers
71  //! involved in tautomerism (defaults to true)
72  bool tautomerReassignStereo{
73  true}; //! Whether enumerate() should call assignStereochemistry
74  //! on all generated tautomers (defaults to true)
76  : // TODO
77  // normalizations(""),//this->DEFAULT_TRANSFORMS),
78  normalizations(rdbase + "/Data/MolStandardize/normalizations.txt"),
79  acidbaseFile(rdbase + "/Data/MolStandardize/acid_base_pairs.txt"),
80  fragmentFile(rdbase + "/Data/MolStandardize/fragmentPatterns.txt"),
81  // chargeCorrections()
82  tautomerTransforms(rdbase +
83  "/Data/MolStandardize/tautomerTransforms.in")
84  {}
85 };
86 
87 RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
89 
90 //! The cleanup function is equivalent to the
91 //! molvs.Standardizer().standardize(mol) function. It calls the same steps,
92 //! namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
93 //! Reionizer, RDKit AssignStereochemistry.
95  const RWMol &mol,
97 
98 //! TODO not yet finished!
100  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
101 
102 //! Returns the fragment parent of a given molecule. The fragment parent is the
103 //! largest organic covalent unit in the molecule.
105  const RWMol &mol,
107  bool skip_standardize = false);
108 
109 // TODO
111  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
112 
113 // TODO
115  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
116 
117 //! Returns the charge parent of a given molecule. The charge parent is the
118 //! uncharged
119 //! version of the fragment parent.
121  const RWMol &mol,
123  bool skip_standardize = false);
124 
125 // TODO Need to do tautomers first
127  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
128 
129 //! Works the same as Normalizer().normalize(mol)
131  const RWMol *mol,
133 
134 //! Works the same as Reionizer().reionize(mol)
136  const RWMol *mol,
138 
139 //! Convenience function for quickly standardizing a single SMILES string.
140 //! Returns a standardized canonical SMILES string given a SMILES string.
142  const std::string &smiles);
143 
144 //! TODO
146  const std::string &smiles,
148 }; // namespace MolStandardize
149 } // namespace RDKit
150 #endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:489
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
The cleanup function is equivalent to the molvs.Standardizer().standardize(mol) function.
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
Returns the fragment parent of a given molecule. The fragment parent is the largest organic covalent unit in the molecule.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
TODO not yet finished!
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT void superParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT const CleanupParameters defaultCleanupParameters
Definition: Fragment.h:25
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
Convenience function for quickly standardizing a single SMILES string.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
Std stuff.
Definition: Abbreviations.h:17
The CleanupParameters structure defines the default parameters for the cleanup process and also allows the user to customize the process by changing the parameters.