RDKit
Open-source cheminformatics and machine learning.
SmilesParse.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2021 Greg Landrum and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESPARSE_H
12 #define RD_SMILESPARSE_H
13 
14 #include <GraphMol/RWMol.h>
16 #include <string>
17 #include <exception>
18 #include <map>
19 
20 namespace RDKit {
21 
//! Options controlling how a SMILES string is turned into a molecule.
/*!
  Every member has an in-class default, so a default-constructed instance can
  be passed to SmilesToMol() as is.

  \note \c replacements is a raw pointer and this struct declares no
  destructor, so the map is never freed here; it has to stay alive for as
  long as the params object is in use.
*/
struct RDKIT_SMILESPARSE_EXPORT SmilesParserParams {
  int debugParse = 0;   /**< enable debugging in the SMILES parser*/
  bool sanitize = true; /**< sanitize the molecule after building it */
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMILES "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES*/
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool removeHs = true;  /**< remove Hs after constructing the molecule */
  bool useLegacyStereo =
      true; /**< use the legacy stereochemistry perception code */
};
//! Construct a molecule from the SMILES in \c smi using the options in
//! \c params (declaration only; the definition is not in this header).
/*!
  The inline SmilesToMol() convenience overload in this header forwards to
  this function and documents the returned pointer as owned by the caller.
*/
35 RDKIT_SMILESPARSE_EXPORT RWMol *SmilesToMol(const std::string &smi,
36  const SmilesParserParams &params);
37 
//! Takes a SMILES string and returns a raw Atom pointer (declaration only).
//! NOTE(review): presumably parses a single-atom SMILES; ownership of the
//! result and the behaviour on invalid input are not visible here - confirm
//! against the implementation.
38 RDKIT_SMILESPARSE_EXPORT Atom *SmilesToAtom(const std::string &smi);
//! Takes a SMILES string and returns a raw Bond pointer (declaration only).
//! NOTE(review): presumably parses a single-bond SMILES; ownership of the
//! result and the behaviour on invalid input are not visible here - confirm
//! against the implementation.
39 RDKIT_SMILESPARSE_EXPORT Bond *SmilesToBond(const std::string &smi);
40 
41 //! Construct a molecule from a SMILES string
42 /*!
43  \param smi the SMILES to convert
44  \param debugParse toggles verbose debugging information from the parser
45  \param sanitize toggles H removal and sanitization of the molecule
46  \param replacements a string->string map of replacement strings. See below
47  for more information about replacements.
48 
49  \return a pointer to the new molecule; the caller is responsible for free'ing
50  this.
51 
52  The optional replacements map can be used to do string substitution of
53  abbreviations
54  in the input SMILES. The set of substitutions is repeatedly looped through
55  until
56  the string no longer changes. It is the responsibility of the caller to make
57  sure
58  that substitutions results in legal and sensible SMILES.
59 
60  Examples of substitutions:
61  \code
62  CC{Q}C with {"{Q}":"OCCO"} -> CCOCCOC
63  C{A}C{Q}C with {"{Q}":"OCCO", "{A}":"C1(CC1)"} -> CC1(CC1)COCCOC
64  C{A}C{Q}C with {"{Q}":"{X}CC{X}", "{A}":"C1CC1", "{X}":"N"} -> CC1CC1CNCCNC
65  \endcode
66 
67  */
69  const std::string &smi, int debugParse = 0, bool sanitize = true,
70  std::map<std::string, std::string> *replacements = nullptr) {
71  SmilesParserParams params;
72  params.debugParse = debugParse;
73  params.replacements = replacements;
74  if (sanitize) {
75  params.sanitize = true;
76  params.removeHs = true;
77  } else {
78  params.sanitize = false;
79  params.removeHs = false;
80  }
81  return SmilesToMol(smi, params);
82 };
83 
//! Options controlling how a SMARTS string is turned into a query molecule.
/*!
  Every member has an in-class default, so a default-constructed instance can
  be passed to SmartsToMol() as is.

  \note \c replacements is a raw pointer and this struct declares no
  destructor, so the map is never freed here; it has to stay alive for as
  long as the params object is in use.
*/
struct RDKIT_SMILESPARSE_EXPORT SmartsParserParams {
  int debugParse = 0; /**< enable debugging in the SMARTS parser*/
  std::map<std::string, std::string> *replacements =
      nullptr;               /**< allows SMARTS "macros" */
  bool allowCXSMILES = true; /**< recognize and parse CXSMILES extensions */
  bool strictCXSMILES =
      true; /**< throw an exception if the CXSMILES parsing fails */
  bool parseName = true; /**< parse (and set) the molecule name as well */
  bool mergeHs =
      true; /**< toggles merging H atoms in the SMARTS into neighboring atoms*/
};
//! Construct a molecule from the SMARTS in \c sma using the options in
//! \c ps (declaration only; the definition is not in this header).
/*!
  The inline SmartsToMol() convenience overload in this header forwards to
  this function and documents the returned pointer as owned by the caller.
*/
95 RDKIT_SMILESPARSE_EXPORT RWMol *SmartsToMol(const std::string &sma,
96  const SmartsParserParams &ps);
97 
98 //! Construct a molecule from a SMARTS string
99 /*!
100  \param sma the SMARTS to convert
101  \param debugParse toggles verbose debugging information from the parser
102  \param mergeHs toggles merging H atoms in the SMARTS into neighboring
103  atoms
104  \param replacements a string->string map of replacement strings.
105  \see SmilesToMol for more information about replacements
106 
107  \return a pointer to the new molecule; the caller is responsible for free'ing
108  this.
109  */
111  const std::string &sma, int debugParse = 0, bool mergeHs = false,
112  std::map<std::string, std::string> *replacements = nullptr) {
114  ps.debugParse = debugParse;
115  ps.mergeHs = mergeHs;
116  ps.replacements = replacements;
117  return SmartsToMol(sma, ps);
118 };
119 
//! Takes a SMARTS string and returns a raw Atom pointer (declaration only).
//! NOTE(review): presumably parses a single-atom SMARTS; ownership of the
//! result and the behaviour on invalid input are not visible here - confirm
//! against the implementation.
120 RDKIT_SMILESPARSE_EXPORT Atom *SmartsToAtom(const std::string &sma);
//! Takes a SMARTS string and returns a raw Bond pointer (declaration only).
//! NOTE(review): presumably parses a single-bond SMARTS; ownership of the
//! result and the behaviour on invalid input are not visible here - confirm
//! against the implementation.
121 RDKIT_SMILESPARSE_EXPORT Bond *SmartsToBond(const std::string &sma);
122 
//! Exception type that carries a parser error message.
/*!
  The message is copied into the exception on construction, so the argument
  does not need to outlive the exception object.
*/
class RDKIT_SMILESPARSE_EXPORT SmilesParseException : public std::exception {
 public:
  //! Builds the exception from a C string.
  //! \param msg must be non-null: it is used to construct a std::string.
  SmilesParseException(const char *msg) : _msg(msg) {}
  //! Builds the exception from a std::string.
  //! Taken by const reference so that the only copy made is the one stored
  //! in the exception itself.
  SmilesParseException(const std::string &msg) : _msg(msg) {}
  //! \return the stored message; the pointer remains valid for the lifetime
  //! of this exception object.
  const char *what() const noexcept override { return _msg.c_str(); }
  ~SmilesParseException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the error message
};
133 
134 inline std::unique_ptr<RDKit::RWMol> operator"" _smiles(const char *text,
135  size_t len) {
136  std::string smi(text, len);
137  RWMol *ptr = nullptr;
138  try {
139  ptr = SmilesToMol(smi);
140  } catch (const RDKit::MolSanitizeException &) {
141  ptr = nullptr;
142  }
143  return std::unique_ptr<RWMol>(ptr);
144 }
145 inline std::unique_ptr<RDKit::RWMol> operator"" _smarts(const char *text,
146  size_t len) {
147  std::string smi(text, len);
148  // no need for exception handling here: SmartsToMol() doesn't do
149  // sanitization
150  RWMol *ptr = SmartsToMol(smi);
151  return std::unique_ptr<RWMol>(ptr);
152 }
153 
154 } // namespace RDKit
155 
156 #endif
Defines the editable molecule class RWMol.
The class for representing atoms.
Definition: Atom.h:68
class for representing a bond
Definition: Bond.h:46
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
SmilesParseException(const std::string msg)
Definition: SmilesParse.h:126
SmilesParseException(const char *msg)
Definition: SmilesParse.h:125
const char * what() const noexcept override
Definition: SmilesParse.h:127
~SmilesParseException() noexcept override=default
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:441
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:18
RDKIT_SMILESPARSE_EXPORT Atom * SmartsToAtom(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT RWMol * SmartsToMol(const std::string &sma, const SmartsParserParams &ps)
RDKIT_SMILESPARSE_EXPORT Atom * SmilesToAtom(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT Bond * SmilesToBond(const std::string &smi)
RDKIT_SMILESPARSE_EXPORT Bond * SmartsToBond(const std::string &sma)
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:86
std::map< std::string, std::string > * replacements
Definition: SmilesParse.h:25