RDKit
Open-source cheminformatics and machine learning.
SmilesWrite.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2020 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SMILESWRITE_H_012020
12 #define RD_SMILESWRITE_H_012020
13 
14 #include <string>
15 #include <vector>
16 #include <memory>
17 
18 namespace RDKit {
19 class Atom;
20 class Bond;
21 class ROMol;
22 namespace SmilesWrite {
23 
24 //! \brief returns the cxsmiles data for a molecule
25 RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol);
26 
27 //! \brief returns true if the atom number is in the SMILES organic subset
28 RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber);
29 
30 //! \brief returns the SMILES for an atom
31 /*!
32  \param atom : the atom to work with
33  \param doKekule : we're doing kekulized smiles (e.g. don't use
34  lower case for the atom label)
35  \param bondIn : the bond we came into the atom on (unused)
36  \param allHsExplicit : if true, hydrogen counts will be provided for every
37  atom.
38  \param isomericSmiles : if true, isomeric SMILES will be generated
39 */
40 RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom,
41  bool doKekule = false,
42  const Bond *bondIn = nullptr,
43  bool allHsExplicit = false,
44  bool isomericSmiles = true);
45 
46 //! \brief returns the SMILES for a bond
47 /*!
48  \param bond : the bond to work with
49  \param atomToLeftIdx : the index of the atom preceding \c bond
50  in the SMILES
51  \param doKekule : we're doing kekulized smiles (e.g. write out
52  bond orders for aromatic bonds)
53  \param allBondsExplicit : if true, symbols will be included for all bonds.
54 */
55 RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(
56  const Bond *bond, int atomToLeftIdx = -1, bool doKekule = false,
57  bool allBondsExplicit = false);
58 } // namespace SmilesWrite
59 
60 //! \brief returns canonical SMILES for a molecule
61 /*!
62  \param mol : the molecule in question.
63  \param doIsomericSmiles : include stereochemistry and isotope information
64  in the SMILES
65  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
66  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
67  The resulting SMILES is not, of course, canonical.
68  \param canonical : if false, no attempt will be made to canonicalize the
69  SMILES
70  \param allBondsExplicit : if true, symbols will be included for all bonds.
71  \param allHsExplicit : if true, hydrogen counts will be provided for every
72  atom.
73  */
74 RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(
75  const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false,
76  int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false,
77  bool allHsExplicit = false, bool doRandom = false);
78 
79 //! \brief returns a vector of random SMILES for a molecule (may contain
80 //! duplicates)
81 /*!
82  \param mol : the molecule in question.
83  \param numSmiles : the number of SMILES to return
84  \param randomSeed : if >0, will be used to seed the random number generator
85  \param doIsomericSmiles : include stereochemistry and isotope information
86  in the SMILES
87  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
88  \param allBondsExplicit : if true, symbols will be included for all bonds.
89  \param allHsExplicit : if true, hydrogen counts will be provided for every
90  atom.
91  */
92 RDKIT_SMILESPARSE_EXPORT std::vector<std::string> MolToRandomSmilesVect(
93  const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed = 0,
94  bool doIsomericSmiles = true, bool doKekule = false,
95  bool allBondsExplicit = false, bool allHsExplicit = false);
96 
97 //! \brief returns canonical SMILES for part of a molecule
98 /*!
99  \param mol : the molecule in question.
100  \param atomsToUse : indices of the atoms in the fragment
101  \param bondsToUse : indices of the bonds in the fragment. If this is not
102  provided,
103  all bonds between the atoms in atomsToUse will be included
104  \param atomSymbols : symbols to use for the atoms in the output SMILES
105  \param bondSymbols : symbols to use for the bonds in the output SMILES
106  \param doIsomericSmiles : include stereochemistry and isotope information
107  in the SMILES
108  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
109  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
110  The resulting SMILES is not, of course, canonical.
111  \param canonical : if false, no attempt will be made to canonicalize the
112  SMILES
113  \param allBondsExplicit : if true, symbols will be included for all bonds.
114  \param allHsExplicit : if true, hydrogen counts will be provided for every
115  atom.
116  \param doRandom : generate a randomized smiles string by randomly choosing
117  the priority to follow in the DFS traversal. [default false]
118 
119  \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
120 
121  */
122 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(
123  const ROMol &mol, const std::vector<int> &atomsToUse,
124  const std::vector<int> *bondsToUse = nullptr,
125  const std::vector<std::string> *atomSymbols = nullptr,
126  const std::vector<std::string> *bondSymbols = nullptr,
127  bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
128  bool canonical = true, bool allBondsExplicit = false,
129  bool allHsExplicit = false);
130 
131 //! \brief returns canonical CXSMILES for a molecule
132 /*!
133  \param mol : the molecule in question.
134  \param doIsomericSmiles : include stereochemistry and isotope information
135  in the SMILES
136  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
137  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
138  The resulting SMILES is not, of course, canonical.
139  \param canonical : if false, no attempt will be made to canonicalize the
140  SMILES
141  \param allBondsExplicit : if true, symbols will be included for all bonds.
142  \param allHsExplicit : if true, hydrogen counts will be provided for every
143  atom.
144  */
145 RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(
146  const ROMol &mol, bool doIsomericSmiles = true, bool doKekule = false,
147  int rootedAtAtom = -1, bool canonical = true, bool allBondsExplicit = false,
148  bool allHsExplicit = false, bool doRandom = false);
149 
150 //! \brief returns canonical CXSMILES for part of a molecule
151 /*!
152  \param mol : the molecule in question.
153  \param atomsToUse : indices of the atoms in the fragment
154  \param bondsToUse : indices of the bonds in the fragment. If this is not
155  provided,
156  all bonds between the atoms in atomsToUse will be included
157  \param atomSymbols : symbols to use for the atoms in the output SMILES
158  \param bondSymbols : symbols to use for the bonds in the output SMILES
159  \param doIsomericSmiles : include stereochemistry and isotope information
160  in the SMILES
161  \param doKekule : do Kekule smiles (i.e. don't use aromatic bonds)
162  \param rootedAtAtom : make sure the SMILES starts at the specified atom.
163  The resulting SMILES is not, of course, canonical.
164  \param canonical : if false, no attempt will be made to canonicalize the
165  SMILES
166  \param allBondsExplicit : if true, symbols will be included for all bonds.
167  \param allHsExplicit : if true, hydrogen counts will be provided for every
168  atom.
169  \param doRandom : generate a randomized smiles string by randomly choosing
170  the priority to follow in the DFS traversal. [default false]
171 
172  \b NOTE: the bondSymbols are *not* currently used in the canonicalization.
173 
174  */
175 RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(
176  const ROMol &mol, const std::vector<int> &atomsToUse,
177  const std::vector<int> *bondsToUse = nullptr,
178  const std::vector<std::string> *atomSymbols = nullptr,
179  const std::vector<std::string> *bondSymbols = nullptr,
180  bool doIsomericSmiles = true, bool doKekule = false, int rootedAtAtom = -1,
181  bool canonical = true, bool allBondsExplicit = false,
182  bool allHsExplicit = false);
183 
184 } // namespace RDKit
185 #endif
The class for representing atoms.
Definition: Atom.h:69
class for representing a bond
Definition: Bond.h:47
#define RDKIT_SMILESPARSE_EXPORT
Definition: export.h:710
RDKIT_SMILESPARSE_EXPORT bool inOrganicSubset(int atomicNumber)
returns true if the atom number is in the SMILES organic subset
RDKIT_SMILESPARSE_EXPORT std::string getCXExtensions(const ROMol &mol)
returns the cxsmiles data for a molecule
RDKIT_SMILESPARSE_EXPORT std::string GetBondSmiles(const Bond *bond, int atomToLeftIdx=-1, bool doKekule=false, bool allBondsExplicit=false)
returns the SMILES for a bond
RDKIT_SMILESPARSE_EXPORT std::string GetAtomSmiles(const Atom *atom, bool doKekule=false, const Bond *bondIn=nullptr, bool allHsExplicit=false, bool isomericSmiles=true)
returns the SMILES for an atom
Std stuff.
Definition: Abbreviations.h:17
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToCXSmiles(const ROMol &mol, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false)
returns canonical CXSMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::vector< std::string > MolToRandomSmilesVect(const ROMol &mol, unsigned int numSmiles, unsigned int randomSeed=0, bool doIsomericSmiles=true, bool doKekule=false, bool allBondsExplicit=false, bool allHsExplicit=false)
returns a vector of random SMILES for a molecule (may contain duplicates)
RDKIT_SMILESPARSE_EXPORT std::string MolFragmentToSmiles(const ROMol &mol, const std::vector< int > &atomsToUse, const std::vector< int > *bondsToUse=nullptr, const std::vector< std::string > *atomSymbols=nullptr, const std::vector< std::string > *bondSymbols=nullptr, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false)
returns canonical SMILES for part of a molecule
RDKIT_SMILESPARSE_EXPORT std::string MolToCXSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical CXSMILES for a molecule