RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
//! \brief Exception type that carries a human-readable error message.
/*!
  Derives from std::exception. The message handed to the constructor is copied
  into the object and returned unchanged by what().

  NOTE(review): judging by the name, this is raised when a mol file uses a
  feature the parser does not handle - confirm against the parser sources.
*/
class MolFileUnhandledFeatureException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg - NUL-terminated message text; a null pointer is treated as an
                 empty message instead of invoking undefined behaviour
  */
  explicit MolFileUnhandledFeatureException(const char *msg)
      : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg - message text; taken by const reference so that only the copy
                 into the stored member is made
  */
  explicit MolFileUnhandledFeatureException(const std::string &msg)
      : _msg(msg) {}
  //! get the error message
  /*!
    \return pointer to the stored message; it stays valid for as long as this
            exception object is alive
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolFileUnhandledFeatureException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the error message
};
42 
43 //-----
44 // mol files
45 //-----
46 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
47 //! \brief construct a molecule from MDL mol data in a stream
48 /*!
49  * \param inStream - stream containing the data
50  * \param line - current line number (used for error reporting)
51  * \param sanitize - toggles sanitization and stereochemistry
52  * perception of the molecule
53  * \param removeHs - toggles removal of Hs from the molecule. H removal
54  * is only done if the molecule is sanitized
56  * \param strictParsing - if not set, the parser is more lax about correctness
57  * of the contents.
58  *
59  */
61  unsigned int &line,
62  bool sanitize = true,
63  bool removeHs = true,
64  bool strictParsing = true);
65 //! \overload
67  unsigned int &line,
68  bool sanitize = true,
69  bool removeHs = true,
70  bool strictParsing = true);
71 //! \brief construct a molecule from an MDL mol block
72 /*!
73  * \param molBlock - string containing the mol block
74  * \param sanitize - toggles sanitization and stereochemistry
75  * perception of the molecule
76  * \param removeHs - toggles removal of Hs from the molecule. H removal
77  * is only done if the molecule is sanitized
78  * \param strictParsing - if not set, the parser is more lax about correctness
79  * of the contents.
80  */
81 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
82  bool sanitize = true,
83  bool removeHs = true,
84  bool strictParsing = true);
85 
86 //! \brief construct a molecule from an MDL mol file
87 /*!
88  * \param fName - string containing the file name
89  * \param sanitize - toggles sanitization and stereochemistry
90  * perception of the molecule
91  * \param removeHs - toggles removal of Hs from the molecule. H removal
92  * is only done if the molecule is sanitized
93  * \param strictParsing - if not set, the parser is more lax about correctness
94  * of the contents.
95  */
96 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
97  bool sanitize = true,
98  bool removeHs = true,
99  bool strictParsing = true);
100 
101 //! \brief generates an MDL mol block for a molecule
102 /*!
103  * \param mol - the molecule in question
104  * \param includeStereo - toggles inclusion of stereochemistry information
105  * \param confId - selects the conformer to be used
106  * \param kekulize - triggers kekulization of the molecule before it is
107  * written
108  * \param forceV3000 - force generation of a V3000 mol block (happens
109  * automatically with
110  * more than 999 atoms or bonds)
111  */
113  bool includeStereo = true,
114  int confId = -1,
115  bool kekulize = true,
116  bool forceV3000 = false);
117 
118 // \brief generates an MDL v3000 mol block for a molecule
119 /*!
120  * \param mol - the molecule in question
121  * \param includeStereo - toggles inclusion of stereochemistry information
122  * \param confId - selects the conformer to be used
123  * \param kekulize - triggers kekulization of the molecule before it is
124  * written
125  */
126 inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
127  int confId = -1, bool kekulize = true) {
128  return MolToMolBlock(mol, includeStereo, confId, kekulize, true);
129 }
130 
131 //! \brief Writes a molecule to an MDL mol file
132 /*!
133  * \param mol - the molecule in question
134  * \param fName - the name of the file to use
135  * \param includeStereo - toggles inclusion of stereochemistry information
136  * \param confId - selects the conformer to be used
137  * \param kekulize - triggers kekulization of the molecule before it is
138  * written
139  * \param forceV3000 - force generation of a V3000 mol block (happens
140  * automatically with
141  * more than 999 atoms or bonds)
142  */
144  const ROMol &mol, const std::string &fName, bool includeStereo = true,
145  int confId = -1, bool kekulize = true, bool forceV3000 = false);
146 
147 // \brief Writes a molecule to an MDL V3000 mol file
148 /*!
149  * \param mol - the molecule in question
150  * \param fName - the name of the file to use
151  * \param includeStereo - toggles inclusion of stereochemistry information
152  * \param confId - selects the conformer to be used
153  * \param kekulize - triggers kekulization of the molecule before it is
154  * written
155  */
156 inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
157  bool includeStereo = true, int confId = -1,
158  bool kekulize = true) {
159  MolToMolFile(mol, fName, includeStereo, confId, kekulize, true);
160 }
161 
163  int confId = -1);
164 
166  const std::string &fName,
167  int confId = -1);
168 
169 //-----
170 // TPL handling:
171 //-----
172 
173 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
174 /*!
175  \param inStream: the stream from which to read
176  \param line: used to track the line number of errors
177  \param sanitize: toggles sanitization and stereochemistry
178  perception of the molecule
179  \param skipFirstConf: according to the TPL format description, the atomic
180  coords in the atom-information block describe the first
181  conformation and the first conf block describes the second
182  conformation. The CombiCode, on the other hand, writes
183  the first conformation data both to the atom-information
184  block and to the first conf block. We want to be able to
185  read CombiCode-style tpls, so we'll allow this
186  mis-feature
187  to be parsed when this flag is set.
188 */
190  unsigned int &line,
191  bool sanitize = true,
192  bool skipFirstConf = false);
193 
194 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
195 /*!
196  \param fName: the name of the file from which to read
197  \param sanitize: toggles sanitization and stereochemistry
198  perception of the molecule
199  \param skipFirstConf: according to the TPL format description, the atomic
200  coords in the atom-information block describe the first
201  conformation and the first conf block describes the second
202  conformation. The CombiCode, on the other hand, writes
203  the first conformation data both to the atom-information
204  block and to the first conf block. We want to be able to
205  read CombiCode-style tpls, so we'll allow this
206  mis-feature
207  to be parsed when this flag is set.
208 */
209 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
210  bool sanitize = true,
211  bool skipFirstConf = false);
212 
214  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
215  bool writeFirstConfTwice = false);
217  const ROMol &mol, const std::string &fName,
218  const std::string &partialChargeProp = "_GasteigerCharge",
219  bool writeFirstConfTwice = false);
220 
221 //-----
222 // MOL2 handling
223 //-----
224 
225 typedef enum {
226  CORINA = 0 //! supports output from Corina and some dbtranslate output
228 
229 //! \brief construct a molecule from a Tripos mol2 file
230 /*!
231  *
232  * \param fName - string containing the file name
233  * \param sanitize - toggles sanitization of the molecule
234  * \param removeHs - toggles removal of Hs from the molecule. H removal
235  * is only done if the molecule is sanitized
236  * \param variant - the atom type definitions to use
237  * \param cleanupSubstructures - toggles recognition and cleanup of common
238  * substructures
239  */
240 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
241  bool sanitize = true,
242  bool removeHs = true,
243  Mol2Type variant = CORINA,
244  bool cleanupSubstructures = true);
245 
246 //! \brief construct a molecule from Tripos mol2 data in a stream
247 /*!
248  * \param inStream - stream containing the data
249  * \param sanitize - toggles sanitization of the molecule
250  * \param removeHs - toggles removal of Hs from the molecule. H removal
251  * is only done if the molecule is sanitized
252  * \param variant - the atom type definitions to use
253  * \param cleanupSubstructures - toggles recognition and cleanup of common
254  * substructures
255  */
257  std::istream *inStream, bool sanitize = true, bool removeHs = true,
258  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
259 //! \overload
261  std::istream &inStream, bool sanitize = true, bool removeHs = true,
262  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
263 
264 //! \brief construct a molecule from a Tripos mol2 block
265 /*!
266  * \param molBlock - string containing the mol block
267  * \param sanitize - toggles sanitization of the molecule
268  * \param removeHs - toggles removal of Hs from the molecule. H removal
269  * is only done if the molecule is sanitized
270  * \param variant - the atom type definitions to use
271  * \param cleanupSubstructures - toggles recognition and cleanup of common
272  * substructures
273  */
275  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
276  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
277 
279  bool sanitize = true,
280  bool removeHs = true,
281  unsigned int flavor = 0,
282  bool proximityBonding = true);
283 
285  bool sanitize = true,
286  bool removeHs = true,
287  unsigned int flavor = 0,
288  bool proximityBonding = true);
290  std::istream *inStream, bool sanitize = true, bool removeHs = true,
291  unsigned int flavor = 0, bool proximityBonding = true);
293  std::istream &inStream, bool sanitize = true, bool removeHs = true,
294  unsigned int flavor = 0, bool proximityBonding = true);
295 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
296  bool sanitize = true,
297  bool removeHs = true,
298  unsigned int flavor = 0,
299  bool proximityBonding = true);
300 
301 //! \brief generates a PDB block for a molecule
302 /*!
303  * \param mol - the molecule in question
304  * \param confId - selects the conformer to be used
305  * \param flavor - controls what gets written:
306  * flavor & 1 : Write MODEL/ENDMDL lines around each record
307  * flavor & 2 : Don't write any CONECT records
308  * flavor & 4 : Write CONECT records in both directions
309  * flavor & 8 : Don't use multiple CONECTs to encode bond order
310  * flavor & 16 : Write MASTER record
311  * flavor & 32 : Write TER record
312  */
314  int confId = -1,
315  unsigned int flavor = 0);
316 //! \brief Writes a molecule to a PDB file
317 /*!
318  * \param mol - the molecule in question
319  * \param fName - the name of the file to use
320  * \param confId - selects the conformer to be used
321  * \param flavor - controls what gets written:
322  * flavor & 1 : Write MODEL/ENDMDL lines around each record
323  * flavor & 2 : Don't write any CONECT records
324  * flavor & 4 : Write CONECT records in both directions
325  * flavor & 8 : Don't use multiple CONECTs to encode bond order
326  * flavor & 16 : Write MASTER record
327  * flavor & 32 : Write TER record
328  */
330  const std::string &fname,
331  int confId = -1,
332  unsigned int flavor = 0);
333 
334 //! \brief reads a molecule from the metadata in an RDKit-generated SVG file
335 /*!
336  * \param svg - string containing the SVG
337  * \param sanitize - toggles sanitization of the molecule
338  * \param removeHs - toggles removal of Hs from the molecule. H removal
339  * is only done if the molecule is sanitized
340  *
341  * **NOTE** This functionality should be considered beta.
342  */
344  bool sanitize = true,
345  bool removeHs = true);
346 /*! \overload
347  */
349  bool sanitize = true,
350  bool removeHs = true);
351 
352 inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
353  size_t len) {
354  std::string data(text, len);
355  RWMol *ptr = nullptr;
356  try {
357  ptr = MolBlockToMol(data);
358  } catch (const RDKit::MolSanitizeException &) {
359  ptr = nullptr;
360  }
361  return std::unique_ptr<RWMol>(ptr);
362 }
363 inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
364  size_t len) {
365  std::string data(text, len);
366  RWMol *ptr = nullptr;
367  try {
368  ptr = Mol2BlockToMol(data);
369  } catch (const RDKit::MolSanitizeException &) {
370  ptr = nullptr;
371  }
372  return std::unique_ptr<RWMol>(ptr);
373 }
374 
375 inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
376  size_t len) {
377  std::string data(text, len);
378  RWMol *ptr = nullptr;
379  try {
380  ptr = PDBBlockToMol(data);
381  } catch (const RDKit::MolSanitizeException &) {
382  ptr = nullptr;
383  }
384  return std::unique_ptr<RWMol>(ptr);
385 }
386 
387 } // namespace RDKit
388 
389 #endif
pulls in the core RDKit functionality
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:31
~MolFileUnhandledFeatureException() noexcept override
Definition: FileParsers.h:37
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:33
const char * what() const noexcept override
get the error message
Definition: FileParsers.h:36
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:255
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:17
std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:126
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
void MolToV3KMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:156
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
@ CORINA
Definition: FileParsers.h:226
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:46