RDKit
Open-source cheminformatics and machine learning.
RGroupData.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2017 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #ifndef RGROUP_DATA
11 #define RGROUP_DATA
12 
13 #include "../RDKitBase.h"
14 #include "RGroupUtils.h"
18 #include <boost/scoped_ptr.hpp>
19 #include <set>
20 #include <vector>
21 
22 
23 namespace RDKit
24 {
25 
26 //! A single rgroup attached to a given core.
27 struct RGroupData {
28  boost::shared_ptr<RWMol> combinedMol;
29  std::vector<boost::shared_ptr<ROMol>> mols; // All the mols in the rgroup
30  std::set<std::string> smilesSet; // used for rgroup equivalence
31  std::string smiles; // smiles for all the mols in the rgroup (with attachments)
32  std::set<int> attachments; // core attachment points
33  bool is_hydrogen = false;
34  bool single_fragment = true;
35  bool multiple_attachments = false;
36  bool is_linker = false;
37  bool labelled = false;
38 
39  private:
40  RGroupData(const RGroupData &rhs);
41 
42  public:
44 
45  void add(boost::shared_ptr<ROMol> newMol,
46  const std::vector<int> &rlabel_attachments) {
47  // some fragments can be add multiple times if they are cyclic
48  for (auto &mol : mols) {
49  if (newMol.get() == mol.get()) {
50  return;
51  }
52  }
53 
54  labelled = false;
55  std::copy(rlabel_attachments.begin(), rlabel_attachments.end(),
56  std::inserter(attachments, attachments.end()));
57 
58  mols.push_back(newMol);
59  std::string smi = MolToSmiles(*newMol, true);
60  // REVIEW: we probably shouldn't be using a set here... the merging of
61  // duplicates is likely not what we want
62  smilesSet.insert(smi);
63  if (!combinedMol.get()) {
64  combinedMol = boost::shared_ptr<RWMol>(new RWMol(*mols[0].get()));
65  } else {
66  ROMol *m = combineMols(*combinedMol.get(), *newMol.get());
67  single_fragment = false;
68  m->updateProps(*combinedMol.get());
69  combinedMol.reset(new RWMol(*m));
70  delete m;
71  }
72  smiles = getSmiles();
74  computeIsHydrogen();
75  is_linker = single_fragment && attachments.size() > 1;
76  }
77 
78  std::map<int, int> getNumBondsToRlabels() const {
79  std::map<int, int> rlabelsUsedCount;
80 
81  for (ROMol::AtomIterator atIt = combinedMol->beginAtoms();
82  atIt != combinedMol->endAtoms(); ++atIt) {
83  Atom *atom = *atIt;
84  int rlabel;
85  if (atom->getPropIfPresent<int>(RLABEL, rlabel)) {
86  rlabelsUsedCount[rlabel] += 1;
87  }
88  }
89  return rlabelsUsedCount;
90  }
91 
92  private:
93  void computeIsHydrogen() { // is the rgroup all Hs
94  for (const auto &mol : mols) {
95  for (ROMol::AtomIterator atIt = mol->beginAtoms();
96  atIt != mol->endAtoms(); ++atIt) {
97  if ((*atIt)->getAtomicNum() > 1) {
98  is_hydrogen = false;
99  return;
100  }
101  }
102  }
103  is_hydrogen = true;
104  }
105 
106  //! compute the canonical smiles for the attachments (bug: removes dupes since we are using a set...)
107  std::string getSmiles() const {
108  std::string s;
109  for (const auto &it : smilesSet) {
110  if (s.length()) {
111  s += ".";
112  }
113  s += it;
114  }
115  return s;
116  }
117 };
118 }
119 
120 #endif
The class for representing atoms.
Definition: Atom.h:69
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:116
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKIT_RDGENERAL_EXPORT const std::string internalRgroupSmiles
Std stuff.
Definition: Abbreviations.h:17
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
const std::string RLABEL
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< boost::shared_ptr< ROMol > > mols
Definition: RGroupData.h:29
std::set< int > attachments
Definition: RGroupData.h:32
bool multiple_attachments
Definition: RGroupData.h:35
std::string smiles
Definition: RGroupData.h:31
std::map< int, int > getNumBondsToRlabels() const
Definition: RGroupData.h:78
std::set< std::string > smilesSet
Definition: RGroupData.h:30
void add(boost::shared_ptr< ROMol > newMol, const std::vector< int > &rlabel_attachments)
Definition: RGroupData.h:45