RDKit
Open-source cheminformatics and machine learning.
ScaffoldNetwork.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2019 Greg Landrum and T5 Informatics GmbH
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_SCAFFOLDNETWORK_H
12 #define RD_SCAFFOLDNETWORK_H
13 
14 #include <vector>
15 #include <map>
16 #include <string>
17 #include <sstream>
18 #include <memory>
19 #include <iostream>
20 
21 #ifdef RDK_USE_BOOST_SERIALIZATION
22 #include <RDGeneral/Invariant.h>
24 #include <boost/archive/text_oarchive.hpp>
25 #include <boost/archive/text_iarchive.hpp>
26 #include <boost/serialization/vector.hpp>
27 #include <boost/serialization/shared_ptr.hpp>
29 #endif
30 
31 namespace RDKit {
32 class ROMol;
33 class ChemicalReaction;
34 
35 namespace ScaffoldNetwork {
36 
38  bool includeGenericScaffolds =
39  true; ///< include scaffolds with all atoms replaced by dummies
40  bool includeGenericBondScaffolds =
41  false; ///< include scaffolds with all bonds replaced by single bonds
42  bool includeScaffoldsWithoutAttachments =
43  true; ///< remove attachment points from scaffolds and include the result
44  bool includeScaffoldsWithAttachments =
45  true; ///< Include the version of the scaffold with attachment points
46  bool keepOnlyFirstFragment =
47  true; ///< keep only the first fragment from the bond breaking rule
48  bool pruneBeforeFragmenting =
49  true; ///< Do a pruning/flattening step before starting fragmenting
50  bool flattenIsotopes = true; ///< remove isotopes when flattening
51  bool flattenChirality =
52  true; ///< remove chirality and bond stereo when flattening
53  bool flattenKeepLargest =
54  true; ///< keep only the largest fragment when doing flattening
55 
56  std::vector<std::shared_ptr<ChemicalReaction>>
57  bondBreakersRxns; ///< the reaction(s) used to fragment. Should expect a
58  ///< single reactant and produce two products
61  {"[!#0;R:1]-!@[!#0:2]>>[*:1]-[#0].[#0]-[*:2]"}} {};
62  ScaffoldNetworkParams(const std::vector<std::string> &bondBreakersSmarts);
63 };
64 
65 enum class RDKIT_SCAFFOLDNETWORK_EXPORT EdgeType {
66  Fragment = 1, ///< molecule -> fragment
67  Generic = 2, ///< molecule -> generic molecule (all atoms are dummies)
68  GenericBond = 3, ///< molecule -> generic bond molecule (all bonds single)
69  RemoveAttachment = 4, ///< molecule -> molecule with no attachment points
70  Initialize = 5 ///< molecule -> flattened molecule
71 };
72 
74  size_t beginIdx;
75  size_t endIdx;
76  EdgeType type;
77  NetworkEdge() : beginIdx(0), endIdx(0), type(EdgeType::Initialize){};
78  NetworkEdge(size_t bi, size_t ei, EdgeType typ)
79  : beginIdx(bi), endIdx(ei), type(typ){};
81  return (beginIdx == o.beginIdx) && (endIdx == o.endIdx) && (type == o.type);
82  }
84  return (beginIdx != o.beginIdx) || (endIdx != o.endIdx) || (type != o.type);
85  }
86 #ifdef RDK_USE_BOOST_SERIALIZATION
87  private:
88  friend class boost::serialization::access;
89  template <class Archive>
90  void serialize(Archive &ar, const unsigned int version) {
91  RDUNUSED_PARAM(version);
92  ar &beginIdx;
93  ar &endIdx;
94  ar &type;
95  }
96 #endif
97 };
98 
100  std::vector<std::string> nodes; ///< SMILES for the scaffolds
101  std::vector<unsigned>
102  counts; ///< number of times each scaffold was encountered
103  std::vector<NetworkEdge> edges; ///< edges in the network
105 #ifdef RDK_USE_BOOST_SERIALIZATION
106  ScaffoldNetwork(const std::string &pkl) {
107  std::stringstream iss(pkl);
108  boost::archive::text_iarchive ia(iss);
109  ia >> *this;
110  }
111 
112  private:
113  friend class boost::serialization::access;
114  template <class Archive>
115  void serialize(Archive &ar, const unsigned int version) {
116  RDUNUSED_PARAM(version);
117  ar &nodes;
118  ar &counts;
119  ar &edges;
120  }
121 #endif
122 };
123 
124 //! update an existing ScaffoldNetwork using a set of molecules
125 template <typename T>
126 void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network,
127  const ScaffoldNetworkParams &params);
128 
129 //! create a new ScaffoldNetwork for a set of molecules
130 template <typename T>
132  const ScaffoldNetworkParams &params) {
133  ScaffoldNetwork res;
134  updateScaffoldNetwork(mols, res, params);
135  return res;
136 }
137 //! allows nodes to output nicely as strings
138 inline std::ostream &operator<<(std::ostream &ostr,
139  const RDKit::ScaffoldNetwork::EdgeType &e) {
140  switch (e) {
142  ostr << "Fragment";
143  break;
145  ostr << "Generic";
146  break;
148  ostr << "GenericBond";
149  break;
151  ostr << "RemoveAttachment";
152  break;
153  case RDKit::ScaffoldNetwork::EdgeType::Initialize:
154  ostr << "Initialize";
155  break;
156  default:
157  ostr << "UNKNOWN";
158  break;
159  }
160  return ostr;
161 }
162 //! allows edges to output nicely as strings
163 inline std::ostream &operator<<(std::ostream &ostr,
165  ostr << "NetworkEdge( " << e.beginIdx << "->" << e.endIdx
166  << ", type:" << e.type << " )";
167  return ostr;
168 }
169 
170 //! returns parameters for constructing scaffold networks using BRICS
171 //! fragmentation
173 
174 } // namespace ScaffoldNetwork
175 } // namespace RDKit
176 
177 #endif
RDKit::ScaffoldNetwork::ScaffoldNetworkParams
Definition: ScaffoldNetwork.h:37
RDKit::ScaffoldNetwork::ScaffoldNetworkParams::ScaffoldNetworkParams
ScaffoldNetworkParams(const std::vector< std::string > &bondBreakersSmarts)
RDKit::ScaffoldNetwork::ScaffoldNetwork::edges
std::vector< NetworkEdge > edges
edges in the network
Definition: ScaffoldNetwork.h:103
BoostStartInclude.h
RDKit::ScaffoldNetwork::ScaffoldNetwork
Definition: ScaffoldNetwork.h:99
RDKit::ScaffoldNetwork::NetworkEdge::type
EdgeType type
Definition: ScaffoldNetwork.h:76
RDKit::ScaffoldNetwork::ScaffoldNetwork::nodes
std::vector< std::string > nodes
SMILES for the scaffolds.
Definition: ScaffoldNetwork.h:100
RDUNUSED_PARAM
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:197
RDKit::ScaffoldNetwork::NetworkEdge::operator==
bool operator==(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
Definition: ScaffoldNetwork.h:80
BoostEndInclude.h
Fragment
Fragment
molecule -> fragment
Definition: ScaffoldNetwork.h:66
GenericBond
GenericBond
molecule -> generic bond molecule (all bonds single)
Definition: ScaffoldNetwork.h:68
RDKit::ScaffoldNetwork::NetworkEdge
Definition: ScaffoldNetwork.h:73
RDKit::ScaffoldNetwork::NetworkEdge::operator!=
bool operator!=(const RDKit::ScaffoldNetwork::NetworkEdge &o) const
Definition: ScaffoldNetwork.h:83
RDKit::ScaffoldNetwork::ScaffoldNetworkParams::ScaffoldNetworkParams
ScaffoldNetworkParams()
Definition: ScaffoldNetwork.h:59
RDKit::ScaffoldNetwork::NetworkEdge::endIdx
size_t endIdx
Definition: ScaffoldNetwork.h:75
Invariant.h
RDKit::ScaffoldNetwork::ScaffoldNetwork::ScaffoldNetwork
ScaffoldNetwork()
Definition: ScaffoldNetwork.h:104
RDKIT_SCAFFOLDNETWORK_EXPORT
#define RDKIT_SCAFFOLDNETWORK_EXPORT
Definition: export.h:606
RDKit::ScaffoldNetwork::ScaffoldNetworkParams::bondBreakersRxns
std::vector< std::shared_ptr< ChemicalReaction > > bondBreakersRxns
Definition: ScaffoldNetwork.h:57
RDKit::ScaffoldNetwork::updateScaffoldNetwork
void updateScaffoldNetwork(const T &mols, ScaffoldNetwork &network, const ScaffoldNetworkParams &params)
update an existing ScaffoldNetwork using a set of molecules
RDKit::ScaffoldNetwork::ScaffoldNetwork::counts
std::vector< unsigned > counts
number of times each scaffold was encountered
Definition: ScaffoldNetwork.h:102
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::ScaffoldNetwork::operator<<
std::ostream & operator<<(std::ostream &ostr, const RDKit::ScaffoldNetwork::EdgeType &e)
allows nodes to output nicely as strings
Definition: ScaffoldNetwork.h:138
RDKit::ScaffoldNetwork::NetworkEdge::NetworkEdge
NetworkEdge()
Definition: ScaffoldNetwork.h:77
RDKit::ScaffoldNetwork::NetworkEdge::NetworkEdge
NetworkEdge(size_t bi, size_t ei, EdgeType typ)
Definition: ScaffoldNetwork.h:78
RemoveAttachment
RemoveAttachment
molecule -> molecule with no attachment points
Definition: ScaffoldNetwork.h:69
RDKit::ScaffoldNetwork::createScaffoldNetwork
ScaffoldNetwork createScaffoldNetwork(const T &mols, const ScaffoldNetworkParams &params)
create a new ScaffoldNetwork for a set of molecules
Definition: ScaffoldNetwork.h:131
Generic
Generic
molecule -> generic molecule (all atoms are dummies)
Definition: ScaffoldNetwork.h:67
RDKit::ScaffoldNetwork::getBRICSNetworkParams
RDKIT_SCAFFOLDNETWORK_EXPORT ScaffoldNetworkParams getBRICSNetworkParams()
RDKit::ScaffoldNetwork::NetworkEdge::beginIdx
size_t beginIdx
Definition: ScaffoldNetwork.h:74
export.h