RDKit
Open-source cheminformatics and machine learning.
SubstructLibrarySerialization.h
Go to the documentation of this file.
1 // Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 // n.b. must be included at the END of SubstructLibrary.h
32 #ifndef RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
33 #define RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
34 
35 #ifdef RDK_USE_BOOST_SERIALIZATION
37 #include <boost/archive/text_oarchive.hpp>
38 #include <boost/archive/text_iarchive.hpp>
39 #include <boost/serialization/vector.hpp>
40 #include <boost/serialization/shared_ptr.hpp>
41 #include <boost/archive/archive_exception.hpp>
43 
// Boost.Serialization trait macros: flag the two holder base classes as
// abstract for the serialization library. Only their derived holder types are
// registered in registerSubstructLibraryTypes() below.
44 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::MolHolderBase)
45 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::FPHolderBase)
46 
47 namespace boost {
48 namespace serialization {
49 
50 template <class Archive>
51 void serialize(Archive &, RDKit::MolHolderBase &,
52  const unsigned int) {
53 }
54 
55 template <class Archive>
56 void save(Archive &ar, const RDKit::MolHolder &molholder,
57  const unsigned int version) {
58  RDUNUSED_PARAM(version);
59  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
60 
61  std::int64_t pkl_count = molholder.getMols().size();
62  ar &pkl_count;
63 
64  for (auto &mol : molholder.getMols()) {
65  std::string pkl;
66  RDKit::MolPickler::pickleMol(*mol.get(), pkl);
67  ar << pkl;
68  }
69 }
70 
71 template <class Archive>
72 void load(Archive &ar, RDKit::MolHolder &molholder,
73  const unsigned int version) {
74  RDUNUSED_PARAM(version);
75  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
76 
77  std::vector<boost::shared_ptr<RDKit::ROMol>> &mols = molholder.getMols();
78  mols.clear();
79 
80  std::int64_t pkl_count = -1;
81  ar &pkl_count;
82 
83  for (std::int64_t i = 0; i < pkl_count; ++i) {
84  std::string pkl;
85  ar >> pkl;
86  mols.push_back(boost::make_shared<RDKit::ROMol>(pkl));
87  }
88 }
89 
90 template <class Archive, class MolHolder>
91 void serialize_strings(Archive &ar, MolHolder &molholder,
92  const unsigned int version) {
93  RDUNUSED_PARAM(version);
94  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
95  ar &molholder.getMols();
96 }
97 
98 template <class Archive>
99 void serialize(Archive &ar, RDKit::CachedMolHolder &molholder,
100  const unsigned int version) {
101  serialize_strings(ar, molholder, version);
102 }
103 
104 template <class Archive>
105 void serialize(Archive &ar, RDKit::CachedSmilesMolHolder &molholder,
106  const unsigned int version) {
107  serialize_strings(ar, molholder, version);
108 }
109 
110 template <class Archive>
111 void serialize(Archive &ar, RDKit::CachedTrustedSmilesMolHolder &molholder,
112  const unsigned int version) {
113  serialize_strings(ar, molholder, version);
114 }
115 
116 template <class Archive>
117 void save(Archive &ar, const RDKit::FPHolderBase &fpholder,
118  const unsigned int version) {
119  RDUNUSED_PARAM(version);
120  std::vector<std::string> pickles;
121  for (auto &fp : fpholder.getFingerprints()) {
122  pickles.push_back(fp->toString());
123  }
124  ar &pickles;
125 }
126 
127 template <class Archive>
128 void load(Archive &ar, RDKit::FPHolderBase &fpholder,
129  const unsigned int version) {
130  RDUNUSED_PARAM(version);
131  std::vector<std::string> pickles;
132  std::vector<ExplicitBitVect *> &fps = fpholder.getFingerprints();
133 
134  ar &pickles;
135  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
136  fps.clear();
137 
138  for (auto &pkl : pickles) {
139  fps.push_back(new ExplicitBitVect(pkl));
140  }
141 }
142 
143 template <class Archive>
144 void serialize(Archive &ar, RDKit::PatternHolder &pattern_holder,
145  const unsigned int version) {
146  RDUNUSED_PARAM(version);
147  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
148  if (Archive::is_saving::value &&
149  pattern_holder.getNumBits() != RDKit::PatternHolder::defaultNumBits()) {
150  ar &pattern_holder.getNumBits();
151  } else if (Archive::is_loading::value) {
152  try {
153  ar &pattern_holder.getNumBits();
154  } catch (boost::archive::archive_exception &) {
156  }
157  }
158 }
159 
160 template <class Archive>
161 void serialize(Archive &ar, RDKit::TautomerPatternHolder &pattern_holder,
162  const unsigned int version) {
163  RDUNUSED_PARAM(version);
164  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
165  ar &pattern_holder.getNumBits();
166 }
167 
168 template <class Archive>
169 void serialize(Archive &, RDKit::KeyHolderBase &,
170  const unsigned int) {
171 }
172 
173 template <class Archive>
174 void serialize(Archive &ar, RDKit::KeyFromPropHolder &key_holder,
175  const unsigned int) {
176  ar &boost::serialization::base_object<RDKit::KeyHolderBase>(key_holder);
177  ar &key_holder.getPropName();
178  ar &key_holder.getKeys();
179 }
180 
181 template <class Archive>
182 void registerSubstructLibraryTypes(Archive &ar) {
183  ar.register_type(static_cast<RDKit::MolHolder *>(nullptr));
184  ar.register_type(static_cast<RDKit::CachedMolHolder *>(nullptr));
185  ar.register_type(static_cast<RDKit::CachedSmilesMolHolder *>(nullptr));
186  ar.register_type(static_cast<RDKit::CachedTrustedSmilesMolHolder *>(nullptr));
187  ar.register_type(static_cast<RDKit::PatternHolder *>(nullptr));
188  ar.register_type(static_cast<RDKit::TautomerPatternHolder *>(nullptr));
189  ar.register_type(static_cast<RDKit::KeyFromPropHolder *>(nullptr));
190 }
191 
192 template <class Archive>
193 void save(Archive &ar, const RDKit::SubstructLibrary &slib,
194  const unsigned int version) {
195  RDUNUSED_PARAM(version);
196  registerSubstructLibraryTypes(ar);
197  ar &slib.getSearchOrder();
198  ar &slib.getKeyHolder();
199  ar &slib.getMolHolder();
200  ar &slib.getFpHolder();
201 }
202 
203 template <class Archive>
204 void load(Archive &ar, RDKit::SubstructLibrary &slib,
205  const unsigned int version) {
206  RDUNUSED_PARAM(version);
207  registerSubstructLibraryTypes(ar);
208  if (version > 1) {
209  ar &slib.getSearchOrder();
210  ar &slib.getKeyHolder();
211  }
212  ar &slib.getMolHolder();
213  ar &slib.getFpHolder();
214  slib.resetHolders();
215 }
216 
217 } // end namespace serialization
218 } // end namespace boost
219 
// Class versions recorded in archives; Boost passes the stored value to the
// serializers above as their `version` argument. SubstructLibrary is at
// version 2: its load() only reads the search order and key holder when the
// archived version is greater than 1.
220 BOOST_CLASS_VERSION(RDKit::MolHolder, 1);
221 BOOST_CLASS_VERSION(RDKit::CachedMolHolder, 1);
222 BOOST_CLASS_VERSION(RDKit::CachedSmilesMolHolder, 1);
223 BOOST_CLASS_VERSION(RDKit::CachedTrustedSmilesMolHolder, 1);
224 BOOST_CLASS_VERSION(RDKit::PatternHolder, 1);
225 BOOST_CLASS_VERSION(RDKit::TautomerPatternHolder, 1);
226 BOOST_CLASS_VERSION(RDKit::SubstructLibrary, 2);
227 
// These three types are serialized through the separate free save()/load()
// function pairs defined above rather than a single serialize() function.
228 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::MolHolder);
229 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::FPHolderBase);
230 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::SubstructLibrary);
231 
232 #endif
233 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
a class for bit vectors that are densely occupied
Concrete class that holds binary cached molecules in memory.
Concrete class that holds smiles strings in memory.
Concrete class that holds trusted smiles strings in memory.
Base FPI for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
std::vector< std::string > & getKeys()
Base class API for holding molecules to substructure search.
Concrete class that holds molecules in memory.
std::vector< boost::shared_ptr< ROMol > > & getMols()
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
static unsigned int defaultNumBits()
const unsigned int & getNumBits() const
Substructure Search a library of molecules.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
const std::vector< unsigned int > & getSearchOrder() const
void resetHolders()
access required for serialization
Definition: RDLog.h:22