RDKit
Open-source cheminformatics and machine learning.
SubstructLibrarySerialization.h
Go to the documentation of this file.
1 // Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 // n.b. must be included at the END of SubstructLibrary.h
32 #ifndef RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
33 #define RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
34 
35 #ifdef RDK_USE_BOOST_SERIALIZATION
37 #include <boost/archive/text_oarchive.hpp>
38 #include <boost/archive/text_iarchive.hpp>
39 #include <boost/serialization/vector.hpp>
40 #include <boost/serialization/shared_ptr.hpp>
42 
43 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::MolHolderBase)
44 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::FPHolderBase)
45 
46 namespace boost {
47 namespace serialization {
48 
49 template <class Archive>
50 void serialize(Archive &ar, RDKit::MolHolderBase &,
51  const unsigned int version) {
52  RDUNUSED_PARAM(version);
53  RDUNUSED_PARAM(ar);
54 }
55 
56 template <class Archive>
57 void save(Archive &ar, const RDKit::MolHolder &molholder,
58  const unsigned int version) {
59  RDUNUSED_PARAM(version);
60  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
61 
62  std::int64_t pkl_count = molholder.getMols().size();
63  ar &pkl_count;
64 
65  for (auto &mol : molholder.getMols()) {
66  std::string pkl;
67  RDKit::MolPickler::pickleMol(*mol.get(), pkl);
68  ar << pkl;
69  }
70 }
71 
72 template <class Archive>
73 void load(Archive &ar, RDKit::MolHolder &molholder,
74  const unsigned int version) {
75  RDUNUSED_PARAM(version);
76  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
77 
78  std::vector<boost::shared_ptr<RDKit::ROMol>> &mols = molholder.getMols();
79  mols.clear();
80 
81  std::int64_t pkl_count = -1;
82  ar &pkl_count;
83 
84  for (std::int64_t i = 0; i < pkl_count; ++i) {
85  std::string pkl;
86  ar >> pkl;
87  mols.push_back(boost::make_shared<RDKit::ROMol>(pkl));
88  }
89 }
90 
91 template <class Archive, class MolHolder>
92 void serialize_strings(Archive &ar, MolHolder &molholder,
93  const unsigned int version) {
94  RDUNUSED_PARAM(version);
95  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
96  ar &molholder.getMols();
97 }
98 
99 template <class Archive>
100 void serialize(Archive &ar, RDKit::CachedMolHolder &molholder,
101  const unsigned int version) {
102  serialize_strings(ar, molholder, version);
103 }
104 
105 template <class Archive>
106 void serialize(Archive &ar, RDKit::CachedSmilesMolHolder &molholder,
107  const unsigned int version) {
108  serialize_strings(ar, molholder, version);
109 }
110 
111 template <class Archive>
112 void serialize(Archive &ar, RDKit::CachedTrustedSmilesMolHolder &molholder,
113  const unsigned int version) {
114  serialize_strings(ar, molholder, version);
115 }
116 
117 template <class Archive>
118 void save(Archive &ar, const RDKit::FPHolderBase &fpholder,
119  const unsigned int version) {
120  RDUNUSED_PARAM(version);
121  std::vector<std::string> pickles;
122  for (auto &fp : fpholder.getFingerprints()) {
123  pickles.push_back(fp->toString());
124  }
125  ar &pickles;
126 }
127 
128 template <class Archive>
129 void load(Archive &ar, RDKit::FPHolderBase &fpholder,
130  const unsigned int version) {
131  RDUNUSED_PARAM(version);
132  std::vector<std::string> pickles;
133  std::vector<ExplicitBitVect *> &fps = fpholder.getFingerprints();
134 
135  ar &pickles;
136  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
137  fps.clear();
138 
139  for (auto &pkl : pickles) {
140  fps.push_back(new ExplicitBitVect(pkl));
141  }
142 }
143 
144 template <class Archive>
145 void serialize(Archive &ar, RDKit::PatternHolder &pattern_holder,
146  const unsigned int version) {
147  RDUNUSED_PARAM(version);
148  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
149 }
150 
151 template <class Archive>
152 void registerSubstructLibraryTypes(Archive &ar) {
153  ar.register_type(static_cast<RDKit::MolHolder *>(nullptr));
154  ar.register_type(static_cast<RDKit::CachedMolHolder *>(nullptr));
155  ar.register_type(static_cast<RDKit::CachedSmilesMolHolder *>(nullptr));
156  ar.register_type(static_cast<RDKit::CachedTrustedSmilesMolHolder *>(nullptr));
157  ar.register_type(static_cast<RDKit::PatternHolder *>(nullptr));
158 }
159 
160 template <class Archive>
161 void save(Archive &ar, const RDKit::SubstructLibrary &slib,
162  const unsigned int version) {
163  RDUNUSED_PARAM(version);
164  registerSubstructLibraryTypes(ar);
165  ar &slib.getMolHolder();
166  ar &slib.getFpHolder();
167 }
168 
169 template <class Archive>
170 void load(Archive &ar, RDKit::SubstructLibrary &slib,
171  const unsigned int version) {
172  RDUNUSED_PARAM(version);
173  registerSubstructLibraryTypes(ar);
174  ar &slib.getMolHolder();
175  ar &slib.getFpHolder();
176  slib.resetHolders();
177 }
178 
179 } // end namespace serialization
180 } // end namespace boost
181 
182 BOOST_CLASS_VERSION(RDKit::MolHolder, 1);
183 BOOST_CLASS_VERSION(RDKit::CachedMolHolder, 1);
184 BOOST_CLASS_VERSION(RDKit::CachedSmilesMolHolder, 1);
185 BOOST_CLASS_VERSION(RDKit::CachedTrustedSmilesMolHolder, 1);
186 BOOST_CLASS_VERSION(RDKit::PatternHolder, 1);
187 BOOST_CLASS_VERSION(RDKit::SubstructLibrary, 1);
188 
189 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::MolHolder);
190 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::FPHolderBase);
191 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::SubstructLibrary);
192 
193 #endif
194 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
a class for bit vectors that are densely occupied
Concrete class that holds binary cached molecules in memory.
Concrete class that holds smiles strings in memory.
Concrete class that holds trusted smiles strings in memory.
Base API for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
Base class API for holding molecules to substructure search.
Concrete class that holds molecules in memory.
std::vector< boost::shared_ptr< ROMol > > & getMols()
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
Uses the pattern fingerprinter to rule out matches.
Substructure Search a library of molecules.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
void resetHolders()
access required for serialization
Definition: RDLog.h:22