10 #ifndef GENERAL_FILE_READER_H
11 #define GENERAL_FILE_READER_H
13 #include <RDStreams/streams.h>
15 #include <boost/algorithm/string.hpp>
26 namespace GeneralMolSupplier {
46 "sdf",
"mae",
"maegz",
"sdfgz",
"smi",
"csv",
"txt",
"tsv",
"tdt"};
55 std::string& compressionFormat) {
60 if (boost::algorithm::iends_with(path,
".maegz")) {
62 compressionFormat =
"gz";
64 }
else if (boost::algorithm::iends_with(path,
".sdfgz")) {
66 compressionFormat =
"gz";
68 }
else if (boost::algorithm::iends_with(path,
".gz")) {
69 compressionFormat =
"gz";
70 basename = path.substr(0, path.size() - 3);
71 }
else if (boost::algorithm::iends_with(path,
".zst") ||
72 boost::algorithm::iends_with(path,
".bz2") ||
73 boost::algorithm::iends_with(path,
".7z")) {
75 "Unsupported compression extension (.zst, .bz2, .7z) given path: " +
79 compressionFormat =
"";
82 if (boost::algorithm::iends_with(basename,
"." + suffix)) {
88 "Unsupported structure or compression extension given path: " + path);
98 std::unique_ptr<MolSupplier>
getSupplier(
const std::string& path,
100 std::string fileFormat =
"";
101 std::string compressionFormat =
"";
106 if (compressionFormat.empty()) {
107 strm =
new std::ifstream(path.c_str());
109 strm =
new gzstream(path);
113 if (fileFormat ==
"sdf") {
114 #ifdef RDK_THREADSAFE_SSS
116 MultithreadedSDMolSupplier* sdsup =
new MultithreadedSDMolSupplier(
119 std::unique_ptr<MolSupplier> p(sdsup);
125 std::unique_ptr<MolSupplier> p(sdsup);
129 else if (fileFormat ==
"smi" || fileFormat ==
"csv" || fileFormat ==
"txt" ||
130 fileFormat ==
"tsv") {
131 #ifdef RDK_THREADSAFE_SSS
133 MultithreadedSmilesMolSupplier* smsup =
134 new MultithreadedSmilesMolSupplier(
137 std::unique_ptr<MolSupplier> p(smsup);
144 std::unique_ptr<MolSupplier> p(smsup);
147 #ifdef RDK_BUILD_MAEPARSER_SUPPORT
148 else if (fileFormat ==
"mae") {
149 MaeMolSupplier* maesup =
151 std::unique_ptr<MolSupplier> p(maesup);
155 else if (fileFormat ==
"tdt") {
158 std::unique_ptr<MolSupplier> p(tdtsup);
used by various file parsing classes to indicate a bad file
lazy file parser for SMILES tables
lazy file parser for TDT files
const std::vector< std::string > supportedCompressionFormats
currently supported compression formats
const std::vector< std::string > supportedFileFormats
currently supported file formats
void determineFormat(const std::string path, std::string &fileFormat, std::string &compressionFormat)
std::unique_ptr< MolSupplier > getSupplier(const std::string &path, const struct SupplierOptions &opt)
unsigned int numWriterThreads