Libosmium  2.18.0
Fast and flexible C++ library for working with OpenStreetMap data
bzip2_compression.hpp
Go to the documentation of this file.
1 #ifndef OSMIUM_IO_BZIP2_COMPRESSION_HPP
2 #define OSMIUM_IO_BZIP2_COMPRESSION_HPP
3 
4 /*
5 
6 This file is part of Osmium (https://osmcode.org/libosmium).
7 
8 Copyright 2013-2022 Jochen Topf <jochen@topf.org> and others (see README).
9 
10 Boost Software License - Version 1.0 - August 17th, 2003
11 
12 Permission is hereby granted, free of charge, to any person or organization
13 obtaining a copy of the software and accompanying documentation covered by
14 this license (the "Software") to use, reproduce, display, distribute,
15 execute, and transmit the Software, and to prepare derivative works of the
16 Software, and to permit third-parties to whom the Software is furnished to
17 do so, all subject to the following:
18 
19 The copyright notices in the Software and this entire statement, including
20 the above license grant, this restriction and the following disclaimer,
21 must be included in all copies of the Software, in whole or in part, and
22 all derivative works of the Software, unless such copies or derivative
23 works are solely in the form of machine-executable object code generated by
24 a source language processor.
25 
26 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
27 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
28 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
29 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
30 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
31 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
32 DEALINGS IN THE SOFTWARE.
33 
34 */
35 
#include <osmium/io/compression.hpp>
#include <osmium/io/detail/read_write.hpp>
#include <osmium/io/error.hpp>
#include <osmium/io/file_compression.hpp>
#include <osmium/io/writer_options.hpp>
#include <osmium/util/file.hpp>

#include <bzlib.h>

#include <cassert>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <string>
#include <system_error>

#ifndef _MSC_VER
# include <unistd.h>
#endif
64 
65 namespace osmium {
66 
71  struct bzip2_error : public io_error {
72 
74  int system_errno = 0;
75 
76  bzip2_error(const std::string& what, const int error_code) :
77  io_error(what),
78  bzip2_error_code(error_code) {
79  if (error_code == BZ_IO_ERROR) {
80  system_errno = errno;
81  }
82  }
83 
84  }; // struct bzip2_error
85 
86  namespace io {
87 
88  namespace detail {
89 
90  [[noreturn]] inline void throw_bzip2_error(BZFILE* bzfile, const char* msg, const int bzlib_error) {
91  std::string error{"bzip2 error: "};
92  error += msg;
93  error += ": ";
94  int errnum = bzlib_error;
95  if (bzlib_error) {
96  error += std::to_string(bzlib_error);
97  } else if (bzfile) {
98  error += ::BZ2_bzerror(bzfile, &errnum);
99  }
100  throw osmium::bzip2_error{error, errnum};
101  }
102 
/**
 * Owner of a stdio FILE stream that is created from a file descriptor
 * with fdopen().
 *
 * The stream is closed with fclose() in the destructor unless close()
 * was called before. The class can be neither copied nor moved.
 */
class file_wrapper {

    FILE* m_file = nullptr;

public:

    /// Create a wrapper that does not hold a stream.
    file_wrapper() noexcept = default;

    /**
     * Open a stream on the given file descriptor.
     *
     * If fdopen() fails, the descriptor is closed (unless it is
     * stdout, fd 1) and the fdopen() error is reported.
     *
     * @param fd File descriptor to wrap.
     * @param mode Mode string handed to fdopen().
     * @throws std::system_error with the errno set by fdopen().
     */
    file_wrapper(const int fd, const char* mode) {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        m_file = fdopen(fd, mode);
        if (!m_file) {
            // Remember the reason for the fdopen() failure now: the
            // ::close() call below can overwrite errno.
            const int fdopen_errno = errno;

            // Do not close stdout
            if (fd != 1) {
                ::close(fd);
            }
            throw std::system_error{fdopen_errno, std::system_category(), "fdopen failed"};
        }
    }

    file_wrapper(const file_wrapper&) = delete;
    file_wrapper& operator=(const file_wrapper&) = delete;

    file_wrapper(file_wrapper&&) = delete;
    file_wrapper& operator=(file_wrapper&&) = delete;

    /// Close the stream if it is still held. The fclose() result is
    /// ignored because a destructor must not throw.
    ~file_wrapper() noexcept {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            fclose(m_file);
        }
    }

    /// The wrapped stream, or nullptr if there is none (any more).
    FILE* file() const noexcept {
        return m_file;
    }

    /**
     * Give up the stream and close it. Does nothing if no stream is
     * held. A stream on stdout (fd 1) is released without closing it.
     *
     * @throws std::system_error if fclose() reports an error.
     */
    void close() {
#ifdef _MSC_VER
        osmium::detail::disable_invalid_parameter_handler diph;
#endif
        if (m_file) {
            // Forget the stream first so that a failing fclose() is
            // not retried from the destructor.
            FILE* wrapped_file = m_file;
            m_file = nullptr;

            // Do not close stdout
            if (fileno(wrapped_file) == 1) {
                return;
            }

            if (fclose(wrapped_file) != 0) {
                throw std::system_error{errno, std::system_category(), "fclose failed"};
            }
        }
    }

}; // class file_wrapper
165 
166  } // namespace detail
167 
168  class Bzip2Compressor final : public Compressor {
169 
170  std::size_t m_file_size = 0;
171  detail::file_wrapper m_file;
172  BZFILE* m_bzfile = nullptr;
173 
174  public:
175 
176  explicit Bzip2Compressor(const int fd, const fsync sync) :
177  Compressor(sync),
178  m_file(fd, "wb") {
179 #ifdef _MSC_VER
180  osmium::detail::disable_invalid_parameter_handler diph;
181 #endif
182  int bzerror = BZ_OK;
183  m_bzfile = ::BZ2_bzWriteOpen(&bzerror, m_file.file(), 6, 0, 0);
184  if (!m_bzfile) {
185  throw bzip2_error{"bzip2 error: write open failed", bzerror};
186  }
187  }
188 
191 
194 
195  ~Bzip2Compressor() noexcept override {
196  try {
197  close();
198  } catch (...) {
199  // Ignore any exceptions because destructor must not throw.
200  }
201  }
202 
203  void write(const std::string& data) override {
204  assert(data.size() < std::numeric_limits<int>::max());
205  assert(m_bzfile);
206 #ifdef _MSC_VER
207  osmium::detail::disable_invalid_parameter_handler diph;
208 #endif
209  int bzerror = BZ_OK;
210  ::BZ2_bzWrite(&bzerror, m_bzfile, const_cast<char*>(data.data()), static_cast<int>(data.size()));
211  if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
212  detail::throw_bzip2_error(m_bzfile, "write failed", bzerror);
213  }
214  }
215 
216  void close() override {
217  if (m_bzfile) {
218 #ifdef _MSC_VER
219  osmium::detail::disable_invalid_parameter_handler diph;
220 #endif
221  int bzerror = BZ_OK;
222  unsigned int nbytes_out_lo32 = 0;
223  unsigned int nbytes_out_hi32 = 0;
224  ::BZ2_bzWriteClose64(&bzerror, m_bzfile, 0, nullptr, nullptr, &nbytes_out_lo32, &nbytes_out_hi32);
225  m_bzfile = nullptr;
226  if (do_fsync() && m_file.file()) {
227  osmium::io::detail::reliable_fsync(fileno(m_file.file()));
228  }
229  m_file.close();
230  if (bzerror != BZ_OK) {
231  throw bzip2_error{"bzip2 error: write close failed", bzerror};
232  }
233  m_file_size = static_cast<std::size_t>(static_cast<uint64_t>(nbytes_out_hi32) << 32U | nbytes_out_lo32);
234  }
235  }
236 
237  std::size_t file_size() const override {
238  return m_file_size;
239  }
240 
241  }; // class Bzip2Compressor
242 
243  class Bzip2Decompressor final : public Decompressor {
244 
245  detail::file_wrapper m_file;
246  BZFILE* m_bzfile = nullptr;
247  bool m_stream_end = false;
248 
249  public:
250 
251  explicit Bzip2Decompressor(const int fd) :
252  m_file(fd, "rb") {
253 #ifdef _MSC_VER
254  osmium::detail::disable_invalid_parameter_handler diph;
255 #endif
256  int bzerror = BZ_OK;
257  m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, nullptr, 0);
258  if (!m_bzfile) {
259  throw bzip2_error{"bzip2 error: read open failed", bzerror};
260  }
261  }
262 
265 
268 
269  ~Bzip2Decompressor() noexcept override {
270  try {
271  close();
272  } catch (...) {
273  // Ignore any exceptions because destructor must not throw.
274  }
275  }
276 
277  std::string read() override {
278  const auto offset = ftell(m_file.file());
279  if (offset > 0 && want_buffered_pages_removed()) {
280  osmium::io::detail::remove_buffered_pages(fileno(m_file.file()), static_cast<std::size_t>(offset));
281  }
282 #ifdef _MSC_VER
283  osmium::detail::disable_invalid_parameter_handler diph;
284 #endif
285  assert(m_bzfile);
286  std::string buffer;
287 
288  if (!m_stream_end) {
290  int bzerror = BZ_OK;
291  assert(buffer.size() < std::numeric_limits<int>::max());
292  const int nread = ::BZ2_bzRead(&bzerror, m_bzfile, &*buffer.begin(), static_cast<int>(buffer.size()));
293  if (bzerror != BZ_OK && bzerror != BZ_STREAM_END) {
294  detail::throw_bzip2_error(m_bzfile, "read failed", bzerror);
295  }
296  if (bzerror == BZ_STREAM_END) {
297  void* unused = nullptr;
298  int nunused = 0;
299  if (!feof(m_file.file())) {
300  ::BZ2_bzReadGetUnused(&bzerror, m_bzfile, &unused, &nunused);
301  if (bzerror != BZ_OK) {
302  detail::throw_bzip2_error(m_bzfile, "get unused failed", bzerror);
303  }
304  std::string unused_data{static_cast<const char*>(unused), static_cast<std::string::size_type>(nunused)};
305  ::BZ2_bzReadClose(&bzerror, m_bzfile);
306  if (bzerror != BZ_OK) {
307  throw bzip2_error{"bzip2 error: read close failed", bzerror};
308  }
309  assert(unused_data.size() < std::numeric_limits<int>::max());
310  m_bzfile = ::BZ2_bzReadOpen(&bzerror, m_file.file(), 0, 0, &*unused_data.begin(), static_cast<int>(unused_data.size()));
311  if (!m_bzfile) {
312  throw bzip2_error{"bzip2 error: read open failed", bzerror};
313  }
314  } else {
315  m_stream_end = true;
316  }
317  }
318  buffer.resize(static_cast<std::string::size_type>(nread));
319  }
320 
321  set_offset(static_cast<std::size_t>(ftell(m_file.file())));
322 
323  return buffer;
324  }
325 
326  void close() override {
327  if (m_bzfile) {
329  osmium::io::detail::remove_buffered_pages(fileno(m_file.file()));
330  }
331 #ifdef _MSC_VER
332  osmium::detail::disable_invalid_parameter_handler diph;
333 #endif
334  int bzerror = BZ_OK;
335  ::BZ2_bzReadClose(&bzerror, m_bzfile);
336  m_bzfile = nullptr;
337  m_file.close();
338  if (bzerror != BZ_OK) {
339  throw bzip2_error{"bzip2 error: read close failed", bzerror};
340  }
341  }
342  }
343 
344  }; // class Bzip2Decompressor
345 
346  class Bzip2BufferDecompressor final : public Decompressor {
347 
348  const char* m_buffer;
349  std::size_t m_buffer_size;
350  bz_stream m_bzstream;
351 
352  public:
353 
354  Bzip2BufferDecompressor(const char* buffer, const std::size_t size) :
355  m_buffer(buffer),
356  m_buffer_size(size),
357  m_bzstream() {
358  m_bzstream.next_in = const_cast<char*>(buffer);
359  assert(size < std::numeric_limits<unsigned int>::max());
360  m_bzstream.avail_in = static_cast<unsigned int>(size);
361  const int result = BZ2_bzDecompressInit(&m_bzstream, 0, 0);
362  if (result != BZ_OK) {
363  throw bzip2_error{"bzip2 error: decompression init failed: ", result};
364  }
365  }
366 
369 
372 
373  ~Bzip2BufferDecompressor() noexcept override {
374  try {
375  close();
376  } catch (...) {
377  // Ignore any exceptions because destructor must not throw.
378  }
379  }
380 
381  std::string read() override {
382  std::string output;
383 
384  if (m_buffer) {
385  const std::size_t buffer_size = 10240;
386  output.resize(buffer_size);
387  m_bzstream.next_out = &*output.begin();
388  m_bzstream.avail_out = buffer_size;
389  const int result = BZ2_bzDecompress(&m_bzstream);
390 
391  if (result != BZ_OK) {
392  m_buffer = nullptr;
393  m_buffer_size = 0;
394  }
395 
396  if (result != BZ_OK && result != BZ_STREAM_END) {
397  throw bzip2_error{"bzip2 error: decompress failed: ", result};
398  }
399 
400  output.resize(static_cast<std::size_t>(m_bzstream.next_out - output.data()));
401  }
402 
403  return output;
404  }
405 
406  void close() override {
407  BZ2_bzDecompressEnd(&m_bzstream);
408  }
409 
410  }; // class Bzip2BufferDecompressor
411 
412  namespace detail {
413 
414  // we want the register_compression() function to run, setting
415  // the variable is only a side-effect, it will never be used
417  [](const int fd, const fsync sync) { return new osmium::io::Bzip2Compressor{fd, sync}; },
418  [](const int fd) { return new osmium::io::Bzip2Decompressor{fd}; },
419  [](const char* buffer, const std::size_t size) { return new osmium::io::Bzip2BufferDecompressor{buffer, size}; }
420  );
421 
422  // dummy function to silence the unused variable warning from above
423  inline bool get_registered_bzip2_compression() noexcept {
424  return registered_bzip2_compression;
425  }
426 
427  } // namespace detail
428 
429  } // namespace io
430 
431 } // namespace osmium
432 
433 #endif // OSMIUM_IO_BZIP2_COMPRESSION_HPP
Definition: bzip2_compression.hpp:346
Bzip2BufferDecompressor(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(const char *buffer, const std::size_t size)
Definition: bzip2_compression.hpp:354
Bzip2BufferDecompressor & operator=(const Bzip2BufferDecompressor &)=delete
Bzip2BufferDecompressor(Bzip2BufferDecompressor &&)=delete
std::string read() override
Definition: bzip2_compression.hpp:381
~Bzip2BufferDecompressor() noexcept override
Definition: bzip2_compression.hpp:373
Bzip2BufferDecompressor & operator=(Bzip2BufferDecompressor &&)=delete
void close() override
Definition: bzip2_compression.hpp:406
bz_stream m_bzstream
Definition: bzip2_compression.hpp:350
std::size_t m_buffer_size
Definition: bzip2_compression.hpp:349
const char * m_buffer
Definition: bzip2_compression.hpp:348
Definition: bzip2_compression.hpp:168
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:172
Bzip2Compressor(const Bzip2Compressor &)=delete
Bzip2Compressor(Bzip2Compressor &&)=delete
~Bzip2Compressor() noexcept override
Definition: bzip2_compression.hpp:195
std::size_t file_size() const override
Definition: bzip2_compression.hpp:237
Bzip2Compressor(const int fd, const fsync sync)
Definition: bzip2_compression.hpp:176
std::size_t m_file_size
Definition: bzip2_compression.hpp:170
Bzip2Compressor & operator=(Bzip2Compressor &&)=delete
Bzip2Compressor & operator=(const Bzip2Compressor &)=delete
void close() override
Definition: bzip2_compression.hpp:216
void write(const std::string &data) override
Definition: bzip2_compression.hpp:203
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:171
Definition: bzip2_compression.hpp:243
Bzip2Decompressor & operator=(Bzip2Decompressor &&)=delete
~Bzip2Decompressor() noexcept override
Definition: bzip2_compression.hpp:269
BZFILE * m_bzfile
Definition: bzip2_compression.hpp:246
detail::file_wrapper m_file
Definition: bzip2_compression.hpp:245
bool m_stream_end
Definition: bzip2_compression.hpp:247
Bzip2Decompressor(Bzip2Decompressor &&)=delete
void close() override
Definition: bzip2_compression.hpp:326
Bzip2Decompressor(const int fd)
Definition: bzip2_compression.hpp:251
Bzip2Decompressor(const Bzip2Decompressor &)=delete
std::string read() override
Definition: bzip2_compression.hpp:277
Bzip2Decompressor & operator=(const Bzip2Decompressor &)=delete
bool register_compression(osmium::io::file_compression compression, const create_compressor_type &create_compressor, const create_decompressor_type_fd &create_decompressor_fd, const create_decompressor_type_buffer &create_decompressor_buffer)
Definition: compression.hpp:196
static CompressionFactory & instance()
Definition: compression.hpp:191
Definition: compression.hpp:57
bool do_fsync() const noexcept
Definition: compression.hpp:63
Definition: compression.hpp:91
@ input_buffer_size
Definition: compression.hpp:100
void set_offset(const std::size_t offset) noexcept
Definition: compression.hpp:125
bool want_buffered_pages_removed() const noexcept
Definition: compression.hpp:131
Definition: attr.hpp:342
fsync
Definition: writer_options.hpp:51
Namespace for everything in the Osmium library.
Definition: assembler.hpp:53
Definition: bzip2_compression.hpp:71
bzip2_error(const std::string &what, const int error_code)
Definition: bzip2_compression.hpp:76
int bzip2_error_code
Definition: bzip2_compression.hpp:73
int system_errno
Definition: bzip2_compression.hpp:74
Definition: error.hpp:46