27 #if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
28 # error "This file cannot be used when building without GZip-support."
31 #if defined(SEQAN3_HAS_ZLIB)
35 namespace seqan3::contrib
// NOTE(review): the declaration that opens this initializer (and its first bytes) is on a
// line that is not visible in this listing. The 24 bytes below match bytes 4-27 of the
// 28-byte BGZF end-of-file marker (an empty BGZF block) from the SAM/BAM specification;
// presumably this is that constant -- confirm against the full header.
// The per-group annotations below follow the gzip/BGZF field layout under that assumption.
// Presumably MTIME (4 bytes, zero).
54 '\x00',
'\x00',
'\x00',
'\x00',
// Presumably XFL (0), OS (0xff = unknown), XLEN = 6 (little endian).
55 '\x00',
'\xff',
'\x06',
'\x00',
// Presumably the extra-subfield id 'B','C' followed by SLEN = 2.
56 '\x42',
'\x43',
'\x02',
'\x00',
// Presumably BSIZE = 0x001b (total block size minus one) followed by an empty deflate block.
57 '\x1b',
'\x00',
'\x03',
'\x00',
// Presumably CRC32 of the (empty) payload.
58 '\x00',
'\x00',
'\x00',
'\x00',
// Presumably ISIZE, the uncompressed length (zero).
59 '\x00',
'\x00',
'\x00',
'\x00'}};
// Primary CompressionContext template: an empty struct for any algorithm tag without a
// dedicated specialisation. The gz and bgzf specialisations in this file carry the real state.
61 template <
typename TAlgTag>
62 struct CompressionContext {};
// Primary DefaultPageSize template. It is only declared here, never defined, so its members
// can only be read through a specialisation for a concrete algorithm tag.
64 template <
typename TAlgTag>
65 struct DefaultPageSize;
// Specialisation of CompressionContext for plain gz compression.
// NOTE(review): the body of this struct is not visible in this listing. The functions in this
// file access ctx.strm and pass &ctx.strm to zlib's deflate*/inflate* calls, so it presumably
// holds a z_stream member named strm -- confirm against the full header.
68 struct CompressionContext<detail::gz_compression>
// Specialisation of CompressionContext for BGZF. It derives from the gz context, so it shares
// that context's state, and adds BGZF-specific members.
79 struct CompressionContext<detail::bgzf_compression>:
80 CompressionContext<detail::gz_compression>
// Length of the fixed header written in front of every BGZF block; defined as the size of
// detail::bgzf_compression::magic_header.
82 static constexpr
size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): not read or written by any code visible in this listing; presumably a cursor
// used while a block header is being parsed -- confirm against the stream buffer that uses it.
83 unsigned char headerPos;
// Size constants for BGZF blocks.
87 struct DefaultPageSize<detail::bgzf_compression>
// Size budget for one BGZF block: 64 KiB.
89 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// Bytes reserved behind the compressed payload. _compressBlock writes two 32-bit values there:
// the CRC32 of the uncompressed data and the uncompressed byte count.
90 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the worst-case overhead (5 bytes) zlib adds when it has to emit the
// data as a stored, uncompressed deflate block -- confirm.
92 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Mirrors the header length of the BGZF compression context so it is also reachable through
// this struct.
96 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
// What remains of MAX_BLOCK_SIZE after subtracting header, footer and overhead.
// NOTE(review): presumably the largest uncompressed chunk fed into a single block -- confirm
// against the code that reads VALUE.
97 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Initialises the zlib deflate state held in ctx.strm for gz compression.
// ctx: context whose strm member is set up via deflateInit2().
// Throws io_error with the message below. NOTE(review): the condition guarding the throw sits
// on a line omitted from this listing; presumably it tests status against Z_OK -- confirm.
109 compressInit(CompressionContext<detail::gz_compression> & ctx)
// Per the zlib manual, a negative windowBits value selects raw deflate output (no zlib
// header/trailer) with a window of 2^15 bytes.
111 const int GZIP_WINDOW_BITS = -15;
// 8 is the memLevel the zlib manual documents as its default.
112 const int Z_DEFAULT_MEM_LEVEL = 8;
// Null allocator callbacks make zlib fall back to its built-in allocation routines.
114 ctx.strm.zalloc = NULL;
115 ctx.strm.zfree = NULL;
// Z_BEST_SPEED is used as the compression level rather than the zlib default level.
121 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
122 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
124 throw io_error(
"Calling deflateInit2() failed for gz file.");
// BGZF overload of compressInit: forwards to the gz overload by passing ctx as a reference
// to its gz base class.
// NOTE(review): any further statements of this function are not visible in this listing.
132 compressInit(CompressionContext<detail::bgzf_compression> & ctx)
134 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a 16-bit value from a byte buffer and returns it after passing it through
// detail::to_little_endian(). _decompressBlock uses it to read the block-size field at
// offset 16 of a BGZF block.
// NOTE(review): the declaration and filling of tmp are on lines omitted from this listing;
// presumably tmp is a uint16_t copied from the first two bytes of buffer -- confirm.
143 _bgzfUnpack16(
char const * buffer)
147 return detail::to_little_endian(tmp);
// Reads a 32-bit value from a byte buffer and returns it after passing it through
// detail::to_little_endian(). _decompressBlock uses it to read the two footer fields
// (checksum and uncompressed size).
// NOTE(review): the declaration and filling of tmp are on lines omitted from this listing;
// presumably tmp is a uint32_t copied from the first four bytes of buffer -- confirm.
151 _bgzfUnpack32(
char const * buffer)
155 return detail::to_little_endian(tmp);
// Stores a 16-bit value into a byte buffer.
// buffer: destination; value: number to store (taken by value, so the conversion below only
// affects the local copy).
163 _bgzfPack16(
char * buffer, uint16_t value)
// Run the value through detail::to_little_endian() before its bytes are written out.
165 value = detail::to_little_endian(value);
// End of the byte range covering value (its address plus sizeof(uint16_t)).
// NOTE(review): the call this argument belongs to is on a line omitted from this listing;
// presumably a copy of value's bytes into buffer -- confirm.
167 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Stores a 32-bit value into a byte buffer.
// buffer: destination; value: number to store (taken by value, so the conversion below only
// affects the local copy).
172 _bgzfPack32(
char * buffer, uint32_t value)
// Run the value through detail::to_little_endian() before its bytes are written out.
174 value = detail::to_little_endian(value);
// End of the byte range covering value (its address plus sizeof(uint32_t)).
// NOTE(review): the call this argument belongs to is on a line omitted from this listing;
// presumably a copy of value's bytes into buffer -- confirm.
176 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one chunk of input into a single, self-contained BGZF block:
// magic header, deflate payload, then an 8-byte footer.
//
// dstBegin / dstCapacity: output buffer and its capacity; the element type must be one byte wide.
// srcBegin / srcLength:   input elements; srcLength * sizeof(TSourceValue) bytes are compressed.
// ctx:                    BGZF context whose zlib stream (ctx.strm) is used for deflate.
// Returns the total number of bytes written to dstBegin (header + payload + footer).
// Throws io_error if deflate() does not finish the stream in one call, or with the
// deflateEnd() message below (see the note at that statement).
184 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
186 _compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
187 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
189 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
190 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions: room for at least header and footer, byte-sized destination elements, and a
// 4-byte unsigned. These are run-time asserts, so they vanish when NDEBUG is defined.
192 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
193 assert(
sizeof(TDestValue) == 1u);
194 assert(
sizeof(
unsigned) == 4u);
// Step 1: the block starts with the fixed BGZF magic header.
197 std::ranges::copy(detail::bgzf_compression::magic_header, dstBegin);
// Step 2: deflate the input into the space between header and footer.
// NOTE(review): no stream initialisation is visible before this point, yet deflateEnd() is
// called below on every path; presumably compressInit(ctx) is invoked on a line omitted from
// this listing -- confirm.
201 ctx.strm.next_in = (Bytef *)(srcBegin);
202 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
203 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
// Output space excludes header and footer, so the footer always fits after the payload.
204 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks zlib to compress everything in one call; anything other than Z_STREAM_END
// means it could not complete within the given output space (or failed otherwise).
206 int status = deflate(&ctx.strm, Z_FINISH);
207 if (status != Z_STREAM_END)
// Release zlib's state before reporting the failure.
209 deflateEnd(&ctx.strm);
210 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
// NOTE(review): the condition guarding this throw is on a line omitted from this listing;
// presumably it tests status against Z_OK -- confirm.
213 status = deflateEnd(&ctx.strm);
215 throw io_error(
"BGZF deflateEnd() failed.");
// Step 3: patch the block size into the header and append the footer.
// avail_out started at dstCapacity minus header and footer, so this difference equals
// header + compressed payload + footer, i.e. the full block length.
222 size_t len = dstCapacity - ctx.strm.avail_out;
// The 16-bit field at offset 16 of the header stores the total block length minus one.
223 _bgzfPack16(dstBegin + 16, len - 1);
// Move to the first footer byte.
225 dstBegin += len - BLOCK_FOOTER_LENGTH;
// Footer part 1: CRC32 over the uncompressed input bytes.
226 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
// Footer part 2: number of uncompressed input bytes.
227 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
229 return dstCapacity - ctx.strm.avail_out;
// Initialises the zlib inflate state held in ctx.strm for gz decompression.
// ctx: context whose strm member is set up via inflateInit2().
// Throws io_error with the message below. NOTE(review): the condition guarding the throw sits
// on a line omitted from this listing; presumably it tests status against Z_OK -- confirm.
237 decompressInit(CompressionContext<detail::gz_compression> & ctx)
// Per the zlib manual, a negative windowBits value makes inflate expect raw deflate data
// (no zlib header/trailer) with a window of 2^15 bytes.
239 const int GZIP_WINDOW_BITS = -15;
// Null allocator callbacks make zlib fall back to its built-in allocation routines.
241 ctx.strm.zalloc = NULL;
242 ctx.strm.zfree = NULL;
243 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
245 throw io_error(
"GZip inflateInit2() failed.");
// BGZF overload of decompressInit: forwards to the gz overload by passing ctx as a reference
// to its gz base class.
// NOTE(review): any further statements of this function are not visible in this listing.
253 decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
255 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses exactly one BGZF block and validates it against its header and footer.
//
// dstBegin / dstCapacity: output buffer and its capacity in TDestValue elements.
// srcBegin / srcLength:   the complete BGZF block (header, payload, footer); the element type
//                         must be one byte wide.
// ctx:                    BGZF context whose zlib stream (ctx.strm) is used for inflate.
// Returns the number of TDestValue elements produced.
// Throws io_error if the block is too short, its header is invalid, the size stored in the
// header differs from srcLength, inflate() does not finish the stream in one call, or the
// checksum / uncompressed size in the footer do not match the output.
//
// NOTE(review): avail_out is set in bytes (dstCapacity * sizeof(TDestValue)), but the amount
// produced is later computed as dstCapacity - avail_out. The two agree only when
// sizeof(TDestValue) == 1, and no visible assert enforces that for the destination type
// (only TSourceValue is checked). Worth either asserting it or using the byte capacity
// consistently.
263 template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
265 _decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
266 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
268 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
269 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions: byte-sized source elements and a 4-byte unsigned. These are run-time asserts,
// so they vanish when NDEBUG is defined.
271 assert(
sizeof(TSourceValue) == 1u);
272 assert(
sizeof(
unsigned) == 4u);
// Step 1: validate the block framing before touching zlib.
// A block must be strictly longer than header plus footer.
276 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
277 throw io_error(
"BGZF block too short.");
279 if (!detail::bgzf_compression::validate_header(std::span{srcBegin, srcLength}))
280 throw io_error(
"Invalid BGZF block header.");
// The 16-bit field at offset 16 stores the total block length minus one; it has to describe
// exactly the bytes that were handed in.
282 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
283 if (compressedLen != srcLength)
284 throw io_error(
"BGZF compressed size mismatch.");
// Step 2: inflate the payload located between header and footer.
// NOTE(review): no stream initialisation is visible before this point, yet inflateEnd() is
// called below on every path; presumably decompressInit(ctx) is invoked on a line omitted
// from this listing -- confirm.
290 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
291 ctx.strm.next_out = (Bytef *)(dstBegin);
292 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
293 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH asks zlib to decompress everything in one call; anything other than Z_STREAM_END
// means the output did not fit or the data could not be decoded.
295 int status = inflate(&ctx.strm, Z_FINISH);
296 if (status != Z_STREAM_END)
// Release zlib's state before reporting the failure.
298 inflateEnd(&ctx.strm);
299 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
// NOTE(review): the condition guarding this throw is on a line omitted from this listing;
// presumably it tests status against Z_OK -- confirm.
302 status = inflateEnd(&ctx.strm);
304 throw io_error(
"BGZF inflateEnd() failed.");
// Step 3: verify the footer. First compute the CRC32 over the bytes just produced.
311 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Move to the first footer byte.
313 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
// Footer part 1: stored CRC32 must equal the computed one.
314 if (_bgzfUnpack32(srcBegin) != crc)
315 throw io_error(
"BGZF wrong checksum.");
// Footer part 2: stored uncompressed size must equal the amount produced.
317 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
318 throw io_error(
"BGZF size mismatch.");
// Convert the produced amount into a count of destination elements.
320 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
The <algorithm> header from C++20's standard library.
Provides various transformation traits used by the range module.
T hardware_concurrency(T... args)
Provides exceptions used in the I/O module.
Provides std::span from the C++20 standard library.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)