SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
io/sequence_file/output.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cassert>
16 #include <filesystem>
17 #include <fstream>
18 #include <ranges>
19 #include <string>
20 #include <variant>
21 #include <vector>
22 
26 #include <seqan3/io/detail/record.hpp>
28 #include <seqan3/io/exception.hpp>
29 #include <seqan3/io/record.hpp>
43 
44 namespace seqan3
45 {
46 
47 // ----------------------------------------------------------------------------
48 // sequence_file_output
49 // ----------------------------------------------------------------------------
50 
65 template <detail::fields_specialisation selected_field_ids_ = fields<field::seq, field::id, field::qual>,
66  detail::type_list_of_sequence_file_output_formats valid_formats_ =
67  type_list<format_embl, format_fasta, format_fastq, format_genbank, format_sam>>
69 {
70 public:
// Public alias for the first template parameter: the seqan3::fields list selected for the record.
76  using selected_field_ids = selected_field_ids_;
// Public alias for the second template parameter: the seqan3::type_list of possible formats.
78  using valid_formats = valid_formats_;
// Character type of the stream(s) this file writes to.
80  using stream_char_type = char;
82 
85 
// Compile-time validation of the selected fields: the immediately-invoked constexpr lambda
// walks selected_field_ids::as_array and yields false as soon as one entry is not contained
// in field_ids, which makes the assertion fail with the message below.
// NOTE(review): the declaration of field_ids is not part of this listing.
86  static_assert(
87  []() constexpr {
88  for (field f : selected_field_ids::as_array)
89  if (!field_ids::contains(f))
90  return false;
91  return true;
92  }(),
93  "You selected a field that is not valid for sequence files, please refer to the documentation "
94  "of sequence_file_output::field_ids for the accepted values.");
95 
// Range-associated types. Most are void for this output file.
// The value type (void).
102  using value_type = void;
// The reference type (void).
104  using reference = void;
// The const reference type (void).
106  using const_reference = void;
// The size type (void).
108  using size_type = void;
// The iterator type (an output iterator) parameterised on this file class.
112  using iterator = detail::out_file_iterator<sequence_file_output>;
// The const iterator type is void, because files are not const-iterable.
114  using const_iterator = void;
// The type returned by end().
116  using sentinel = std::default_sentinel_t;
118 
134 
151  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
152  primary_stream{new std::ofstream{}, stream_deleter_default}
153  {
// Construct from a file name: primary_stream owns a freshly allocated std::ofstream
// (released through stream_deleter_default).
// The member buffer is handed to the stream's filebuf before the file is opened.
154  primary_stream->rdbuf()->pubsetbuf(stream_buffer.data(), stream_buffer.size());
// primary_stream is held through the ostream base type, so it is cast back to the
// ofstream type in order to call open(); the file is opened for output in binary mode.
155  static_cast<std::basic_ofstream<char> *>(primary_stream.get())
156  ->open(filename, std::ios_base::out | std::ios::binary);
157 
// Any failure state after open() is reported as a file_open_error naming the file.
158  if (!primary_stream->good())
159  throw file_open_error{"Could not open file " + filename.string() + " for writing."};
160 
161  // possibly add intermediate compression stream
162  secondary_stream = detail::make_secondary_ostream(*primary_stream, filename);
163 
164  // initialise format handler or throw if format is not found
// NOTE(review): filename is passed to make_secondary_ostream above before being used here;
// whether that call modifies it (e.g. strips a compression extension) is not visible in this listing.
165  detail::set_format(format, filename);
166  }
167 
// Construct from an existing stream (lvalue) and with a specified format.
// Only accepted when the stream's char_type equals stream_char_type.
// The stream is not owned: both primary_stream and secondary_stream point at the caller's
// object and use the no-op deleter, so the caller's stream is never deleted by this class.
// The format and fields arguments act as tags only; their names are wrapped in SEQAN3_DOXYGEN_ONLY.
183  template <output_stream stream_t, sequence_file_output_format file_format>
184  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
185  sequence_file_output(stream_t & stream,
186  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
187  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
188  primary_stream{&stream, stream_deleter_noop},
189  secondary_stream{&stream, stream_deleter_noop},
190  format{detail::sequence_file_output_format_exposer<file_format>{}}
191  {
// Reject, at compile time, a format tag that is not listed in valid_formats.
192  static_assert(list_traits::contains<file_format, valid_formats>,
193  "You selected a format that is not in the valid_formats of this file.");
194  }
195 
// Overload of the stream constructor for a stream passed as stream_t &&.
// The stream is moved into a newly allocated stream_t that primary_stream owns
// (released through stream_deleter_default); secondary_stream refers to that same
// object with the no-op deleter, so it is deleted exactly once.
// The format and fields arguments act as tags only; their names are wrapped in SEQAN3_DOXYGEN_ONLY.
197  template <output_stream stream_t, sequence_file_output_format file_format>
198  requires std::same_as<typename std::remove_reference_t<stream_t>::char_type, stream_char_type>
199  sequence_file_output(stream_t && stream,
200  file_format const & SEQAN3_DOXYGEN_ONLY(format_tag),
201  selected_field_ids const & SEQAN3_DOXYGEN_ONLY(fields_tag) = selected_field_ids{}) :
202  primary_stream{new stream_t{std::move(stream)}, stream_deleter_default},
203  secondary_stream{&*primary_stream, stream_deleter_noop},
204  format{detail::sequence_file_output_format_exposer<file_format>{}}
205  {
// Reject, at compile time, a format tag that is not listed in valid_formats.
206  static_assert(list_traits::contains<file_format, valid_formats>,
207  "You selected a format that is not in the valid_formats of this file.");
208  }
210 
// Returns an iterator to the current position in the file.
// The iterator (detail::out_file_iterator) is constructed from a reference to this file object.
232  iterator begin() noexcept
233  {
234  return {*this};
235  }
236 
// Returns a sentinel for comparison with the iterator.
// The sentinel type is std::default_sentinel_t and is value-initialised.
251  sentinel end() noexcept
252  {
253  return {};
254  }
255 
// Write a seqan3::record to the file.
// Only participates for types satisfying detail::record_like.
// The seq, id and qual fields are looked up in the record by field id and handed to
// write_record in that fixed order.
// NOTE(review): presumably get_or_ignore yields an "ignore" placeholder for a field the
// record does not contain — confirm against detail::get_or_ignore.
274  template <typename record_t>
275  void push_back(record_t && r)
276  requires detail::record_like<record_t>
277  {
278  write_record(detail::get_or_ignore<field::seq>(r),
279  detail::get_or_ignore<field::id>(r),
280  detail::get_or_ignore<field::qual>(r));
281  }
282 
// Write a tuple-like object that is not a record to the file.
// The position of each field inside the tuple is taken from selected_field_ids::index_of,
// i.e. the tuple elements are interpreted in the order of the selected fields.
// The values are handed to write_record in the fixed order seq, id, qual.
304  template <typename tuple_t>
305  void push_back(tuple_t && t)
306  requires tuple_like<tuple_t> && (!detail::record_like<tuple_t>)
307  {
308  // index_of might return npos, but this will be handled well by get_or_ignore (and just return ignore)
309  write_record(detail::get_or_ignore<selected_field_ids::index_of(field::seq)>(t),
310  detail::get_or_ignore<selected_field_ids::index_of(field::id)>(t),
311  detail::get_or_ignore<selected_field_ids::index_of(field::qual)>(t));
312  }
313 
// Write a record to the file by passing individual fields.
// The arguments are bundled with std::tie into a tuple of lvalue references (they are not
// forwarded or copied here) and that tuple is passed on to push_back.
337  template <typename arg_t, typename... arg_types>
338  void emplace_back(arg_t && arg, arg_types &&... args)
339  {
340  push_back(std::tie(arg, args...));
341  }
342 
// Range assignment: writes every element of the given input range to the file.
// Each element is forwarded to push_back in iteration order; returns *this.
// NOTE(review): the listing skips a line between the signature and the body (likely a
// requires-clause constraining the element type) — confirm against the real header.
364  template <std::ranges::input_range rng_t>
365  sequence_file_output & operator=(rng_t && range)
367  {
368  for (auto && record : range)
369  push_back(std::forward<decltype(record)>(record));
370  return *this;
371  }
372 
// Pipe syntax for an lvalue file: `range | file` assigns the range to the file
// (via `f = range`) and returns a reference to the same file object.
// NOTE(review): the listing skips a line between the signature and the body (likely a
// requires-clause) — confirm against the real header.
400  template <std::ranges::input_range rng_t>
401  friend sequence_file_output & operator|(rng_t && range, sequence_file_output & f)
403  {
404  f = range;
405  return f;
406  }
407 
// Pipe syntax for an rvalue (temporary) file: assigns the range to the file
// (via `f = range`) and returns the file by value.
// f is a named rvalue reference, so it is moved explicitly into the return value.
// NOTE(review): the listing skips a line between the signature and the body (likely a
// requires-clause) — confirm against the real header.
409  template <std::ranges::input_range rng_t>
410  friend sequence_file_output operator|(rng_t && range, sequence_file_output && f)
412  {
413  f = range;
414  return std::move(f);
415  }
417 
419  sequence_file_output_options options{};
420 
425  {
426  return *secondary_stream;
427  }
429 
430 protected:
433  std::vector<char> stream_buffer{std::vector<char>(1'000'000)};
434 
// Deleter that does nothing; installed on stream pointers that refer to a stream
// this object must not delete.
442  static void stream_deleter_noop(std::basic_ostream<stream_char_type> *)
443  {}
// Deleter that calls delete on the pointer; installed on stream pointers that hold a
// stream allocated with new by this class.
445  static void stream_deleter_default(std::basic_ostream<stream_char_type> * ptr)
446  {
447  delete ptr;
448  }
449 
// The primary stream: the file stream or the user-provided stream handed to a constructor.
// Starts out null with the no-op deleter.
// NOTE(review): the definition of stream_ptr_t is not part of this listing.
451  stream_ptr_t primary_stream{nullptr, stream_deleter_noop};
// The stream that records are actually written to (see write_record). It is either the
// primary stream itself or the stream produced by detail::make_secondary_ostream.
453  stream_ptr_t secondary_stream{nullptr, stream_deleter_noop};
454 
// Variant type with one alternative per entry of valid_formats, each wrapped in
// detail::sequence_file_output_format_exposer.
456  using format_type =
457  typename detail::variant_from_tags<valid_formats, detail::sequence_file_output_format_exposer>::type;
// The currently selected format; visited by write_record for every record.
459  format_type format;
461 
// Write one record through the currently selected format.
// Visits the format variant and calls write_sequence_record on the active alternative,
// passing the secondary stream, the file's options and the three field values.
// Debug builds assert beforehand that the variant is not valueless.
463  template <typename seq_t, typename id_t, typename qual_t>
464  void write_record(seq_t && seq, id_t && id, qual_t && qual)
465  {
466  assert(!format.valueless_by_exception());
467  std::visit(
468  [&](auto & f)
469  {
470  {
471  f.write_sequence_record(*secondary_stream, options, seq, id, qual);
472  }
473  },
474  format);
475  }
476 
478  friend iterator;
479 };
480 
487 template <output_stream stream_t, sequence_file_output_format file_format>
489  file_format const &)
492 
494 template <output_stream stream_t, sequence_file_output_format file_format>
496  file_format const &)
499 
501 template <output_stream stream_t,
502  sequence_file_output_format file_format,
503  detail::fields_specialisation selected_field_ids>
504 sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &)
506 
508 template <output_stream stream_t,
509  sequence_file_output_format file_format,
510  detail::fields_specialisation selected_field_ids>
511 sequence_file_output(stream_t &, file_format const &, selected_field_ids const &)
514 } // namespace seqan3
A class for writing sequence files, e.g. FASTA, FASTQ ...
Definition: io/sequence_file/output.hpp:69
sequence_file_output(std::filesystem::path filename, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from filename.
Definition: io/sequence_file/output.hpp:150
void const_iterator
The const iterator type is void, because files are not const-iterable.
Definition: io/sequence_file/output.hpp:114
sequence_file_output & operator=(sequence_file_output const &)=delete
Copy assignment is explicitly deleted, because you can't have multiple access to the same file.
char stream_char_type
Character type of the stream(s).
Definition: io/sequence_file/output.hpp:80
std::default_sentinel_t sentinel
The type returned by end().
Definition: io/sequence_file/output.hpp:116
sequence_file_output(stream_t &&, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format >>
Deduction guide for given stream, file format and field ids.
sequence_file_output(stream_t &&, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
sequence_file_output(sequence_file_output &&)=default
Move construction is defaulted.
sequence_file_output(stream_t &, file_format const &) -> sequence_file_output< typename sequence_file_output<>::selected_field_ids, type_list< file_format >>
Deduction guide for given stream and file format.
sequence_file_output(sequence_file_output const &)=delete
Copy construction is explicitly deleted, because you can't have multiple access to the same file.
valid_formats_ valid_formats
A seqan3::type_list with the possible formats.
Definition: io/sequence_file/output.hpp:78
sequence_file_output(stream_t &, file_format const &, selected_field_ids const &) -> sequence_file_output< selected_field_ids, type_list< file_format >>
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
selected_field_ids_ selected_field_ids
A seqan3::fields list with the fields selected for the record.
Definition: io/sequence_file/output.hpp:76
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, stream_char_type > sequence_file_output(stream_t &&stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: io/sequence_file/output.hpp:199
~sequence_file_output()=default
Destructor is defaulted.
void push_back(record_t &&r) requires detail::record_like< record_t >
Write a seqan3::record to the file.
Definition: io/sequence_file/output.hpp:275
requires std::same_as< typename std::remove_reference_t< stream_t >::char_type, stream_char_type > sequence_file_output(stream_t &stream, file_format const &format_tag, selected_field_ids const &fields_tag=selected_field_ids{})
Construct from an existing stream and with specified format.
Definition: io/sequence_file/output.hpp:185
void emplace_back(arg_t &&arg, arg_types &&... args)
Write a record to the file by passing individual fields.
Definition: io/sequence_file/output.hpp:338
void value_type
The value type (void).
Definition: io/sequence_file/output.hpp:102
sentinel end() noexcept
Returns a sentinel for comparison with iterator.
Definition: io/sequence_file/output.hpp:251
void reference
The reference type (void).
Definition: io/sequence_file/output.hpp:104
sequence_file_output & operator=(sequence_file_output &&)=default
Move assignment is defaulted.
void const_reference
The const reference type (void).
Definition: io/sequence_file/output.hpp:106
sequence_file_output()=delete
Default constructor is explicitly deleted, you need to give a stream or file name.
detail::out_file_iterator< sequence_file_output > iterator
The iterator type of this view (an output iterator).
Definition: io/sequence_file/output.hpp:112
iterator begin() noexcept
Returns an iterator to current position in the file.
Definition: io/sequence_file/output.hpp:232
void size_type
The size type (void).
Definition: io/sequence_file/output.hpp:108
T data(T... args)
Provides seqan3::views::elements.
Provides the seqan3::sequence_file_format_genbank class.
Provides the seqan3::format_sam.
T forward(T... args)
T get(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
requires std::common_with< typename std::remove_reference_t< validator1_type >::option_value_type, typename std::remove_reference_t< validator2_type >::option_value_type > auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1124
field
An enumerator for the fields used in file formats.
Definition: record.hpp:63
@ id
The identifier, usually a string.
@ seq
The "sequence", usually a range of nucleotides or amino acids.
@ qual
The qualities, usually in Phred score notation.
The generic concept for sequence file out formats.
Whether a type behaves like a tuple.
Provides various utility functions.
Provides exceptions used in the I/O module.
Stream concepts.
Provides various utility functions required only for output.
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
Provides the seqan3::detail::out_file_iterator class template.
The <ranges> header from C++20's standard library.
Provides the seqan3::record template and the seqan3::field enum.
Provides seqan3::detail::record_like.
Provides seqan3::sequence_file_output_format and auxiliary classes.
Provides seqan3::sequence_file_output_options.
T size(T... args)
A class template that holds a choice of seqan3::field.
Definition: record.hpp:128
The class template that file records are based on; behaves like a std::tuple.
Definition: record.hpp:192
Type that contains multiple types.
Definition: type_list.hpp:29
T tie(T... args)
Provides traits for seqan3::type_list.
Provides seqan3::tuple_like.
Provides seqan3::views::convert.
T visit(T... args)
Provides seqan3::views::zip.