SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
edit_distance_unbanded.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <algorithm>
16 #include <bitset>
17 #include <ranges>
18 #include <utility>
19 
27 
28 namespace seqan3::detail
29 {
33 template <typename derived_t, typename edit_traits>
34 class edit_distance_unbanded_max_errors_policy :
36  edit_traits
38 {
39 protected:
40  static_assert(edit_traits::use_max_errors, "This policy assumes that edit_traits::use_max_errors is true.");
41 
43  friend derived_t;
44 
48  edit_distance_unbanded_max_errors_policy() noexcept = default;
49  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy const &) noexcept =
50  default;
51  edit_distance_unbanded_max_errors_policy(edit_distance_unbanded_max_errors_policy &&) noexcept =
52  default;
53  edit_distance_unbanded_max_errors_policy &
54  operator=(edit_distance_unbanded_max_errors_policy const &) noexcept = default;
55  edit_distance_unbanded_max_errors_policy &
56  operator=(edit_distance_unbanded_max_errors_policy &&) noexcept = default;
57  ~edit_distance_unbanded_max_errors_policy() noexcept = default;
58 
60 
61  using typename edit_traits::score_type;
62  using typename edit_traits::word_type;
63 
69  score_type max_errors{255};
71  size_t last_block{0u};
73  word_type last_score_mask{};
75 
81  void max_errors_init(size_t block_count) noexcept
82  {
83  derived_t * self = static_cast<derived_t *>(this);
84 
85  max_errors = -get<align_cfg::min_score>(self->config).score;
86 
87  assert(max_errors >= score_type{0});
88 
89  if (std::ranges::empty(self->query)) // [[unlikely]]
90  {
91  last_block = 0u;
92  self->score_mask = 0u;
93  last_score_mask = self->score_mask;
94  return;
95  }
96 
97  last_block = block_count - 1u;
98  last_score_mask = self->score_mask;
99 
100  // local_max_errors either stores the maximal number of _score (me.max_errors) or the needle size minus one. It
101  // is used for the mask computation and setting the initial score (the minus one is there because of the Ukkonen
102  // trick).
103  size_t const local_max_errors = std::min<size_t>(max_errors, std::ranges::size(self->query) - 1u);
104  self->score_mask = word_type{1u} << (local_max_errors % self->word_size);
105  last_block = std::min(local_max_errors / self->word_size, last_block);
106  self->_score = local_max_errors + 1u;
107  }
108 
110  bool is_last_active_cell_within_last_row() const noexcept
111  {
112  derived_t const * self = static_cast<derived_t const *>(this);
113  return (self->score_mask == this->last_score_mask) && (this->last_block == self->vp.size() - 1u);
114  }
115 
117  bool prev_last_active_cell() noexcept
118  {
119  derived_t * self = static_cast<derived_t *>(this);
120  self->score_mask >>= 1u;
121  if (self->score_mask != 0u)
122  return true;
123 
124  if constexpr (edit_traits::is_global)
125  {
126  if (last_block == 0u) // [[unlikely]]
127  return false;
128  }
129 
130  last_block--;
131 
132  self->score_mask = word_type{1u} << (edit_traits::word_size - 1u);
133  return true;
134  }
135 
137  void next_last_active_cell() noexcept
138  {
139  derived_t * self = static_cast<derived_t *>(this);
140  self->score_mask <<= 1u;
141  if (self->score_mask)
142  return;
143 
144  self->score_mask = 1u;
145  last_block++;
146  }
147 
151  bool update_last_active_cell() noexcept
152  {
153  derived_t * self = static_cast<derived_t *>(this);
154  // update the last active cell
155  while (!(self->_score <= max_errors))
156  {
157  self->advance_score(self->vn[last_block], self->vp[last_block], self->score_mask);
158  if (!prev_last_active_cell())
159  {
160  // prev_last_active_cell = false can only happen for global alignments
161  assert(edit_traits::is_global);
162  // we abort here if we don't need to compute a matrix, because the continued
163  // computation can't produce an alignment.
164  return !edit_traits::compute_matrix;
165  }
166  }
167 
168  if (is_last_active_cell_within_last_row())
169  {
170  assert(self->_score <= max_errors);
171 
172  if constexpr (edit_traits::is_semi_global)
173  self->update_best_score();
174 
175  return self->on_hit();
176  }
177  else
178  {
179  next_last_active_cell();
180  self->advance_score(self->vp[last_block], self->vn[last_block], self->score_mask);
181  }
182 
183  return false;
184  }
186 
188  static size_t max_rows(word_type const score_mask,
189  unsigned const last_block,
190  score_type const score,
191  score_type const max_errors) noexcept
192  {
193  using score_matrix_type = typename edit_traits::score_matrix_type;
194  return score_matrix_type::max_rows(score_mask, last_block, score, max_errors);
195  }
196 };
197 
201 template <typename derived_t, typename edit_traits>
202 class edit_distance_unbanded_global_policy :
204  edit_traits
206 {
207 protected:
208  static_assert(edit_traits::is_global || edit_traits::is_semi_global,
209  "This policy assumes that edit_traits::is_global or edit_traits::is_semi_global is true.");
210 
212  friend derived_t;
213 
217  edit_distance_unbanded_global_policy() noexcept = default;
218  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy const &) noexcept =
219  default;
220  edit_distance_unbanded_global_policy(edit_distance_unbanded_global_policy &&) noexcept = default;
221  edit_distance_unbanded_global_policy &
222  operator=(edit_distance_unbanded_global_policy const &) noexcept = default;
223  edit_distance_unbanded_global_policy &
224  operator=(edit_distance_unbanded_global_policy &&) noexcept = default;
225  ~edit_distance_unbanded_global_policy() noexcept = default;
226 
228 
230  using typename edit_traits::score_type;
231 
240  score_type _best_score{};
242 
248  void score_init() noexcept
249  {
250  derived_t const * self = static_cast<derived_t const *>(this);
251  _best_score = self->_score;
252  }
253 
255  bool is_valid() const noexcept
256  {
257  [[maybe_unused]] derived_t const * self = static_cast<derived_t const *>(this);
258  // This condition uses the observation that after each computation of a column, _score has either the initial
259  // value of the first row (i.e. the entire column consist of INF's), has the value _score = max_errors + 1
260  // (there exists a cell within the column that has value <= max_errors, but is not on the last row) or _score <=
261  // max_errors (the score of the last active cell is <= max_errors)
262  if constexpr (edit_traits::use_max_errors)
263  return _best_score <= self->max_errors;
264 
265  // When not using max_errors there is always a valid alignment, because the last row will always be updated and
266  // with it the score.
267  return true;
268  }
269 
271  seqan3::detail::advanceable_alignment_coordinate<> invalid_coordinate() const noexcept
272  {
273  derived_t const * self = static_cast<derived_t const *>(this);
274  return {column_index_type{std::ranges::size(self->database)}, row_index_type{std::ranges::size(self->query)}};
275  }
276 
278  void update_best_score() noexcept
279  {
280  derived_t const * self = static_cast<derived_t const *>(this);
281  _best_score = self->_score;
282  }
283 
285  size_t end_positions_first() const noexcept
286  {
287  derived_t const * self = static_cast<derived_t const *>(this);
288  return std::ranges::size(self->database);
289  }
291 
292 public:
299  std::optional<score_type> score() const noexcept
300  {
301  derived_t const * self = static_cast<derived_t const *>(this);
302  static_assert(edit_traits::compute_score,
303  "score() can only be computed if you specify the result type within "
304  "your alignment config.");
305  if (!self->is_valid())
306  return std::nullopt;
307 
308  return -_best_score;
309  }
310 
313  seqan3::detail::advanceable_alignment_coordinate<> end_positions() const noexcept
314  {
315  derived_t const * self = static_cast<derived_t const *>(this);
316  static_assert(edit_traits::compute_end_positions,
317  "end_positions() can only be computed if you specify the "
318  "result type within your alignment config.");
319  if (!self->is_valid())
320  return self->invalid_coordinate();
321 
322  column_index_type const first{self->end_positions_first()};
323  row_index_type const second{std::ranges::size(self->query)};
324  return {first, second};
325  }
327 };
328 
330 template <typename derived_t, typename edit_traits>
331 class edit_distance_unbanded_semi_global_policy : public edit_distance_unbanded_global_policy<derived_t, edit_traits>
332 {
333 protected:
334  static_assert(edit_traits::is_semi_global, "This policy assumes that edit_traits::is_semi_global is true.");
335 
337  friend derived_t;
338 
342  edit_distance_unbanded_semi_global_policy() noexcept = default;
343  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy const &) noexcept =
344  default;
345  edit_distance_unbanded_semi_global_policy(edit_distance_unbanded_semi_global_policy &&) noexcept =
346  default;
347  edit_distance_unbanded_semi_global_policy &
348  operator=(edit_distance_unbanded_semi_global_policy const &) noexcept = default;
349  edit_distance_unbanded_semi_global_policy &
350  operator=(edit_distance_unbanded_semi_global_policy &&) noexcept = default;
351  ~edit_distance_unbanded_semi_global_policy() noexcept = default;
352 
354 
356  using base_t = edit_distance_unbanded_global_policy<derived_t, edit_traits>;
358  using database_iterator = typename edit_traits::database_iterator;
359  using base_t::_best_score;
361 
373  database_iterator _best_score_col{};
375 
381  void score_init() noexcept
382  {
383  derived_t const * self = static_cast<derived_t const *>(this);
384  base_t::score_init();
385  _best_score_col = self->database_it_end;
386  }
387 
389  void update_best_score() noexcept
390  {
391  derived_t const * self = static_cast<derived_t const *>(this);
392  // we have to make sure that update_best_score is only called after a score update within the last row.
393  if constexpr (edit_traits::use_max_errors)
394  {
395  assert(std::ranges::empty(self->query) || self->is_last_active_cell_within_last_row());
396  }
397 
398  _best_score_col = (self->_score <= _best_score) ? self->database_it : _best_score_col;
399  _best_score = (self->_score <= _best_score) ? self->_score : _best_score;
400  }
401 
403  size_t end_positions_first() const noexcept
404  {
405  derived_t const * self = static_cast<derived_t const *>(this);
406  // offset == 0u is a special case if database sequence is empty, because in this case the best column is zero.
407  size_t offset = std::ranges::empty(self->database) ? 0u : 1u;
408  return std::ranges::distance(std::ranges::begin(self->database), _best_score_col) + offset;
409  }
411 };
412 
416 template <typename derived_t, typename edit_traits>
417 class edit_distance_unbanded_score_matrix_policy :
419  edit_traits
421 {
422 protected:
423  static_assert(edit_traits::compute_score_matrix,
424  "This policy assumes that edit_traits::compute_score_matrix is true.");
425 
427  friend derived_t;
428 
432  edit_distance_unbanded_score_matrix_policy() noexcept = default;
433  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy const &) noexcept =
434  default;
435  edit_distance_unbanded_score_matrix_policy(edit_distance_unbanded_score_matrix_policy &&) noexcept =
436  default;
437  edit_distance_unbanded_score_matrix_policy &
438  operator=(edit_distance_unbanded_score_matrix_policy const &) noexcept = default;
439  edit_distance_unbanded_score_matrix_policy &
440  operator=(edit_distance_unbanded_score_matrix_policy &&) noexcept = default;
441  ~edit_distance_unbanded_score_matrix_policy() noexcept = default;
442 
444 
445  using typename edit_traits::score_matrix_type;
446 
452  score_matrix_type _score_matrix{};
454 
467  void score_matrix_init()
468  {
469  derived_t const * self = static_cast<derived_t const *>(this);
470 
471  _score_matrix = score_matrix_type{std::ranges::size(self->query) + 1u};
472  _score_matrix.reserve(std::ranges::size(self->database) + 1u);
473  }
475 
476 public:
484  score_matrix_type const & score_matrix() const noexcept
485  {
486  static_assert(edit_traits::compute_score_matrix,
487  "score_matrix() can only be computed if you specify the "
488  "result type within your alignment config.");
489  return _score_matrix;
490  }
492 };
493 
497 template <typename derived_t, typename edit_traits>
498 class edit_distance_unbanded_trace_matrix_policy :
500  edit_traits
502 {
503 protected:
504  static_assert(edit_traits::compute_trace_matrix,
505  "This policy assumes that edit_traits::compute_trace_matrix is true.");
506 
508  friend derived_t;
509 
513  edit_distance_unbanded_trace_matrix_policy() noexcept = default;
514  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy const &) noexcept =
515  default;
516  edit_distance_unbanded_trace_matrix_policy(edit_distance_unbanded_trace_matrix_policy &&) noexcept =
517  default;
518  edit_distance_unbanded_trace_matrix_policy &
519  operator=(edit_distance_unbanded_trace_matrix_policy const &) noexcept = default;
520  edit_distance_unbanded_trace_matrix_policy &
521  operator=(edit_distance_unbanded_trace_matrix_policy &&) noexcept = default;
522  ~edit_distance_unbanded_trace_matrix_policy() noexcept = default;
523 
525 
526  using typename edit_traits::alignment_result_type;
527  using typename edit_traits::trace_matrix_type;
528  using typename edit_traits::word_type;
529 
535  std::vector<word_type> hp{};
538 
540  trace_matrix_type _trace_matrix{};
542 
555  void trace_matrix_init(size_t block_count)
556  {
557  derived_t const * self = static_cast<derived_t const *>(this);
558 
559  _trace_matrix = trace_matrix_type{std::ranges::size(self->query) + 1u};
560  _trace_matrix.reserve(std::ranges::size(self->database) + 1u);
561 
562  hp.resize(block_count, 0u);
563  db.resize(block_count, 0u);
564  }
566 
567 public:
573  trace_matrix_type const & trace_matrix() const noexcept
574  {
575  static_assert(edit_traits::compute_trace_matrix,
576  "trace_matrix() can only be computed if you specify the "
577  "result type within your alignment config.");
578  return _trace_matrix;
579  }
580 
583  seqan3::detail::advanceable_alignment_coordinate<> begin_positions() const noexcept
584  {
585  derived_t const * self = static_cast<derived_t const *>(this);
586  static_assert(edit_traits::compute_begin_positions,
587  "begin_positions() can only be computed if you specify the "
588  "result type within your alignment config.");
589  if (!self->is_valid())
590  return self->invalid_coordinate();
591 
592  auto [first, second] = self->end_positions();
593  detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
594  auto trace_path = self->trace_matrix().trace_path(end_positions);
595  auto trace_path_it = std::ranges::begin(trace_path);
596  std::ranges::advance(trace_path_it, std::ranges::end(trace_path));
597  detail::matrix_coordinate const begin_positions = trace_path_it.coordinate();
598  return {detail::column_index_type{begin_positions.col}, detail::row_index_type{begin_positions.row}};
599  }
600 
603  auto alignment() const noexcept
604  {
605  using alignment_t = std::remove_cvref_t<decltype(std::declval<alignment_result_type &>().alignment())>;
606 
607  derived_t const * self = static_cast<derived_t const *>(this);
608  static_assert(edit_traits::compute_sequence_alignment,
609  "alignment() can only be computed if you specify the "
610  "result type within your alignment config.");
611 
612  if (!self->is_valid())
613  return alignment_t{};
614 
615  auto [first, second] = self->end_positions();
616  detail::matrix_coordinate const end_positions{detail::row_index_type{second}, detail::column_index_type{first}};
617  aligned_sequence_builder builder{self->database, self->query};
618  auto trace_path = self->trace_matrix().trace_path(end_positions);
619  return builder(trace_path).alignment;
620  }
622 };
623 
627 template <typename value_t>
628 class proxy_reference
629 {
630 public:
634  proxy_reference() noexcept = default;
635  proxy_reference(proxy_reference const &) noexcept = default;
636  proxy_reference(proxy_reference &&) noexcept = default;
637  proxy_reference & operator=(proxy_reference const &) noexcept = default;
638  proxy_reference & operator=(proxy_reference &&) noexcept = default;
639  ~proxy_reference() = default;
640 
642  proxy_reference(value_t & t) noexcept : ptr(std::addressof(t))
643  {}
644 
645  proxy_reference(value_t &&) = delete;
646 
648  template <typename other_value_t>
649  requires std::convertible_to<other_value_t, value_t>
650  proxy_reference & operator=(other_value_t && u) noexcept
651  {
652  get() = std::forward<other_value_t>(u);
653  return *this;
654  }
656 
658  value_t & get() const noexcept
659  {
660  assert(ptr != nullptr);
661  return *ptr;
662  }
663 
665  operator value_t &() const noexcept
666  {
667  return get();
668  }
669 
670 private:
672  value_t * ptr{nullptr};
673 };
674 
686 template <std::ranges::viewable_range database_t,
687  std::ranges::viewable_range query_t,
688  typename align_config_t,
689  typename edit_traits>
690 class edit_distance_unbanded :
692  // Hide this section in doxygen, because it messes up the inheritance.
693  public edit_distance_base<edit_traits::use_max_errors,
694  edit_distance_unbanded_max_errors_policy,
695  edit_traits,
696  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
697  public edit_distance_base<edit_traits::is_global,
698  edit_distance_unbanded_global_policy,
699  edit_traits,
700  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
701  public edit_distance_base<edit_traits::is_semi_global,
702  edit_distance_unbanded_semi_global_policy,
703  edit_traits,
704  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
705  public edit_distance_base<edit_traits::compute_score_matrix,
706  edit_distance_unbanded_score_matrix_policy,
707  edit_traits,
708  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>,
709  public edit_distance_base<edit_traits::compute_trace_matrix,
710  edit_distance_unbanded_trace_matrix_policy,
711  edit_traits,
712  edit_distance_unbanded<database_t, query_t, align_config_t, edit_traits>>
714 {
715 public:
716  using edit_traits::word_size;
717  using typename edit_traits::align_config_type;
718  using typename edit_traits::database_type;
719  using typename edit_traits::query_type;
720  using typename edit_traits::score_type;
721  using typename edit_traits::word_type;
722 
723 private:
725  template <typename other_derived_t, typename other_edit_traits>
726  friend class edit_distance_unbanded_max_errors_policy;
728  template <typename other_derived_t, typename other_edit_traits>
729  friend class edit_distance_unbanded_global_policy;
731  template <typename other_derived_t, typename other_edit_traits>
732  friend class edit_distance_unbanded_semi_global_policy;
734  template <typename other_derived_t, typename other_edit_traits>
735  friend class edit_distance_unbanded_score_matrix_policy;
737  template <typename other_derived_t, typename other_edit_traits>
738  friend class edit_distance_unbanded_trace_matrix_policy;
739 
740  using edit_traits::compute_begin_positions;
741  using edit_traits::compute_end_positions;
742  using edit_traits::compute_matrix;
743  using edit_traits::compute_score;
744  using edit_traits::compute_score_matrix;
745  using edit_traits::compute_sequence_alignment;
746  using edit_traits::compute_trace_matrix;
747  using edit_traits::is_global;
748  using edit_traits::is_semi_global;
749  using edit_traits::use_max_errors;
750  using typename edit_traits::alignment_result_type;
751  using typename edit_traits::database_iterator;
752  using typename edit_traits::query_alphabet_type;
753  using typename edit_traits::score_matrix_type;
754  using typename edit_traits::trace_matrix_type;
755 
757  database_t database;
759  query_t query;
761  align_config_t config;
762 
764  static constexpr word_type hp0 = is_global ? 1u : 0u;
766  static constexpr word_type hn0 = 0u;
768  static constexpr word_type vp0 = ~word_type{0u};
770  static constexpr word_type vn0 = 0u;
771 
773  score_type _score{};
780  word_type score_mask{0u};
791  std::vector<word_type> bit_masks{};
792 
794  database_iterator database_it{};
796  database_iterator database_it_end{};
797 
799  struct compute_state_trace_matrix
800  {
802  proxy_reference<word_type> db{};
803  };
804 
806  struct compute_state : enable_state_t<edit_traits::compute_trace_matrix, compute_state_trace_matrix>
807  {
810 
812  word_type b{};
814  word_type d0{};
816  hp_type hp{};
818  word_type hn{};
820  proxy_reference<word_type> vp{};
822  proxy_reference<word_type> vn{};
824  word_type carry_d0{};
826  word_type carry_hp{hp0};
828  word_type carry_hn{};
829  };
830 
832  void add_state()
833  {
834  if constexpr (!use_max_errors && compute_score_matrix)
835  this->_score_matrix.add_column(vp, vn);
836 
837  if constexpr (!use_max_errors && compute_trace_matrix)
838  this->_trace_matrix.add_column(this->hp, this->db, vp);
839 
840  if constexpr (use_max_errors && compute_matrix)
841  {
842  size_t max_rows = this->max_rows(score_mask, this->last_block, _score, this->max_errors);
843  if constexpr (compute_score_matrix)
844  this->_score_matrix.add_column(vp, vn, max_rows);
845 
846  if constexpr (compute_trace_matrix)
847  this->_trace_matrix.add_column(this->hp, this->db, vp, max_rows);
848  }
849  }
850 
851 public:
856  edit_distance_unbanded() = delete;
857  edit_distance_unbanded(edit_distance_unbanded const &) = default;
858  edit_distance_unbanded(edit_distance_unbanded &&) = default;
859  edit_distance_unbanded & operator=(edit_distance_unbanded const &) = default;
860  edit_distance_unbanded & operator=(edit_distance_unbanded &&) = default;
861  ~edit_distance_unbanded() = default;
862 
869  edit_distance_unbanded(database_t _database,
870  query_t _query,
871  align_config_t _config,
872  edit_traits const & SEQAN3_DOXYGEN_ONLY(_traits)) :
873  database{std::forward<database_t>(_database)},
874  query{std::forward<query_t>(_query)},
875  config{std::forward<align_config_t>(_config)},
876  _score{static_cast<score_type>(std::ranges::size(query))},
877  database_it{std::ranges::begin(database)},
878  database_it_end{std::ranges::end(database)}
879  {
880  static constexpr size_t alphabet_size_ = alphabet_size<query_alphabet_type>;
881 
882  size_t const block_count = (std::ranges::size(query) - 1u + word_size) / word_size;
883  score_mask = word_type{1u} << ((std::ranges::size(query) - 1u + word_size) % word_size);
884 
885  this->score_init();
886  if constexpr (use_max_errors)
887  this->max_errors_init(block_count);
888 
889  if constexpr (compute_score_matrix)
890  this->score_matrix_init();
891 
892  if constexpr (compute_trace_matrix)
893  this->trace_matrix_init(block_count);
894 
895  vp.resize(block_count, vp0);
896  vn.resize(block_count, vn0);
897  bit_masks.resize((alphabet_size_ + 1u) * block_count, 0u);
898 
899  // encoding the letters as bit-vectors
900  for (size_t j = 0u; j < std::ranges::size(query); j++)
901  {
902  size_t const i = block_count * seqan3::to_rank(query[j]) + j / word_size;
903  bit_masks[i] |= word_type{1u} << (j % word_size);
904  }
905 
906  add_state();
907  }
909 
910 private:
912  template <bool with_carry>
913  static void compute_step(compute_state & state) noexcept
914  {
915  word_type x, t;
916  assert(state.carry_d0 <= 1u);
917  assert(state.carry_hp <= 1u);
918  assert(state.carry_hn <= 1u);
919 
920  x = state.b | state.vn;
921  t = state.vp + (x & state.vp) + state.carry_d0;
922 
923  state.d0 = (t ^ state.vp) | x;
924  state.hn = state.vp & state.d0;
925  state.hp = state.vn | ~(state.vp | state.d0);
926 
927  if constexpr (with_carry)
928  state.carry_d0 = (state.carry_d0 != 0u) ? t <= state.vp : t < state.vp;
929 
930  x = (state.hp << 1u) | state.carry_hp;
931  state.vn = x & state.d0;
932  state.vp = (state.hn << 1u) | ~(x | state.d0) | state.carry_hn;
933 
934  if constexpr (with_carry)
935  {
936  state.carry_hp = state.hp >> (word_size - 1u);
937  state.carry_hn = state.hn >> (word_size - 1u);
938  }
939  }
940 
942  template <bool with_carry>
943  void compute_kernel(compute_state & state, size_t const block_offset, size_t const current_block) noexcept
944  {
945  state.vp = proxy_reference<word_type>{this->vp[current_block]};
946  state.vn = proxy_reference<word_type>{this->vn[current_block]};
947  if constexpr (compute_trace_matrix)
948  {
949  state.hp = proxy_reference<word_type>{this->hp[current_block]};
950  state.db = proxy_reference<word_type>{this->db[current_block]};
951  }
952  state.b = bit_masks[block_offset + current_block];
953 
954  compute_step<with_carry>(state);
955  if constexpr (compute_trace_matrix)
956  state.db = ~(state.b ^ state.d0);
957  }
958 
960  void advance_score(word_type P, word_type N, word_type mask) noexcept
961  {
962  if ((P & mask) != word_type{0u})
963  _score++;
964  else if ((N & mask) != word_type{0u})
965  _score--;
966  }
967 
969  bool on_hit() noexcept
970  {
971  // TODO: call external on_hit functor
972  return false;
973  }
974 
976  inline bool small_patterns();
977 
979  inline bool large_patterns();
980 
982  inline void compute_empty_query_sequence()
983  {
984  assert(std::ranges::empty(query));
985 
986  bool abort_computation = false;
987 
988  for (; database_it != database_it_end; ++database_it)
989  {
990  if constexpr (is_global)
991  ++_score;
992  else // is_semi_global
993  this->update_best_score();
994 
995  // call on_hit
996  if constexpr (use_max_errors)
997  abort_computation = on_hit();
998 
999  this->add_state();
1000  if (abort_computation)
1001  break;
1002  }
1003  }
1004 
1006  void compute()
1007  {
1008  // limit search width for prefix search (if no matrix needs to be computed)
1009  if constexpr (use_max_errors && is_global && !compute_matrix)
1010  {
1011  // Note: For global alignments we know that the database can only be max_length long to have a score less
1012  // than or equal max_errors in the last cell.
1013  //
1014  // The following matrix shows a minimal value for each entry (because a diagonal must be +0 or +1, each
1015  // diagonal is at least the value of the initial value in the first row)
1016  // 0 1 2 3 4 5 6...
1017  // 1 0 1 2 3 4 5...
1018  // ...
1019  // m ... 3 2 1 0 1 2 3 4 5
1020  // Thus, after |query| + max_errors entries the score will always be higher than max_errors.
1021  size_t const max_length = std::ranges::size(query) + this->max_errors + 1u;
1022  size_t const haystack_length = std::min(std::ranges::size(database), max_length);
1023  database_it_end -= std::ranges::size(database) - haystack_length;
1024  }
1025 
1026  // distinguish between the version for needles not longer than
1027  // one machine word and the version for longer needles
1028  // A special cases is if the second sequence is empty (vp.size() == 0u).
1029  if (vp.size() == 0u) // [[unlikely]]
1030  compute_empty_query_sequence();
1031  else if (vp.size() == 1u)
1032  small_patterns();
1033  else
1034  large_patterns();
1035 
1036  if constexpr (is_global)
1037  this->update_best_score();
1038  }
1039 
1040 public:
1045  template <typename callback_t>
1046  void operator()([[maybe_unused]] size_t const idx, callback_t && callback)
1047  {
1048  using traits_type = alignment_configuration_traits<align_config_t>;
1049  using result_value_type = typename alignment_result_value_type_accessor<alignment_result_type>::type;
1050 
1051  compute();
1052 
1053  // First cache the begin and end positions if enabled by the edit distance traits.
1054  // Note, that they might be activated even if the user did not configure them, but in order to
1055  // compute for example the alignment both information are required and enabled internally.
1056  auto cached_end_positions = this->invalid_coordinate();
1057  auto cached_begin_positions = this->invalid_coordinate();
1058 
1059  if constexpr (compute_end_positions)
1060  cached_end_positions = this->end_positions();
1061 
1062  if constexpr (compute_begin_positions && !compute_sequence_alignment)
1063  {
1064  static_assert(compute_end_positions, "End positions required to compute the begin positions.");
1065  cached_begin_positions = this->begin_positions();
1066  }
1067 
1068  // Fill the result value type. Note we need to ask what was enabled on the user side in order to store
1069  // the correct information in the alignment result type. This information is stored in the alignment
1070  // configuration traits and not in the edit distance traits.
1071  result_value_type res_vt{};
1072 
1073  if constexpr (traits_type::output_sequence1_id)
1074  res_vt.sequence1_id = idx;
1075 
1076  if constexpr (traits_type::output_sequence2_id)
1077  res_vt.sequence2_id = idx;
1078 
1079  if constexpr (traits_type::compute_score)
1080  res_vt.score = this->score().value_or(matrix_inf<score_type>);
1081 
1082  if constexpr (traits_type::compute_sequence_alignment)
1083  {
1084  if (this->is_valid())
1085  {
1086  auto [first, second] = cached_end_positions;
1087  detail::matrix_coordinate const end_positions{detail::row_index_type{second},
1088  detail::column_index_type{first}};
1089 
1090  aligned_sequence_builder builder{this->database, this->query};
1091  auto trace_res = builder(this->trace_matrix().trace_path(end_positions));
1092  res_vt.alignment = std::move(trace_res.alignment);
1093  cached_begin_positions.first = trace_res.first_sequence_slice_positions.first;
1094  cached_begin_positions.second = trace_res.second_sequence_slice_positions.first;
1095  }
1096  }
1097 
1098  if constexpr (traits_type::compute_end_positions)
1099  res_vt.end_positions = std::move(cached_end_positions);
1100 
1101  if constexpr (traits_type::compute_begin_positions)
1102  res_vt.begin_positions = std::move(cached_begin_positions);
1103 
1104  callback(alignment_result_type{std::move(res_vt)});
1105  }
1106 };
1107 
1108 template <std::ranges::viewable_range database_t,
1109  std::ranges::viewable_range query_t,
1110  typename align_config_t,
1111  typename traits_t>
1112 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::small_patterns()
1113 {
1114  bool abort_computation = false;
1115 
1116  // computing the blocks
1117  while (database_it != database_it_end)
1118  {
1119  compute_state state{};
1120  size_t const block_offset = seqan3::to_rank((query_alphabet_type)*database_it);
1121 
1122  compute_kernel<false>(state, block_offset, 0u);
1123  advance_score(state.hp, state.hn, score_mask);
1124 
1125  // semi-global without max_errors guarantees that the score stays within the last row
1126  if constexpr (is_semi_global && !use_max_errors)
1127  this->update_best_score();
1128 
1129  // updating the last active cell
1130  if constexpr (use_max_errors)
1131  abort_computation = this->update_last_active_cell();
1132 
1133  add_state();
1134  ++database_it;
1135  if (abort_computation)
1136  return true;
1137  }
1138 
1139  return false;
1140 }
1141 
1142 template <std::ranges::viewable_range database_t,
1143  std::ranges::viewable_range query_t,
1144  typename align_config_t,
1145  typename traits_t>
1146 bool edit_distance_unbanded<database_t, query_t, align_config_t, traits_t>::large_patterns()
1147 {
1148  bool abort_computation = false;
1149 
1150  while (database_it != database_it_end)
1151  {
1152  compute_state state{};
1153  size_t const block_offset = vp.size() * seqan3::to_rank((query_alphabet_type)*database_it);
1154 
1155  size_t block_count = vp.size();
1156  if constexpr (use_max_errors)
1157  block_count = this->last_block + 1;
1158 
1159  // compute each block in the current column; carries between blocks will be propagated.
1160  for (size_t current_block = 0u; current_block < block_count; current_block++)
1161  compute_kernel<true>(state, block_offset, current_block);
1162 
1163  advance_score(state.hp, state.hn, score_mask);
1164 
1165  // semi-global without max_errors guarantees that the score stays within the last row
1166  if constexpr (is_semi_global && !use_max_errors)
1167  this->update_best_score();
1168 
1169  if constexpr (use_max_errors)
1170  {
1171  // if the last active cell reached the end within the current block we have to compute the next block.
1172  bool additional_block = score_mask >> (word_size - 1u);
1173  bool reached_last_block = this->last_block + 1u == vp.size();
1174  // If there is no next block we skip the computation.
1175  if (reached_last_block)
1176  additional_block = false;
1177 
1178  if (additional_block)
1179  {
1180  size_t const current_block = this->last_block + 1u;
1181  // this might not be necessary, but carry_d0 = 1u might have an influence on the result of vn and vp.
1182  vp[current_block] = vp0;
1183  vn[current_block] = vn0;
1184  compute_kernel<false>(state, block_offset, current_block);
1185  }
1186 
1187  // updating the last active cell
1188  abort_computation = this->update_last_active_cell();
1189  }
1190 
1191  add_state();
1192  ++database_it;
1193 
1194  if (abort_computation)
1195  return true;
1196  }
1197 
1198  return false;
1199 }
1200 
1207 template <typename database_t, typename query_t, typename config_t, typename traits_t>
1208 edit_distance_unbanded(database_t && database, query_t && query, config_t config, traits_t)
1209  -> edit_distance_unbanded<database_t, query_t, config_t, traits_t>;
1211 
1212 } // namespace seqan3::detail
Provides seqan3::detail::advanceable_alignment_coordinate.
Provides seqan3::alignment_result.
T begin(T... args)
Provides seqan3::configuration and utility functions.
Forwards for seqan3::edit_distance_unbanded related types.
Provides seqan3::detail::edit_distance_score_matrix_full.
Provides seqan3::detail::edit_distance_trace_matrix_full.
T end(T... args)
T forward(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
Provides seqan3::detail::matrix.
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178
T min(T... args)
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
SeqAn specific customisations in the standard namespace.
The <ranges> header from C++20's standard library.