SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
edit_distance_score_matrix_full.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <bitset>
16 
19 
20 namespace seqan3::detail
21 {
22 
31 template <typename word_t, typename score_t, bool is_semi_global, bool use_max_errors>
32 class edit_distance_score_matrix_full
33 {
34 public:
36  template <std::ranges::viewable_range database_t,
37  std::ranges::viewable_range query_t,
38  typename align_config_t,
39  typename edit_traits>
40  friend class edit_distance_unbanded;
41 
45  edit_distance_score_matrix_full() = default;
46  edit_distance_score_matrix_full(edit_distance_score_matrix_full const &) = default;
47  edit_distance_score_matrix_full(edit_distance_score_matrix_full &&) = default;
48  edit_distance_score_matrix_full & operator=(edit_distance_score_matrix_full const &) = default;
49  edit_distance_score_matrix_full & operator=(edit_distance_score_matrix_full &&) = default;
50  ~edit_distance_score_matrix_full() = default;
51 
52 protected:
54  template <typename derived_t, typename edit_traits>
55  friend class edit_distance_unbanded_score_matrix_policy;
56 
60  edit_distance_score_matrix_full(size_t const rows_size) : rows_size{rows_size}, columns{}
61  {}
63 
64 public:
66  using word_type = word_t;
67 
69  static constexpr auto word_size = bits_of<word_type>;
70 
72  using score_type = score_t;
73 
76 
78  using reference = value_type;
79 
81  using size_type = size_t;
82 
84  static constexpr std::optional<score_type> inf = std::nullopt;
85 
94  void reserve(size_t const new_capacity)
95  {
96  columns.reserve(new_capacity);
97  }
98 
107  template <typename score_type>
108  static size_t max_rows(word_type const score_mask,
109  unsigned const last_block,
110  score_type const score,
111  score_type const max_errors) noexcept
112  {
113  size_t const offset = std::bit_width(score_mask);
114  size_t const active_row = word_size * last_block + offset;
115  return active_row + (score <= max_errors);
116  }
117 
123  static score_type score_delta_of_word(word_type const & vp, word_type const & vn) noexcept
124  {
125  score_type const p = std::bitset<word_size>{vp}.count();
126  score_type const n = std::bitset<word_size>{vn}.count();
127  return p - n;
128  }
129 
130 public:
132  reference at(matrix_coordinate const & coordinate) const noexcept
133  {
134  size_t col = coordinate.col;
135  size_t row = coordinate.row;
136 
137  assert(row < rows());
138  assert(col < cols());
139 
140  column_type const & column = columns[col];
141  if constexpr (use_max_errors)
142  if (!(row < column.max_rows))
143  return inf;
144 
145  score_type score = is_semi_global ? 0u : static_cast<score_type>(col);
146 
147  size_t current_row = 1u;
148  size_t word_idx = 0u;
149 
150  for (; current_row + word_size <= row; ++word_idx, current_row += word_size)
151  score += score_delta_of_word(column.vp[word_idx], column.vn[word_idx]);
152 
153  if (row >= current_row)
154  {
155  word_type const mask = (1u << (row - current_row + 1u)) - 1u;
156  score += score_delta_of_word(column.vp[word_idx] & mask, column.vn[word_idx] & mask);
157  }
158 
159  return -score;
160  }
161 
163  size_t rows() const noexcept
164  {
165  return rows_size;
166  }
167 
169  size_t cols() const noexcept
170  {
171  return columns.size();
172  }
173 
174 protected:
176  struct max_errors_state
177  {
180  size_t max_rows;
181  };
182 
184  struct score_matrix_state
185  {
190  };
191 
193  struct column_type : enable_state_t<true, score_matrix_state>, enable_state_t<use_max_errors, max_errors_state>
194  {};
195 
200  void add_column(std::vector<word_type> vp, std::vector<word_type> vn)
201  requires (!use_max_errors)
202  {
203  column_type column{};
204  column.vp = std::move(vp);
205  column.vn = std::move(vn);
206 
207  columns.push_back(std::move(column));
208  }
209 
215  void add_column(std::vector<word_type> vp, std::vector<word_type> vn, size_t const max_rows)
216  requires use_max_errors
217  {
218  column_type column{};
219  column.vp = std::move(vp);
220  column.vn = std::move(vn);
221  column.max_rows = max_rows;
222 
223  columns.push_back(std::move(column));
224  }
225 
226 private:
228  size_t rows_size{};
230  std::vector<column_type> columns{};
231 };
232 
233 } // namespace seqan3::detail
Provides utility functions for bit twiddling.
T count(T... args)
Forwards for seqan3::edit_distance_unbanded related types.
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178