protozero  1.6.1
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <string>
23 #include <utility>
24 
25 #include <protozero/config.hpp>
26 #include <protozero/data_view.hpp>
27 #include <protozero/exception.hpp>
28 #include <protozero/iterators.hpp>
29 #include <protozero/types.hpp>
30 #include <protozero/varint.hpp>
31 
32 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
33 # include <protozero/byteswap.hpp>
34 #endif
35 
36 namespace protozero {
37 
62 class pbf_reader {
63 
64  // A pointer to the next unread data.
65  const char* m_data = nullptr;
66 
67  // A pointer to one past the end of data.
68  const char* m_end = nullptr;
69 
70  // The wire type of the current field.
71  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
72 
73  // The tag of the current field.
74  pbf_tag_type m_tag = 0;
75 
76  template <typename T>
77  T get_fixed() {
78  T result;
79  const char* data = m_data;
80  skip_bytes(sizeof(T));
81  std::memcpy(&result, data, sizeof(T));
82 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
83  detail::byteswap_inplace(&result);
84 #endif
85  return result;
86  }
87 
88  template <typename T>
90  protozero_assert(tag() != 0 && "call next() before accessing field value");
91  const auto len = get_len_and_skip();
92  if (len % sizeof(T) != 0) {
94  }
95  return {const_fixed_iterator<T>(m_data - len),
96  const_fixed_iterator<T>(m_data)};
97  }
98 
99  template <typename T>
100  T get_varint() {
101  return static_cast<T>(decode_varint(&m_data, m_end));
102  }
103 
104  template <typename T>
105  T get_svarint() {
106  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
107  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
108  }
109 
110  pbf_length_type get_length() {
111  return get_varint<pbf_length_type>();
112  }
113 
114  void skip_bytes(pbf_length_type len) {
115  if (m_data + len > m_end) {
116  throw end_of_buffer_exception{};
117  }
118  m_data += len;
119 
120  // In debug builds reset the tag to zero so that we can detect (some)
121  // wrong code.
122 #ifndef NDEBUG
123  m_tag = 0;
124 #endif
125  }
126 
127  pbf_length_type get_len_and_skip() {
128  const auto len = get_length();
129  skip_bytes(len);
130  return len;
131  }
132 
133  template <typename T>
134  iterator_range<T> get_packed() {
135  protozero_assert(tag() != 0 && "call next() before accessing field value");
136  const auto len = get_len_and_skip();
137  return {T{m_data - len, m_data},
138  T{m_data, m_data}};
139  }
140 
141 public:
142 
153  explicit pbf_reader(const data_view& view) noexcept
154  : m_data(view.data()),
155  m_end(view.data() + view.size()) {
156  }
157 
168  pbf_reader(const char* data, std::size_t size) noexcept
169  : m_data(data),
170  m_end(data + size) {
171  }
172 
173 #ifndef PROTOZERO_STRICT_API
174 
185  explicit pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
186  : m_data(data.first),
187  m_end(data.first + data.second) {
188  }
189 #endif
190 
201  explicit pbf_reader(const std::string& data) noexcept
202  : m_data(data.data()),
203  m_end(data.data() + data.size()) {
204  }
205 
210  pbf_reader() noexcept = default;
211 
213  pbf_reader(const pbf_reader&) noexcept = default;
214 
216  pbf_reader(pbf_reader&&) noexcept = default;
217 
219  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
220 
222  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
223 
224  ~pbf_reader() = default;
225 
231  void swap(pbf_reader& other) noexcept {
232  using std::swap;
233  swap(m_data, other.m_data);
234  swap(m_end, other.m_end);
235  swap(m_wire_type, other.m_wire_type);
236  swap(m_tag, other.m_tag);
237  }
238 
244  operator bool() const noexcept { // NOLINT clang-tidy: google-explicit-constructor
245  return m_data < m_end;
246  }
247 
257  std::size_t length() const noexcept {
258  return std::size_t(m_end - m_data);
259  }
260 
276  bool next() {
277  if (m_data == m_end) {
278  return false;
279  }
280 
281  const auto value = get_varint<uint32_t>();
282  m_tag = pbf_tag_type(value >> 3);
283 
284  // tags 0 and 19000 to 19999 are not allowed as per
285  // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags
286  if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) {
287  throw invalid_tag_exception{};
288  }
289 
290  m_wire_type = pbf_wire_type(value & 0x07);
291  switch (m_wire_type) {
292  case pbf_wire_type::varint:
293  case pbf_wire_type::fixed64:
294  case pbf_wire_type::length_delimited:
295  case pbf_wire_type::fixed32:
296  break;
297  default:
299  }
300 
301  return true;
302  }
303 
332  bool next(pbf_tag_type next_tag) {
333  while (next()) {
334  if (m_tag == next_tag) {
335  return true;
336  }
337  skip();
338  }
339  return false;
340  }
341 
370  bool next(pbf_tag_type next_tag, pbf_wire_type type) {
371  while (next()) {
372  if (m_tag == next_tag && m_wire_type == type) {
373  return true;
374  }
375  skip();
376  }
377  return false;
378  }
379 
389  pbf_tag_type tag() const noexcept {
390  return m_tag;
391  }
392 
408  pbf_wire_type wire_type() const noexcept {
409  return m_wire_type;
410  }
411 
434  uint32_t tag_and_type() const noexcept {
436  }
437 
444  bool has_wire_type(pbf_wire_type type) const noexcept {
445  return wire_type() == type;
446  }
447 
454  void skip() {
455  protozero_assert(tag() != 0 && "call next() before calling skip()");
456  switch (wire_type()) {
457  case pbf_wire_type::varint:
458  skip_varint(&m_data, m_end);
459  break;
460  case pbf_wire_type::fixed64:
461  skip_bytes(8);
462  break;
463  case pbf_wire_type::length_delimited:
464  skip_bytes(get_length());
465  break;
466  case pbf_wire_type::fixed32:
467  skip_bytes(4);
468  break;
469  default:
470  break;
471  }
472  }
473 
475 
486  bool get_bool() {
487  protozero_assert(tag() != 0 && "call next() before accessing field value");
488  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
489  const auto data = m_data;
490  skip_varint(&m_data, m_end);
491  return data[0] != 0;
492  }
493 
501  int32_t get_enum() {
502  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
503  return get_varint<int32_t>();
504  }
505 
513  int32_t get_int32() {
514  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
515  return get_varint<int32_t>();
516  }
517 
525  int32_t get_sint32() {
526  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
527  return get_svarint<int32_t>();
528  }
529 
537  uint32_t get_uint32() {
538  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
539  return get_varint<uint32_t>();
540  }
541 
549  int64_t get_int64() {
550  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
551  return get_varint<int64_t>();
552  }
553 
561  int64_t get_sint64() {
562  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
563  return get_svarint<int64_t>();
564  }
565 
573  uint64_t get_uint64() {
574  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
575  return get_varint<uint64_t>();
576  }
577 
585  uint32_t get_fixed32() {
586  protozero_assert(tag() != 0 && "call next() before accessing field value");
587  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
588  return get_fixed<uint32_t>();
589  }
590 
598  int32_t get_sfixed32() {
599  protozero_assert(tag() != 0 && "call next() before accessing field value");
600  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
601  return get_fixed<int32_t>();
602  }
603 
611  uint64_t get_fixed64() {
612  protozero_assert(tag() != 0 && "call next() before accessing field value");
613  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
614  return get_fixed<uint64_t>();
615  }
616 
624  int64_t get_sfixed64() {
625  protozero_assert(tag() != 0 && "call next() before accessing field value");
626  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
627  return get_fixed<int64_t>();
628  }
629 
637  float get_float() {
638  protozero_assert(tag() != 0 && "call next() before accessing field value");
639  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
640  return get_fixed<float>();
641  }
642 
650  double get_double() {
651  protozero_assert(tag() != 0 && "call next() before accessing field value");
652  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
653  return get_fixed<double>();
654  }
655 
666  protozero_assert(tag() != 0 && "call next() before accessing field value");
667  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
668  const auto len = get_len_and_skip();
669  return {m_data - len, len};
670  }
671 
672 #ifndef PROTOZERO_STRICT_API
673 
681  std::pair<const char*, pbf_length_type> get_data() {
682  protozero_assert(tag() != 0 && "call next() before accessing field value");
683  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
684  const auto len = get_len_and_skip();
685  return {m_data - len, len};
686  }
687 #endif
688 
696  std::string get_bytes() {
697  return std::string(get_view());
698  }
699 
707  std::string get_string() {
708  return std::string(get_view());
709  }
710 
719  return pbf_reader{get_view()};
720  }
721 
723 
726 
729 
732 
735 
738 
741 
744 
747 
750 
753 
756 
759 
762 
765 
767 
781  return get_packed<pbf_reader::const_bool_iterator>();
782  }
783 
794  return get_packed<pbf_reader::const_enum_iterator>();
795  }
796 
807  return get_packed<pbf_reader::const_int32_iterator>();
808  }
809 
820  return get_packed<pbf_reader::const_sint32_iterator>();
821  }
822 
833  return get_packed<pbf_reader::const_uint32_iterator>();
834  }
835 
846  return get_packed<pbf_reader::const_int64_iterator>();
847  }
848 
859  return get_packed<pbf_reader::const_sint64_iterator>();
860  }
861 
872  return get_packed<pbf_reader::const_uint64_iterator>();
873  }
874 
885  return packed_fixed<uint32_t>();
886  }
887 
898  return packed_fixed<int32_t>();
899  }
900 
911  return packed_fixed<uint64_t>();
912  }
913 
924  return packed_fixed<int64_t>();
925  }
926 
937  return packed_fixed<float>();
938  }
939 
950  return packed_fixed<double>();
951  }
952 
954 
955 }; // class pbf_reader
956 
963 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
964  lhs.swap(rhs);
965 }
966 
967 } // end namespace protozero
968 
969 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:153
int64_t get_sfixed64()
Definition: pbf_reader.hpp:624
uint32_t get_uint32()
Definition: pbf_reader.hpp:537
uint64_t get_fixed64()
Definition: pbf_reader.hpp:611
int32_t get_sfixed32()
Definition: pbf_reader.hpp:598
Definition: exception.hpp:48
uint64_t get_uint64()
Definition: pbf_reader.hpp:573
constexpr int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:182
Definition: iterators.hpp:160
int32_t get_int32()
Definition: pbf_reader.hpp:513
Definition: exception.hpp:82
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:201
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:858
constexpr uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept
Definition: types.hpp:56
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:963
Contains macro checks for different configurations.
iterator_range< pbf_reader::const_float_iterator > get_packed_float()
Definition: pbf_reader.hpp:936
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:444
std::size_t length() const noexcept
Definition: pbf_reader.hpp:257
void skip()
Definition: pbf_reader.hpp:454
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:806
pbf_reader get_message()
Definition: pbf_reader.hpp:718
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:845
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
Contains the iterators for access to packed repeated fields.
pbf_wire_type
Definition: types.hpp:40
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:871
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:819
iterator_range< pbf_reader::const_sfixed32_iterator > get_packed_sfixed32()
Definition: pbf_reader.hpp:897
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:408
Contains the implementation of the data_view class.
Definition: exception.hpp:72
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:231
int64_t get_sint64()
Definition: pbf_reader.hpp:561
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:332
uint32_t tag_and_type() const noexcept
Definition: pbf_reader.hpp:434
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:681
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:780
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:525
std::string get_bytes()
Definition: pbf_reader.hpp:696
iterator_range< pbf_reader::const_double_iterator > get_packed_double()
Definition: pbf_reader.hpp:949
double get_double()
Definition: pbf_reader.hpp:650
pbf_reader & operator=(const pbf_reader &other) noexcept=default
pbf_reader messages can be copied trivially.
bool get_bool()
Definition: pbf_reader.hpp:486
std::string get_string()
Definition: pbf_reader.hpp:707
uint32_t pbf_length_type
Definition: types.hpp:63
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:185
data_view get_view()
Definition: pbf_reader.hpp:665
iterator_range< pbf_reader::const_fixed32_iterator > get_packed_fixed32()
Definition: pbf_reader.hpp:884
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:168
uint32_t pbf_tag_type
Definition: types.hpp:33
uint32_t get_fixed32()
Definition: pbf_reader.hpp:585
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:793
Definition: data_view.hpp:39
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:389
Definition: iterators.hpp:359
iterator_range< pbf_reader::const_sfixed64_iterator > get_packed_sfixed64()
Definition: pbf_reader.hpp:923
Definition: iterators.hpp:286
int32_t get_enum()
Definition: pbf_reader.hpp:501
Definition: pbf_reader.hpp:62
bool next(pbf_tag_type next_tag, pbf_wire_type type)
Definition: pbf_reader.hpp:370
Definition: iterators.hpp:39
float get_float()
Definition: pbf_reader.hpp:637
Definition: exception.hpp:61
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:276
iterator_range< pbf_reader::const_fixed64_iterator > get_packed_fixed64()
Definition: pbf_reader.hpp:910
int64_t get_int64()
Definition: pbf_reader.hpp:549
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:832
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:24