protozero  1.6.8
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
19 #include <protozero/config.hpp>
20 #include <protozero/data_view.hpp>
21 #include <protozero/exception.hpp>
22 #include <protozero/iterators.hpp>
23 #include <protozero/types.hpp>
24 #include <protozero/varint.hpp>
25 
26 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
27 # include <protozero/byteswap.hpp>
28 #endif
29 
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <string>
34 #include <utility>
35 
36 namespace protozero {
37 
// Reader over a buffer of protocol-buffer-encoded bytes. The object holds
// only two pointers into the buffer plus the tag and wire type of the field
// most recently read by next(); it never copies the data it reads from.
62 class pbf_reader {
63 
64  // Read cursor: points at the next unread byte. Equals m_end once all
65  const char* m_data = nullptr;
66 
67  // One past the last byte of the buffer being read.
68  const char* m_end = nullptr;
69 
70  // Wire type of the current field, as decoded by next().
71  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
72 
73  // Tag of the current field. next() never accepts tag 0, so 0 means
// "no current field"; debug builds reset it to 0 in skip_bytes().
74  pbf_tag_type m_tag = 0;
75 
// Reads one fixed-size value of type T at the cursor and advances past it.
// skip_bytes() throws end_of_buffer_exception when fewer than sizeof(T)
// bytes remain.
76  template <typename T>
77  T get_fixed() {
78  T result;
// Remember where the value starts; skip_bytes() moves m_data forward.
79  const char* data = m_data;
// Bounds check happens here, before any byte is copied.
80  skip_bytes(sizeof(T));
// memcpy places no alignment requirement on the source bytes.
81  std::memcpy(&result, data, sizeof(T));
82 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
// Host is not little-endian: swap the bytes of the copied value.
83  byteswap_inplace(&result);
84 #endif
85  return result;
86  }
87 
// Consumes a length-prefixed payload and returns an iterator range over it,
// viewed as consecutive fixed-size values of type T.
// Throws invalid_length_exception if the payload length is not a multiple of
// sizeof(T); skip_bytes() throws end_of_buffer_exception if the payload runs
// past the end of the buffer.
88  template <typename T>
89  iterator_range<const_fixed_iterator<T>> packed_fixed() {
90  protozero_assert(tag() != 0 && "call next() before accessing field value");
// After this call m_data points one past the payload.
91  const auto len = get_len_and_skip();
92  if (len % sizeof(T) != 0) {
93  throw invalid_length_exception{};
94  }
// [m_data - len, m_data) is exactly the payload that was just skipped.
95  return {const_fixed_iterator<T>(m_data - len),
96  const_fixed_iterator<T>(m_data)};
97  }
98 
// Decodes one varint at the cursor with decode_varint(), which advances
// m_data, and returns the decoded 64-bit value converted to T.
99  template <typename T>
100  T get_varint() {
101  const auto val = static_cast<T>(decode_varint(&m_data, m_end));
102  return val;
103  }
104 
// Decodes one zigzag-encoded varint at the cursor and returns it converted
// to T. decode_varint() advances m_data; decode_zigzag64() maps the unsigned
// value back to a signed one.
105  template <typename T>
106  T get_svarint() {
// The check accepts both the varint and the length_delimited wire type.
107  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
108  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
109  }
110 
// Reads a length prefix: one varint converted to pbf_length_type.
111  pbf_length_type get_length() {
112  return get_varint<pbf_length_type>();
113  }
114 
// Advances the cursor by len bytes.
// Throws end_of_buffer_exception if fewer than len bytes remain; in that
// case the cursor is left unchanged.
115  void skip_bytes(pbf_length_type len) {
// Compare as unsigned. m_data never passes m_end, so the difference is
// non-negative and converts to std::size_t without loss. Casting len to a
// signed pointer-difference type instead would turn lengths of 2^31 or more
// negative where that type is 32 bits wide, and the check would then pass.
116  if (static_cast<std::size_t>(m_end - m_data) < len) {
117  throw end_of_buffer_exception{};
118  }
119  m_data += len;
120 
121 #ifndef NDEBUG
122  // In debug builds reset the tag to zero so that we can detect (some)
123  // wrong code.
124  m_tag = 0;
125 #endif
126  }
127 
// Reads a length prefix, skips that many payload bytes, and returns the
// length. Afterwards the payload occupies [m_data - len, m_data).
128  pbf_length_type get_len_and_skip() {
129  const auto len = get_length();
130  skip_bytes(len);
131  return len;
132  }
133 
// Consumes a length-prefixed payload and returns an iterator range of
// iterator type T over it. T is constructed from a (position, end) pair.
134  template <typename T>
135  iterator_range<T> get_packed() {
136  protozero_assert(tag() != 0 && "call next() before accessing field value");
137  const auto len = get_len_and_skip();
// First iterator starts at the payload; second is the matching end
// position (begin == end at the payload's end).
138  return {T{m_data - len, m_data},
139  T{m_data, m_data}};
140  }
141 
142 public:
143 
// Constructs a reader over the bytes described by view. Only pointers to the
// viewed bytes are stored; nothing is copied.
154  explicit pbf_reader(const data_view& view) noexcept
155  : m_data{view.data()},
156  m_end{view.data() + view.size()} {
157  }
158 
// Constructs a reader over the size bytes starting at data. Only the pointers
// are stored; nothing is copied.
169  pbf_reader(const char* data, std::size_t size) noexcept
170  : m_data{data},
171  m_end{data + size} {
172  }
173 
// Only compiled when PROTOZERO_STRICT_API is not defined.
174 #ifndef PROTOZERO_STRICT_API
175 
// Constructs a reader from a (pointer, length) pair: data.first is the start
// of the buffer and data.second its size in bytes. Nothing is copied.
186  explicit pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
187  : m_data{data.first},
188  m_end{data.first + data.second} {
189  }
190 #endif
191 
// Constructs a reader over the contents of a std::string. The reader points
// into the string's own buffer (no copy), so it is only usable while that
// buffer stays valid.
202  explicit pbf_reader(const std::string& data) noexcept
203  : m_data{data.data()},
204  m_end{data.data() + data.size()} {
205  }
206 
// Default constructor: members take their in-class initializers, so the
// reader has no data (m_data == m_end) and converts to false.
211  pbf_reader() noexcept = default;
212 
// Copying duplicates the pointers, tag and wire type; both readers then
// refer to the same underlying bytes.
214  pbf_reader(const pbf_reader&) noexcept = default;
215 
// Defaulted move: for these pointer/integer members it is a member-wise copy.
217  pbf_reader(pbf_reader&&) noexcept = default;
218 
// Defaulted copy assignment (member-wise).
220  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
221 
// Defaulted move assignment (member-wise).
223  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
224 
// Defaulted destructor; the class has no cleanup code of its own.
225  ~pbf_reader() = default;
226 
// Exchanges the complete state (cursor, end pointer, wire type and tag) of
// this reader with other.
232  void swap(pbf_reader& other) noexcept {
// Bring std::swap into scope and call unqualified so overload resolution
// can also find swap overloads via argument-dependent lookup.
233  using std::swap;
234  swap(m_data, other.m_data);
235  swap(m_end, other.m_end);
236  swap(m_wire_type, other.m_wire_type);
237  swap(m_tag, other.m_tag);
238  }
239 
// True while unread bytes remain (cursor has not reached the end).
// Deliberately non-explicit; the NOLINT silences the corresponding lint checks.
245  operator bool() const noexcept { // NOLINT(google-explicit-constructor, hicpp-explicit-conversions)
246  return m_data != m_end;
247  }
248 
// Returns a view of the bytes that have not been read yet: it starts at the
// cursor and spans the distance to the end pointer.
252  data_view data() const noexcept {
253  return {m_data, static_cast<std::size_t>(m_end - m_data)};
254  }
255 
// Returns the number of bytes between the cursor and the end pointer, i.e.
// how much data is still unread.
265  std::size_t length() const noexcept {
266  return std::size_t(m_end - m_data);
267  }
268 
// Advances to the next field and makes it the current one.
// Returns false if no bytes are left; otherwise reads the field key, stores
// its tag and wire type, and returns true. The field's value is not consumed.
// Throws invalid_tag_exception for tag 0 or a tag in 19000..19999, and
// unknown_pbf_wire_type_exception for a wire type other than varint,
// fixed64, length_delimited or fixed32.
284  bool next() {
285  if (m_data == m_end) {
286  return false;
287  }
288 
// The key is one varint: the low three bits are the wire type, the
// remaining bits the tag.
289  const auto value = get_varint<uint32_t>();
290  m_tag = pbf_tag_type(value >> 3U);
291 
292  // tags 0 and 19000 to 19999 are not allowed as per
293  // https://developers.google.com/protocol-buffers/docs/proto#assigning-tags
294  if (m_tag == 0 || (m_tag >= 19000 && m_tag <= 19999)) {
295  throw invalid_tag_exception{};
296  }
297 
298  m_wire_type = pbf_wire_type(value & 0x07U);
299  switch (m_wire_type) {
300  case pbf_wire_type::varint:
301  case pbf_wire_type::fixed64:
302  case pbf_wire_type::length_delimited:
303  case pbf_wire_type::fixed32:
304  break;
305  default:
// Any other 3-bit value is not a wire type this reader can skip or decode.
306  throw unknown_pbf_wire_type_exception{};
307  }
308 
309  return true;
310  }
311 
// Advances to the next field whose tag equals next_tag, skipping the values
// of all other fields on the way. Returns true with that field current, or
// false once the data is exhausted.
340  bool next(pbf_tag_type next_tag) {
341  while (next()) {
342  if (m_tag == next_tag) {
343  return true;
344  }
// Not the wanted tag: consume this field's value and keep looking.
345  skip();
346  }
347  return false;
348  }
349 
// Advances to the next field that has both the given tag and the given wire
// type, skipping the values of all other fields. Returns true with that field
// current, or false once the data is exhausted.
378  bool next(pbf_tag_type next_tag, pbf_wire_type type) {
379  while (next()) {
380  if (m_tag == next_tag && m_wire_type == type) {
381  return true;
382  }
// Tag or wire type differs: consume this field's value and keep looking.
383  skip();
384  }
385  return false;
386  }
387 
// Returns the tag stored by the last next() call (0 if there is none).
397  pbf_tag_type tag() const noexcept {
398  return m_tag;
399  }
400 
// Returns the wire type stored by the last next() call
// (pbf_wire_type::unknown before the first one).
416  pbf_wire_type wire_type() const noexcept {
417  return m_wire_type;
418  }
419 
// Returns the current field's tag and wire type combined into one integer by
// the free function protozero::tag_and_type(). The call is qualified because
// this member function has the same name.
442  uint32_t tag_and_type() const noexcept {
443  return protozero::tag_and_type(tag(), wire_type());
444  }
445 
// True if the current field's wire type equals type.
452  bool has_wire_type(pbf_wire_type type) const noexcept {
453  return wire_type() == type;
454  }
455 
// Consumes the value of the current field without decoding it, so that the
// cursor is positioned at the next field key.
462  void skip() {
463  protozero_assert(tag() != 0 && "call next() before calling skip()");
464  switch (wire_type()) {
465  case pbf_wire_type::varint:
466  skip_varint(&m_data, m_end);
467  break;
468  case pbf_wire_type::fixed64:
// Fixed 64-bit values always occupy eight bytes.
469  skip_bytes(8);
470  break;
471  case pbf_wire_type::length_delimited:
// Read the length prefix, then skip that many payload bytes.
472  skip_bytes(get_length());
473  break;
474  case pbf_wire_type::fixed32:
// Fixed 32-bit values always occupy four bytes.
475  skip_bytes(4);
476  break;
477  default:
// Any other wire type: nothing is skipped.
478  break;
479  }
480  }
481 
483 
// Consumes the current varint field and returns it as a bool.
494  bool get_bool() {
495  protozero_assert(tag() != 0 && "call next() before accessing field value");
496  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
// Keep a pointer to the first byte; skip_varint() advances m_data.
497  const auto data = m_data;
498  skip_varint(&m_data, m_end);
// Only the first byte of the varint decides the result.
499  return data[0] != 0;
500  }
501 
// Consumes the current varint field and returns it as an enum value,
// represented as int32_t.
509  int32_t get_enum() {
510  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
511  return get_varint<int32_t>();
512  }
513 
// Consumes the current varint field and returns it converted to int32_t.
521  int32_t get_int32() {
522  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
523  return get_varint<int32_t>();
524  }
525 
// Consumes the current zigzag-encoded varint field and returns it converted
// to int32_t.
533  int32_t get_sint32() {
534  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
535  return get_svarint<int32_t>();
536  }
537 
// Consumes the current varint field and returns it converted to uint32_t.
545  uint32_t get_uint32() {
546  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
547  return get_varint<uint32_t>();
548  }
549 
// Consumes the current varint field and returns it converted to int64_t.
557  int64_t get_int64() {
558  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
559  return get_varint<int64_t>();
560  }
561 
// Consumes the current zigzag-encoded varint field and returns it converted
// to int64_t.
569  int64_t get_sint64() {
570  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
571  return get_svarint<int64_t>();
572  }
573 
// Consumes the current varint field and returns it as uint64_t.
581  uint64_t get_uint64() {
582  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
583  return get_varint<uint64_t>();
584  }
585 
// Consumes the current fixed32 field (four bytes) and returns it as uint32_t.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
593  uint32_t get_fixed32() {
594  protozero_assert(tag() != 0 && "call next() before accessing field value");
595  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
596  return get_fixed<uint32_t>();
597  }
598 
// Consumes the current fixed32 field (four bytes) and returns it as int32_t.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
606  int32_t get_sfixed32() {
607  protozero_assert(tag() != 0 && "call next() before accessing field value");
608  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
609  return get_fixed<int32_t>();
610  }
611 
// Consumes the current fixed64 field (eight bytes) and returns it as uint64_t.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
619  uint64_t get_fixed64() {
620  protozero_assert(tag() != 0 && "call next() before accessing field value");
621  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
622  return get_fixed<uint64_t>();
623  }
624 
// Consumes the current fixed64 field (eight bytes) and returns it as int64_t.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
632  int64_t get_sfixed64() {
633  protozero_assert(tag() != 0 && "call next() before accessing field value");
634  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
635  return get_fixed<int64_t>();
636  }
637 
// Consumes the current fixed32 field and returns its bytes as a float.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
645  float get_float() {
646  protozero_assert(tag() != 0 && "call next() before accessing field value");
647  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
648  return get_fixed<float>();
649  }
650 
// Consumes the current fixed64 field and returns its bytes as a double.
// get_fixed() throws end_of_buffer_exception if the bytes are not available.
658  double get_double() {
659  protozero_assert(tag() != 0 && "call next() before accessing field value");
660  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
661  return get_fixed<double>();
662  }
663 
// Consumes the current length-delimited field (string, bytes or message) and
// returns a view of its payload. The view points into the reader's buffer;
// no bytes are copied. skip_bytes() throws end_of_buffer_exception if the
// payload extends past the end of the buffer.
673  data_view get_view() {
674  protozero_assert(tag() != 0 && "call next() before accessing field value");
675  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
// After this call m_data is one past the payload, so it starts at m_data - len.
676  const auto len = get_len_and_skip();
677  return {m_data - len, len};
678  }
679 
// Only compiled when PROTOZERO_STRICT_API is not defined.
680 #ifndef PROTOZERO_STRICT_API
681 
// Consumes the current length-delimited field and returns its payload as a
// (pointer, length) pair. The pointer refers into the reader's buffer; no
// bytes are copied.
689  std::pair<const char*, pbf_length_type> get_data() {
690  protozero_assert(tag() != 0 && "call next() before accessing field value");
691  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
// After this call m_data is one past the payload, so it starts at m_data - len.
692  const auto len = get_len_and_skip();
693  return {m_data - len, len};
694  }
695 #endif
696 
// Consumes the current length-delimited field and returns its payload as a
// std::string built from get_view().
704  std::string get_bytes() {
705  return std::string(get_view());
706  }
707 
// Consumes the current length-delimited field and returns its payload as a
// std::string built from get_view(). Same implementation as get_bytes().
715  std::string get_string() {
716  return std::string(get_view());
717  }
718 
// Consumes the current length-delimited field and returns a new pbf_reader
// over its payload, for reading an embedded message. The new reader points
// into the same buffer as this one; no bytes are copied.
726  pbf_reader get_message() {
727  return pbf_reader{get_view()};
728  }
729 
731 
// Forward iterator over packed bool (varint) values.
733  using const_bool_iterator = const_varint_iterator<int32_t>;
734 
// Forward iterator over packed enum (varint) values.
736  using const_enum_iterator = const_varint_iterator<int32_t>;
737 
// Forward iterator over packed int32 (varint) values.
739  using const_int32_iterator = const_varint_iterator<int32_t>;
740 
// Forward iterator over packed sint32 (zigzag varint) values.
742  using const_sint32_iterator = const_svarint_iterator<int32_t>;
743 
// Forward iterator over packed uint32 (varint) values.
745  using const_uint32_iterator = const_varint_iterator<uint32_t>;
746 
// Forward iterator over packed int64 (varint) values.
748  using const_int64_iterator = const_varint_iterator<int64_t>;
749 
// Forward iterator over packed sint64 (zigzag varint) values.
751  using const_sint64_iterator = const_svarint_iterator<int64_t>;
752 
// Forward iterator over packed uint64 (varint) values.
754  using const_uint64_iterator = const_varint_iterator<uint64_t>;
755 
// Iterator over packed fixed32 values.
757  using const_fixed32_iterator = const_fixed_iterator<uint32_t>;
758 
// Iterator over packed sfixed32 values.
760  using const_sfixed32_iterator = const_fixed_iterator<int32_t>;
761 
// Iterator over packed fixed64 values.
763  using const_fixed64_iterator = const_fixed_iterator<uint64_t>;
764 
// Iterator over packed sfixed64 values.
766  using const_sfixed64_iterator = const_fixed_iterator<int64_t>;
767 
// Iterator over packed float values.
769  using const_float_iterator = const_fixed_iterator<float>;
770 
// Iterator over packed double values.
772  using const_double_iterator = const_fixed_iterator<double>;
773 
775 
// Consumes the current packed repeated bool field and returns an iterator
// range over its values.
788  iterator_range<pbf_reader::const_bool_iterator> get_packed_bool() {
789  return get_packed<pbf_reader::const_bool_iterator>();
790  }
791 
// Consumes the current packed repeated enum field and returns an iterator
// range over its values.
801  iterator_range<pbf_reader::const_enum_iterator> get_packed_enum() {
802  return get_packed<pbf_reader::const_enum_iterator>();
803  }
804 
// Consumes the current packed repeated int32 field and returns an iterator
// range over its values.
814  iterator_range<pbf_reader::const_int32_iterator> get_packed_int32() {
815  return get_packed<pbf_reader::const_int32_iterator>();
816  }
817 
// Consumes the current packed repeated sint32 field and returns an iterator
// range over its values.
827  iterator_range<pbf_reader::const_sint32_iterator> get_packed_sint32() {
828  return get_packed<pbf_reader::const_sint32_iterator>();
829  }
830 
// Consumes the current packed repeated uint32 field and returns an iterator
// range over its values.
840  iterator_range<pbf_reader::const_uint32_iterator> get_packed_uint32() {
841  return get_packed<pbf_reader::const_uint32_iterator>();
842  }
843 
// Consumes the current packed repeated int64 field and returns an iterator
// range over its values.
853  iterator_range<pbf_reader::const_int64_iterator> get_packed_int64() {
854  return get_packed<pbf_reader::const_int64_iterator>();
855  }
856 
// Consumes the current packed repeated sint64 field and returns an iterator
// range over its values.
866  iterator_range<pbf_reader::const_sint64_iterator> get_packed_sint64() {
867  return get_packed<pbf_reader::const_sint64_iterator>();
868  }
869 
// Consumes the current packed repeated uint64 field and returns an iterator
// range over its values.
879  iterator_range<pbf_reader::const_uint64_iterator> get_packed_uint64() {
880  return get_packed<pbf_reader::const_uint64_iterator>();
881  }
882 
// Consumes the current packed repeated fixed32 field and returns an iterator
// range over its uint32_t values. packed_fixed() rejects a payload whose
// length is not a multiple of the element size.
892  iterator_range<pbf_reader::const_fixed32_iterator> get_packed_fixed32() {
893  return packed_fixed<uint32_t>();
894  }
895 
// Consumes the current packed repeated sfixed32 field and returns an iterator
// range over its int32_t values. packed_fixed() rejects a payload whose
// length is not a multiple of the element size.
905  iterator_range<pbf_reader::const_sfixed32_iterator> get_packed_sfixed32() {
906  return packed_fixed<int32_t>();
907  }
908 
// Consumes the current packed repeated fixed64 field and returns an iterator
// range over its uint64_t values. packed_fixed() rejects a payload whose
// length is not a multiple of the element size.
918  iterator_range<pbf_reader::const_fixed64_iterator> get_packed_fixed64() {
919  return packed_fixed<uint64_t>();
920  }
921 
// Consumes the current packed repeated sfixed64 field and returns an iterator
// range over its int64_t values. packed_fixed() rejects a payload whose
// length is not a multiple of the element size.
931  iterator_range<pbf_reader::const_sfixed64_iterator> get_packed_sfixed64() {
932  return packed_fixed<int64_t>();
933  }
934 
// Consumes the current packed repeated float field and returns an iterator
// range over its values. packed_fixed() rejects a payload whose length is
// not a multiple of the element size.
944  iterator_range<pbf_reader::const_float_iterator> get_packed_float() {
945  return packed_fixed<float>();
946  }
947 
// Consumes the current packed repeated double field and returns an iterator
// range over its values. packed_fixed() rejects a payload whose length is
// not a multiple of the element size.
957  iterator_range<pbf_reader::const_double_iterator> get_packed_double() {
958  return packed_fixed<double>();
959  }
960 
962 
963 }; // class pbf_reader
964 
// Namespace-level swap for pbf_reader; forwards to the member swap(), so an
// unqualified swap(a, b) on two readers resolves to this overload.
971 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
972  lhs.swap(rhs);
973 }
974 
975 } // end namespace protozero
976 
977 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:154
int64_t get_sfixed64()
Definition: pbf_reader.hpp:632
uint32_t get_uint32()
Definition: pbf_reader.hpp:545
uint64_t get_fixed64()
Definition: pbf_reader.hpp:619
int32_t get_sfixed32()
Definition: pbf_reader.hpp:606
Definition: exception.hpp:52
uint64_t get_uint64()
Definition: pbf_reader.hpp:581
constexpr int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:199
Definition: iterators.hpp:160
int32_t get_int32()
Definition: pbf_reader.hpp:521
Definition: exception.hpp:92
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:202
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:866
constexpr uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept
Definition: types.hpp:55
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:971
Contains macro checks for different configurations.
iterator_range< pbf_reader::const_float_iterator > get_packed_float()
Definition: pbf_reader.hpp:944
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:452
std::size_t length() const noexcept
Definition: pbf_reader.hpp:265
void skip()
Definition: pbf_reader.hpp:462
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:814
pbf_reader get_message()
Definition: pbf_reader.hpp:726
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:853
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
Contains the iterators for access to packed repeated fields.
constexpr std::size_t size() const noexcept
Return length of data in bytes.
Definition: data_view.hpp:99
pbf_wire_type
Definition: types.hpp:40
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:879
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:827
iterator_range< pbf_reader::const_sfixed32_iterator > get_packed_sfixed32()
Definition: pbf_reader.hpp:905
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:416
Contains the implementation of the data_view class.
Definition: exception.hpp:80
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:232
data_view data() const noexcept
Definition: pbf_reader.hpp:252
int64_t get_sint64()
Definition: pbf_reader.hpp:569
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:340
uint32_t tag_and_type() const noexcept
Definition: pbf_reader.hpp:442
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:689
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:788
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:533
std::string get_bytes()
Definition: pbf_reader.hpp:704
iterator_range< pbf_reader::const_double_iterator > get_packed_double()
Definition: pbf_reader.hpp:957
double get_double()
Definition: pbf_reader.hpp:658
pbf_reader & operator=(const pbf_reader &other) noexcept=default
pbf_reader messages can be copied trivially.
bool get_bool()
Definition: pbf_reader.hpp:494
std::string get_string()
Definition: pbf_reader.hpp:715
uint32_t pbf_length_type
Definition: types.hpp:62
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:186
data_view get_view()
Definition: pbf_reader.hpp:673
iterator_range< pbf_reader::const_fixed32_iterator > get_packed_fixed32()
Definition: pbf_reader.hpp:892
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:169
uint32_t pbf_tag_type
Definition: types.hpp:33
uint32_t get_fixed32()
Definition: pbf_reader.hpp:593
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:801
Definition: data_view.hpp:39
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:397
Definition: iterators.hpp:374
iterator_range< pbf_reader::const_sfixed64_iterator > get_packed_sfixed64()
Definition: pbf_reader.hpp:931
void byteswap_inplace(uint32_t *ptr) noexcept
byteswap the data pointed to by ptr in-place.
Definition: byteswap.hpp:55
Definition: iterators.hpp:289
int32_t get_enum()
Definition: pbf_reader.hpp:509
Definition: pbf_reader.hpp:62
bool next(pbf_tag_type next_tag, pbf_wire_type type)
Definition: pbf_reader.hpp:378
constexpr const char * data() const noexcept
Return pointer to data.
Definition: data_view.hpp:94
Definition: iterators.hpp:39
float get_float()
Definition: pbf_reader.hpp:645
Definition: exception.hpp:67
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:284
iterator_range< pbf_reader::const_fixed64_iterator > get_packed_fixed64()
Definition: pbf_reader.hpp:918
int64_t get_int64()
Definition: pbf_reader.hpp:557
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:840
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:23