protozero
Minimalistic protocol buffer decoder and encoder in C++.
pbf_reader.hpp
Go to the documentation of this file.
1 #ifndef PROTOZERO_PBF_READER_HPP
2 #define PROTOZERO_PBF_READER_HPP
3 
4 /*****************************************************************************
5 
6 protozero - Minimalistic protocol buffer decoder and encoder in C++.
7 
8 This file is from https://github.com/mapbox/protozero where you can find more
9 documentation.
10 
11 *****************************************************************************/
12 
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <string>
#include <utility>
23 
24 #include <protozero/config.hpp>
25 #include <protozero/exception.hpp>
26 #include <protozero/iterators.hpp>
27 #include <protozero/types.hpp>
28 #include <protozero/varint.hpp>
29 
30 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
31 # include <protozero/byteswap.hpp>
32 #endif
33 
34 namespace protozero {
35 
60 class pbf_reader {
61 
62  // A pointer to the next unread data.
63  const char* m_data = nullptr;
64 
65  // A pointer to one past the end of data.
66  const char* m_end = nullptr;
67 
68  // The wire type of the current field.
69  pbf_wire_type m_wire_type = pbf_wire_type::unknown;
70 
71  // The tag of the current field.
72  pbf_tag_type m_tag = 0;
73 
74  template <typename T>
75  T get_fixed() {
76  T result;
77  skip_bytes(sizeof(T));
78  std::memcpy(&result, m_data - sizeof(T), sizeof(T));
79 #if PROTOZERO_BYTE_ORDER != PROTOZERO_LITTLE_ENDIAN
80  detail::byteswap_inplace(&result);
81 #endif
82  return result;
83  }
84 
85  template <typename T>
87  protozero_assert(tag() != 0 && "call next() before accessing field value");
88  const auto len = get_len_and_skip();
89  protozero_assert(len % sizeof(T) == 0);
91  const_fixed_iterator<T>(m_data, m_data)};
92  }
93 
94  template <typename T>
95  T get_varint() {
96  return static_cast<T>(decode_varint(&m_data, m_end));
97  }
98 
99  template <typename T>
100  T get_svarint() {
101  protozero_assert((has_wire_type(pbf_wire_type::varint) || has_wire_type(pbf_wire_type::length_delimited)) && "not a varint");
102  return static_cast<T>(decode_zigzag64(decode_varint(&m_data, m_end)));
103  }
104 
105  pbf_length_type get_length() {
106  return get_varint<pbf_length_type>();
107  }
108 
109  void skip_bytes(pbf_length_type len) {
110  if (m_data + len > m_end) {
111  throw end_of_buffer_exception();
112  }
113  m_data += len;
114 
115  // In debug builds reset the tag to zero so that we can detect (some)
116  // wrong code.
117 #ifndef NDEBUG
118  m_tag = 0;
119 #endif
120  }
121 
122  pbf_length_type get_len_and_skip() {
123  const auto len = get_length();
124  skip_bytes(len);
125  return len;
126  }
127 
128  template <typename T>
129  iterator_range<T> get_packed() {
130  protozero_assert(tag() != 0 && "call next() before accessing field value");
131  const auto len = get_len_and_skip();
132  return iterator_range<T>{T{m_data - len, m_data},
133  T{m_data, m_data}};
134  }
135 
136 public:
137 
148  explicit pbf_reader(const data_view& view) noexcept
149  : m_data(view.data()),
150  m_end(view.data() + view.size()),
151  m_wire_type(pbf_wire_type::unknown),
152  m_tag(0) {
153  }
154 
165  pbf_reader(const char* data, std::size_t size) noexcept
166  : m_data(data),
167  m_end(data + size),
168  m_wire_type(pbf_wire_type::unknown),
169  m_tag(0) {
170  }
171 
182  explicit pbf_reader(const std::pair<const char*, std::size_t>& data) noexcept
183  : m_data(data.first),
184  m_end(data.first + data.second),
185  m_wire_type(pbf_wire_type::unknown),
186  m_tag(0) {
187  }
188 
199  explicit pbf_reader(const std::string& data) noexcept
200  : m_data(data.data()),
201  m_end(data.data() + data.size()),
202  m_wire_type(pbf_wire_type::unknown),
203  m_tag(0) {
204  }
205 
210  pbf_reader() noexcept = default;
211 
213  pbf_reader(const pbf_reader&) noexcept = default;
214 
216  pbf_reader(pbf_reader&&) noexcept = default;
217 
219  pbf_reader& operator=(const pbf_reader& other) noexcept = default;
220 
222  pbf_reader& operator=(pbf_reader&& other) noexcept = default;
223 
224  ~pbf_reader() = default;
225 
231  void swap(pbf_reader& other) noexcept {
232  using std::swap;
233  swap(m_data, other.m_data);
234  swap(m_end, other.m_end);
235  swap(m_wire_type, other.m_wire_type);
236  swap(m_tag, other.m_tag);
237  }
238 
244  operator bool() const noexcept {
245  return m_data < m_end;
246  }
247 
257  std::size_t length() const noexcept {
258  return std::size_t(m_end - m_data);
259  }
260 
276  bool next() {
277  if (m_data == m_end) {
278  return false;
279  }
280 
281  const auto value = get_varint<uint32_t>();
282  m_tag = pbf_tag_type(value >> 3);
283 
284  // tags 0 and 19000 to 19999 are not allowed as per
285  // https://developers.google.com/protocol-buffers/docs/proto
286  protozero_assert(((m_tag > 0 && m_tag < 19000) ||
287  (m_tag > 19999 && m_tag <= ((1 << 29) - 1))) && "tag out of range");
288 
289  m_wire_type = pbf_wire_type(value & 0x07);
290  switch (m_wire_type) {
291  case pbf_wire_type::varint:
292  case pbf_wire_type::fixed64:
293  case pbf_wire_type::length_delimited:
294  case pbf_wire_type::fixed32:
295  break;
296  default:
298  }
299 
300  return true;
301  }
302 
331  bool next(pbf_tag_type next_tag) {
332  while (next()) {
333  if (m_tag == next_tag) {
334  return true;
335  } else {
336  skip();
337  }
338  }
339  return false;
340  }
341 
370  bool next(pbf_tag_type next_tag, pbf_wire_type type) {
371  while (next()) {
372  if (m_tag == next_tag && m_wire_type == type) {
373  return true;
374  } else {
375  skip();
376  }
377  }
378  return false;
379  }
380 
390  pbf_tag_type tag() const noexcept {
391  return m_tag;
392  }
393 
409  pbf_wire_type wire_type() const noexcept {
410  return m_wire_type;
411  }
412 
435  uint32_t tag_and_type() const noexcept {
437  }
438 
445  bool has_wire_type(pbf_wire_type type) const noexcept {
446  return wire_type() == type;
447  }
448 
455  void skip() {
456  protozero_assert(tag() != 0 && "call next() before calling skip()");
457  switch (wire_type()) {
458  case pbf_wire_type::varint:
459  skip_varint(&m_data, m_end);
460  break;
461  case pbf_wire_type::fixed64:
462  skip_bytes(8);
463  break;
464  case pbf_wire_type::length_delimited:
465  skip_bytes(get_length());
466  break;
467  case pbf_wire_type::fixed32:
468  skip_bytes(4);
469  break;
470  default:
471  protozero_assert(false && "can not be here because next() should have thrown already");
472  }
473  }
474 
476 
487  bool get_bool() {
488  protozero_assert(tag() != 0 && "call next() before accessing field value");
489  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
490  protozero_assert((*m_data & 0x80) == 0 && "not a 1 byte varint");
491  skip_bytes(1);
492  return m_data[-1] != 0; // -1 okay because we incremented m_data the line before
493  }
494 
502  int32_t get_enum() {
503  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
504  return get_varint<int32_t>();
505  }
506 
514  int32_t get_int32() {
515  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
516  return get_varint<int32_t>();
517  }
518 
526  int32_t get_sint32() {
527  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
528  return get_svarint<int32_t>();
529  }
530 
538  uint32_t get_uint32() {
539  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
540  return get_varint<uint32_t>();
541  }
542 
550  int64_t get_int64() {
551  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
552  return get_varint<int64_t>();
553  }
554 
562  int64_t get_sint64() {
563  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
564  return get_svarint<int64_t>();
565  }
566 
574  uint64_t get_uint64() {
575  protozero_assert(has_wire_type(pbf_wire_type::varint) && "not a varint");
576  return get_varint<uint64_t>();
577  }
578 
586  uint32_t get_fixed32() {
587  protozero_assert(tag() != 0 && "call next() before accessing field value");
588  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
589  return get_fixed<uint32_t>();
590  }
591 
599  int32_t get_sfixed32() {
600  protozero_assert(tag() != 0 && "call next() before accessing field value");
601  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
602  return get_fixed<int32_t>();
603  }
604 
612  uint64_t get_fixed64() {
613  protozero_assert(tag() != 0 && "call next() before accessing field value");
614  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
615  return get_fixed<uint64_t>();
616  }
617 
625  int64_t get_sfixed64() {
626  protozero_assert(tag() != 0 && "call next() before accessing field value");
627  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
628  return get_fixed<int64_t>();
629  }
630 
638  float get_float() {
639  protozero_assert(tag() != 0 && "call next() before accessing field value");
640  protozero_assert(has_wire_type(pbf_wire_type::fixed32) && "not a 32-bit fixed");
641  return get_fixed<float>();
642  }
643 
651  double get_double() {
652  protozero_assert(tag() != 0 && "call next() before accessing field value");
653  protozero_assert(has_wire_type(pbf_wire_type::fixed64) && "not a 64-bit fixed");
654  return get_fixed<double>();
655  }
656 
667  protozero_assert(tag() != 0 && "call next() before accessing field value");
668  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
669  const auto len = get_len_and_skip();
670  return data_view{m_data - len, len};
671  }
672 
673 #ifndef PROTOZERO_STRICT_API
674 
682  std::pair<const char*, pbf_length_type> get_data() {
683  protozero_assert(tag() != 0 && "call next() before accessing field value");
684  protozero_assert(has_wire_type(pbf_wire_type::length_delimited) && "not of type string, bytes or message");
685  const auto len = get_len_and_skip();
686  return std::make_pair(m_data - len, len);
687  }
688 #endif
689 
697  std::string get_bytes() {
698  return std::string(get_view());
699  }
700 
708  std::string get_string() {
709  return std::string(get_view());
710  }
711 
720  return pbf_reader(get_view());
721  }
722 
724 
727 
730 
733 
736 
739 
742 
745 
748 
750 
764  return get_packed<pbf_reader::const_bool_iterator>();
765  }
766 
777  return get_packed<pbf_reader::const_enum_iterator>();
778  }
779 
790  return get_packed<pbf_reader::const_int32_iterator>();
791  }
792 
803  return get_packed<pbf_reader::const_sint32_iterator>();
804  }
805 
816  return get_packed<pbf_reader::const_uint32_iterator>();
817  }
818 
829  return get_packed<pbf_reader::const_int64_iterator>();
830  }
831 
842  return get_packed<pbf_reader::const_sint64_iterator>();
843  }
844 
855  return get_packed<pbf_reader::const_uint64_iterator>();
856  }
857 
867  auto get_packed_fixed32() -> decltype(packed_fixed<uint32_t>()) {
868  return packed_fixed<uint32_t>();
869  }
870 
880  auto get_packed_sfixed32() -> decltype(packed_fixed<int32_t>()) {
881  return packed_fixed<int32_t>();
882  }
883 
893  auto get_packed_fixed64() -> decltype(packed_fixed<uint64_t>()) {
894  return packed_fixed<uint64_t>();
895  }
896 
906  auto get_packed_sfixed64() -> decltype(packed_fixed<int64_t>()) {
907  return packed_fixed<int64_t>();
908  }
909 
919  auto get_packed_float() -> decltype(packed_fixed<float>()) {
920  return packed_fixed<float>();
921  }
922 
932  auto get_packed_double() -> decltype(packed_fixed<double>()) {
933  return packed_fixed<double>();
934  }
935 
937 
938 }; // class pbf_reader
939 
946 inline void swap(pbf_reader& lhs, pbf_reader& rhs) noexcept {
947  lhs.swap(rhs);
948 }
949 
950 } // end namespace protozero
951 
952 #endif // PROTOZERO_PBF_READER_HPP
pbf_reader(const data_view &view) noexcept
Definition: pbf_reader.hpp:148
int64_t get_sfixed64()
Definition: pbf_reader.hpp:625
uint32_t get_uint32()
Definition: pbf_reader.hpp:538
uint64_t get_fixed64()
Definition: pbf_reader.hpp:612
int32_t get_sfixed32()
Definition: pbf_reader.hpp:599
Definition: exception.hpp:48
uint64_t get_uint64()
Definition: pbf_reader.hpp:574
auto get_packed_double() -> decltype(packed_fixed< double >())
Definition: pbf_reader.hpp:932
constexpr int64_t decode_zigzag64(uint64_t value) noexcept
Definition: varint.hpp:181
Definition: iterators.hpp:146
int32_t get_int32()
Definition: pbf_reader.hpp:514
pbf_reader(const std::string &data) noexcept
Definition: pbf_reader.hpp:199
iterator_range< pbf_reader::const_sint64_iterator > get_packed_sint64()
Definition: pbf_reader.hpp:841
auto get_packed_float() -> decltype(packed_fixed< float >())
Definition: pbf_reader.hpp:919
auto get_packed_fixed32() -> decltype(packed_fixed< uint32_t >())
Definition: pbf_reader.hpp:867
constexpr uint32_t tag_and_type(T tag, pbf_wire_type wire_type) noexcept
Definition: types.hpp:56
void swap(pbf_reader &lhs, pbf_reader &rhs) noexcept
Definition: pbf_reader.hpp:946
Contains macro checks for different configurations.
Contains the declaration of low-level types used in the pbf format.
bool has_wire_type(pbf_wire_type type) const noexcept
Definition: pbf_reader.hpp:445
std::size_t length() const noexcept
Definition: pbf_reader.hpp:257
void skip()
Definition: pbf_reader.hpp:455
iterator_range< pbf_reader::const_int32_iterator > get_packed_int32()
Definition: pbf_reader.hpp:789
pbf_reader get_message()
Definition: pbf_reader.hpp:719
pbf_reader() noexcept=default
iterator_range< pbf_reader::const_int64_iterator > get_packed_int64()
Definition: pbf_reader.hpp:828
void skip_varint(const char **data, const char *end)
Definition: varint.hpp:112
auto get_packed_sfixed64() -> decltype(packed_fixed< int64_t >())
Definition: pbf_reader.hpp:906
Contains the iterators for access to packed repeated fields.
auto get_packed_sfixed32() -> decltype(packed_fixed< int32_t >())
Definition: pbf_reader.hpp:880
pbf_wire_type
Definition: types.hpp:40
iterator_range< pbf_reader::const_uint64_iterator > get_packed_uint64()
Definition: pbf_reader.hpp:854
iterator_range< pbf_reader::const_sint32_iterator > get_packed_sint32()
Definition: pbf_reader.hpp:802
pbf_wire_type wire_type() const noexcept
Definition: pbf_reader.hpp:409
void swap(pbf_reader &other) noexcept
Definition: pbf_reader.hpp:231
int64_t get_sint64()
Definition: pbf_reader.hpp:562
bool next(pbf_tag_type next_tag)
Definition: pbf_reader.hpp:331
uint32_t tag_and_type() const noexcept
Definition: pbf_reader.hpp:435
std::pair< const char *, pbf_length_type > get_data()
Definition: pbf_reader.hpp:682
auto get_packed_fixed64() -> decltype(packed_fixed< uint64_t >())
Definition: pbf_reader.hpp:893
iterator_range< pbf_reader::const_bool_iterator > get_packed_bool()
Definition: pbf_reader.hpp:763
Contains functions to swap bytes in values (for different endianness).
int32_t get_sint32()
Definition: pbf_reader.hpp:526
std::string get_bytes()
Definition: pbf_reader.hpp:697
double get_double()
Definition: pbf_reader.hpp:651
pbf_reader & operator=(const pbf_reader &other) noexcept=default
pbf_reader messages can be copied trivially.
bool get_bool()
Definition: pbf_reader.hpp:487
std::string get_string()
Definition: pbf_reader.hpp:708
uint32_t pbf_length_type
Definition: types.hpp:63
Contains the exceptions used in the protozero library.
pbf_reader(const std::pair< const char *, std::size_t > &data) noexcept
Definition: pbf_reader.hpp:182
data_view get_view()
Definition: pbf_reader.hpp:666
pbf_reader(const char *data, std::size_t size) noexcept
Definition: pbf_reader.hpp:165
uint32_t pbf_tag_type
Definition: types.hpp:33
uint32_t get_fixed32()
Definition: pbf_reader.hpp:586
iterator_range< pbf_reader::const_enum_iterator > get_packed_enum()
Definition: pbf_reader.hpp:776
Definition: types.hpp:75
pbf_tag_type tag() const noexcept
Definition: pbf_reader.hpp:390
Definition: iterators.hpp:282
Definition: iterators.hpp:215
int32_t get_enum()
Definition: pbf_reader.hpp:502
Definition: pbf_reader.hpp:60
bool next(pbf_tag_type next_tag, pbf_wire_type type)
Definition: pbf_reader.hpp:370
Definition: iterators.hpp:38
float get_float()
Definition: pbf_reader.hpp:638
Definition: exception.hpp:61
Contains low-level varint and zigzag encoding and decoding functions.
uint64_t decode_varint(const char **data, const char *end)
Definition: varint.hpp:89
bool next()
Definition: pbf_reader.hpp:276
int64_t get_int64()
Definition: pbf_reader.hpp:550
iterator_range< pbf_reader::const_uint32_iterator > get_packed_uint32()
Definition: pbf_reader.hpp:815
All parts of the protozero header-only library are in this namespace.
Definition: byteswap.hpp:24