Horizon
parser.hpp
1 #pragma once
2 
3 #include <cassert> // assert
4 #include <cmath> // isfinite
5 #include <cstdint> // uint8_t
6 #include <functional> // function
7 #include <string> // string
8 #include <utility> // move
9 
10 #include <nlohmann/detail/exceptions.hpp>
11 #include <nlohmann/detail/macro_scope.hpp>
12 #include <nlohmann/detail/input/input_adapters.hpp>
13 #include <nlohmann/detail/input/lexer.hpp>
14 #include <nlohmann/detail/value_t.hpp>
15 
16 namespace nlohmann
17 {
18 namespace detail
19 {
21 // parser //
23 
29 template<typename BasicJsonType>
30 class parser
31 {
32  using number_integer_t = typename BasicJsonType::number_integer_t;
33  using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
34  using number_float_t = typename BasicJsonType::number_float_t;
36  using token_type = typename lexer_t::token_type;
37 
38  public:
/// events reported to the parser callback while a value is being parsed
enum class parse_event_t : std::uint8_t
{
    /// the parser read `{` and started to process a JSON object
    object_start,
    /// the parser read `}` and finished processing a JSON object
    object_end,
    /// the parser read `[` and started to process a JSON array
    array_start,
    /// the parser read `]` and finished processing a JSON array
    array_end,
    /// the parser read a key of a value in an object
    key,
    /// the parser finished reading a JSON value
    value
};
54 
55  using parser_callback_t =
56  std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
57 
59  explicit parser(detail::input_adapter_t adapter,
60  const parser_callback_t cb = nullptr,
61  const bool allow_exceptions_ = true)
62  : callback(cb), m_lexer(adapter), allow_exceptions(allow_exceptions_)
63  {}
64 
75  void parse(const bool strict, BasicJsonType& result)
76  {
77  // read first token
78  get_token();
79 
80  parse_internal(true, result);
81  result.assert_invariant();
82 
83  // in strict mode, input must be completely read
84  if (strict)
85  {
86  get_token();
87  expect(token_type::end_of_input);
88  }
89 
90  // in case of an error, return discarded value
91  if (errored)
92  {
93  result = value_t::discarded;
94  return;
95  }
96 
97  // set top-level value to null if it was discarded by the callback
98  // function
99  if (result.is_discarded())
100  {
101  result = nullptr;
102  }
103  }
104 
111  bool accept(const bool strict = true)
112  {
113  // read first token
114  get_token();
115 
116  if (not accept_internal())
117  {
118  return false;
119  }
120 
121  // strict => last token must be EOF
122  return not strict or (get_token() == token_type::end_of_input);
123  }
124 
125  private:
132  void parse_internal(bool keep, BasicJsonType& result)
133  {
134  // never parse after a parse error was detected
135  assert(not errored);
136 
137  // start with a discarded value
138  if (not result.is_discarded())
139  {
140  result.m_value.destroy(result.m_type);
141  result.m_type = value_t::discarded;
142  }
143 
144  switch (last_token)
145  {
146  case token_type::begin_object:
147  {
148  if (keep)
149  {
150  if (callback)
151  {
152  keep = callback(depth++, parse_event_t::object_start, result);
153  }
154 
155  if (not callback or keep)
156  {
157  // explicitly set result to object to cope with {}
158  result.m_type = value_t::object;
159  result.m_value = value_t::object;
160  }
161  }
162 
163  // read next token
164  get_token();
165 
166  // closing } -> we are done
167  if (last_token == token_type::end_object)
168  {
169  if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
170  {
171  result.m_value.destroy(result.m_type);
172  result.m_type = value_t::discarded;
173  }
174  break;
175  }
176 
177  // parse values
178  std::string key;
179  BasicJsonType value;
180  while (true)
181  {
182  // store key
183  if (not expect(token_type::value_string))
184  {
185  return;
186  }
187  key = m_lexer.move_string();
188 
189  bool keep_tag = false;
190  if (keep)
191  {
192  if (callback)
193  {
194  BasicJsonType k(key);
195  keep_tag = callback(depth, parse_event_t::key, k);
196  }
197  else
198  {
199  keep_tag = true;
200  }
201  }
202 
203  // parse separator (:)
204  get_token();
205  if (not expect(token_type::name_separator))
206  {
207  return;
208  }
209 
210  // parse and add value
211  get_token();
212  value.m_value.destroy(value.m_type);
213  value.m_type = value_t::discarded;
214  parse_internal(keep, value);
215 
216  if (JSON_UNLIKELY(errored))
217  {
218  return;
219  }
220 
221  if (keep and keep_tag and not value.is_discarded())
222  {
223  result.m_value.object->emplace(std::move(key), std::move(value));
224  }
225 
226  // comma -> next value
227  get_token();
228  if (last_token == token_type::value_separator)
229  {
230  get_token();
231  continue;
232  }
233 
234  // closing }
235  if (not expect(token_type::end_object))
236  {
237  return;
238  }
239  break;
240  }
241 
242  if (keep and callback and not callback(--depth, parse_event_t::object_end, result))
243  {
244  result.m_value.destroy(result.m_type);
245  result.m_type = value_t::discarded;
246  }
247  break;
248  }
249 
250  case token_type::begin_array:
251  {
252  if (keep)
253  {
254  if (callback)
255  {
256  keep = callback(depth++, parse_event_t::array_start, result);
257  }
258 
259  if (not callback or keep)
260  {
261  // explicitly set result to array to cope with []
262  result.m_type = value_t::array;
263  result.m_value = value_t::array;
264  }
265  }
266 
267  // read next token
268  get_token();
269 
270  // closing ] -> we are done
271  if (last_token == token_type::end_array)
272  {
273  if (callback and not callback(--depth, parse_event_t::array_end, result))
274  {
275  result.m_value.destroy(result.m_type);
276  result.m_type = value_t::discarded;
277  }
278  break;
279  }
280 
281  // parse values
282  BasicJsonType value;
283  while (true)
284  {
285  // parse value
286  value.m_value.destroy(value.m_type);
287  value.m_type = value_t::discarded;
288  parse_internal(keep, value);
289 
290  if (JSON_UNLIKELY(errored))
291  {
292  return;
293  }
294 
295  if (keep and not value.is_discarded())
296  {
297  result.m_value.array->push_back(std::move(value));
298  }
299 
300  // comma -> next value
301  get_token();
302  if (last_token == token_type::value_separator)
303  {
304  get_token();
305  continue;
306  }
307 
308  // closing ]
309  if (not expect(token_type::end_array))
310  {
311  return;
312  }
313  break;
314  }
315 
316  if (keep and callback and not callback(--depth, parse_event_t::array_end, result))
317  {
318  result.m_value.destroy(result.m_type);
319  result.m_type = value_t::discarded;
320  }
321  break;
322  }
323 
324  case token_type::literal_null:
325  {
326  result.m_type = value_t::null;
327  break;
328  }
329 
330  case token_type::value_string:
331  {
332  result.m_type = value_t::string;
333  result.m_value = m_lexer.move_string();
334  break;
335  }
336 
337  case token_type::literal_true:
338  {
339  result.m_type = value_t::boolean;
340  result.m_value = true;
341  break;
342  }
343 
344  case token_type::literal_false:
345  {
346  result.m_type = value_t::boolean;
347  result.m_value = false;
348  break;
349  }
350 
351  case token_type::value_unsigned:
352  {
353  result.m_type = value_t::number_unsigned;
354  result.m_value = m_lexer.get_number_unsigned();
355  break;
356  }
357 
358  case token_type::value_integer:
359  {
360  result.m_type = value_t::number_integer;
361  result.m_value = m_lexer.get_number_integer();
362  break;
363  }
364 
365  case token_type::value_float:
366  {
367  result.m_type = value_t::number_float;
368  result.m_value = m_lexer.get_number_float();
369 
370  // throw in case of infinity or NAN
371  if (JSON_UNLIKELY(not std::isfinite(result.m_value.number_float)))
372  {
373  if (allow_exceptions)
374  {
375  JSON_THROW(out_of_range::create(406, "number overflow parsing '" +
376  m_lexer.get_token_string() + "'"));
377  }
378  expect(token_type::uninitialized);
379  }
380  break;
381  }
382 
383  case token_type::parse_error:
384  {
385  // using "uninitialized" to avoid "expected" message
386  if (not expect(token_type::uninitialized))
387  {
388  return;
389  }
390  break; // LCOV_EXCL_LINE
391  }
392 
393  default:
394  {
395  // the last token was unexpected; we expected a value
396  if (not expect(token_type::literal_or_value))
397  {
398  return;
399  }
400  break; // LCOV_EXCL_LINE
401  }
402  }
403 
404  if (keep and callback and not callback(depth, parse_event_t::value, result))
405  {
406  result.m_type = value_t::discarded;
407  }
408  }
409 
420  bool accept_internal()
421  {
422  switch (last_token)
423  {
424  case token_type::begin_object:
425  {
426  // read next token
427  get_token();
428 
429  // closing } -> we are done
430  if (last_token == token_type::end_object)
431  {
432  return true;
433  }
434 
435  // parse values
436  while (true)
437  {
438  // parse key
439  if (last_token != token_type::value_string)
440  {
441  return false;
442  }
443 
444  // parse separator (:)
445  get_token();
446  if (last_token != token_type::name_separator)
447  {
448  return false;
449  }
450 
451  // parse value
452  get_token();
453  if (not accept_internal())
454  {
455  return false;
456  }
457 
458  // comma -> next value
459  get_token();
460  if (last_token == token_type::value_separator)
461  {
462  get_token();
463  continue;
464  }
465 
466  // closing }
467  return (last_token == token_type::end_object);
468  }
469  }
470 
471  case token_type::begin_array:
472  {
473  // read next token
474  get_token();
475 
476  // closing ] -> we are done
477  if (last_token == token_type::end_array)
478  {
479  return true;
480  }
481 
482  // parse values
483  while (true)
484  {
485  // parse value
486  if (not accept_internal())
487  {
488  return false;
489  }
490 
491  // comma -> next value
492  get_token();
493  if (last_token == token_type::value_separator)
494  {
495  get_token();
496  continue;
497  }
498 
499  // closing ]
500  return (last_token == token_type::end_array);
501  }
502  }
503 
504  case token_type::value_float:
505  {
506  // reject infinity or NAN
507  return std::isfinite(m_lexer.get_number_float());
508  }
509 
510  case token_type::literal_false:
511  case token_type::literal_null:
512  case token_type::literal_true:
513  case token_type::value_integer:
514  case token_type::value_string:
515  case token_type::value_unsigned:
516  return true;
517 
518  default: // the last token was unexpected
519  return false;
520  }
521  }
522 
524  token_type get_token()
525  {
526  return (last_token = m_lexer.scan());
527  }
528 
532  bool expect(token_type t)
533  {
534  if (JSON_UNLIKELY(t != last_token))
535  {
536  errored = true;
537  expected = t;
538  if (allow_exceptions)
539  {
540  throw_exception();
541  }
542  else
543  {
544  return false;
545  }
546  }
547 
548  return true;
549  }
550 
551  [[noreturn]] void throw_exception() const
552  {
553  std::string error_msg = "syntax error - ";
554  if (last_token == token_type::parse_error)
555  {
556  error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
557  m_lexer.get_token_string() + "'";
558  }
559  else
560  {
561  error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
562  }
563 
564  if (expected != token_type::uninitialized)
565  {
566  error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
567  }
568 
569  JSON_THROW(parse_error::create(101, m_lexer.get_position(), error_msg));
570  }
571 
572  private:
574  int depth = 0;
576  const parser_callback_t callback = nullptr;
578  token_type last_token = token_type::uninitialized;
580  lexer_t m_lexer;
582  bool errored = false;
584  token_type expected = token_type::uninitialized;
586  const bool allow_exceptions = true;
587 };
588 }
589 }
nlohmann::detail::parser::parse_event_t::value
the parser finished reading a JSON value
nlohmann::detail::value_t::null
null value
nlohmann::detail::lexer::move_string
std::string move_string()
return current string value (implicitly resets the token; useful only once)
Definition: lexer.hpp:1133
nlohmann::detail::value_t::object
object (unordered set of name/value pairs)
nlohmann::detail::lexer::get_token_string
std::string get_token_string() const
return the last read token (for errors only).
Definition: lexer.hpp:1151
libzip::uint8_t
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
nlohmann
namespace for Niels Lohmann
Definition: adl_serializer.hpp:8
nlohmann::detail::value_t::number_float
number value (floating-point)
nlohmann::detail::lexer::token_type
token_type
token types for the parser
Definition: lexer.hpp:38
nlohmann::detail::parser::parse_event_t::object_end
the parser read } and finished processing a JSON object
nlohmann::detail::value_t::number_integer
number value (signed integer)
nlohmann::detail::parser::parse_event_t::key
the parser read a key of a value in an object
nlohmann::detail::lexer
lexical analysis
Definition: lexer.hpp:30
nlohmann::detail::parser::parser
parser(detail::input_adapter_t adapter, const parser_callback_t cb=nullptr, const bool allow_exceptions_=true)
a parser reading from an input adapter
Definition: parser.hpp:59
nlohmann::detail::input_adapter_t
std::shared_ptr< input_adapter_protocol > input_adapter_t
a type to simplify interfaces
Definition: input_adapters.hpp:48
nlohmann::detail::value_t::string
string value
nlohmann::detail::parser::accept
bool accept(const bool strict=true)
public accept interface
Definition: parser.hpp:111
nlohmann::detail::lexer::get_position
constexpr std::size_t get_position() const noexcept
return position of last read token
Definition: lexer.hpp:1143
nlohmann::detail::parse_error::create
static parse_error create(int id_, std::size_t byte_, const std::string &what_arg)
create a parse error exception
Definition: exceptions.hpp:122
nlohmann::detail::parser
syntax analysis
Definition: parser.hpp:30
nlohmann::detail::parser::parse_event_t::array_end
the parser read ] and finished processing a JSON array
nlohmann::detail::lexer::token_type_name
static const char * token_type_name(const token_type t) noexcept
return name of values of type token_type (only used for errors)
Definition: lexer.hpp:60
nlohmann::detail::lexer::get_number_unsigned
constexpr number_unsigned_t get_number_unsigned() const noexcept
return unsigned integer value
Definition: lexer.hpp:1121
nlohmann::detail::value_t::number_unsigned
number value (unsigned integer)
nlohmann::detail::value_t::array
array (ordered collection of values)
nlohmann::detail::value_t::discarded
discarded by the parser callback function
nlohmann::detail::parser::parse_event_t
parse_event_t
Definition: parser.hpp:39
nlohmann::detail::value_t::boolean
boolean value
nlohmann::detail::lexer::get_number_integer
constexpr number_integer_t get_number_integer() const noexcept
return integer value
Definition: lexer.hpp:1115
nlohmann::detail::parser::parse
void parse(const bool strict, BasicJsonType &result)
public parser interface
Definition: parser.hpp:75
nlohmann::detail::parser::parse_event_t::array_start
the parser read [ and started to process a JSON array
nlohmann::detail::lexer::get_error_message
constexpr const char * get_error_message() const noexcept
return syntax error message
Definition: lexer.hpp:1176
nlohmann::detail::lexer::get_number_float
constexpr number_float_t get_number_float() const noexcept
return floating-point value
Definition: lexer.hpp:1127
nlohmann::detail::parser::parse_event_t::object_start
the parser read { and started to process a JSON object