6 #include <initializer_list>
13 #include <nlohmann/detail/macro_scope.hpp>
14 #include <nlohmann/detail/input/input_adapters.hpp>
// Template header: the single parameter is the JSON value type whose number
// typedefs are re-exported below. (The declaration this header introduces is
// not part of this excerpt.)
29 template<
typename BasicJsonType>
// Shorthand for the signed integer type declared by BasicJsonType.
32 using number_integer_t =
typename BasicJsonType::number_integer_t;
// Shorthand for the unsigned integer type declared by BasicJsonType.
33 using number_unsigned_t =
typename BasicJsonType::number_unsigned_t;
// Shorthand for the floating-point type declared by BasicJsonType.
34 using number_float_t =
typename BasicJsonType::number_float_t;
// Human-readable descriptions returned for the individual token kinds.
// NOTE(review): the enclosing signature and the switch/case labels that select
// each string are not visible in this excerpt - confirm the mapping there.
65 return "<uninitialized>";
67 return "true literal";
69 return "false literal";
71 return "null literal";
73 return "string literal";
77 return "number literal";
91 return "<parse error>";
93 return "end of input";
95 return "'[', '{', or a literal";
// Fallback text for a token kind without a dedicated description.
97 return "unknown token";
102 : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {}
105 lexer(
const lexer&) =
delete;
106 lexer& operator=(lexer&) =
delete;
/// Return the decimal point character reported by the current C locale.
///
/// @return the first character of `localeconv()->decimal_point`, or '.' when
///         the locale record or its `decimal_point` string is unavailable.
static char get_decimal_point() noexcept
{
    const auto loc = localeconv();
    assert(loc != nullptr);
    // The assert disappears under NDEBUG, so the pointer is checked again
    // here instead of being dereferenced unconditionally.
    if (loc == nullptr or loc->decimal_point == nullptr)
    {
        return '.';
    }
    return *(loc->decimal_point);
}
// Fragment of the routine that decodes the four hex digits of a \uXXXX escape
// (presumably get_codepoint(); its signature is outside this excerpt).
// Precondition: the 'u' of the escape is the current character.
143 assert(current ==
'u');
// Bit shifts for the four hex digits, most significant nibble first.
146 const auto factors = { 12, 8, 4, 0 };
147 for (
const auto factor : factors)
// Decimal digit: subtracting 0x30 ('0') yields 0..9.
151 if (current >=
'0' and current <=
'9')
153 codepoint += ((current - 0x30) << factor);
// Upper-case hex digit: subtracting 0x37 maps 'A'..'F' to 10..15.
155 else if (current >=
'A' and current <=
'F')
157 codepoint += ((current - 0x37) << factor);
// Lower-case hex digit: subtracting 0x57 maps 'a'..'f' to 10..15.
159 else if (current >=
'a' and current <=
'f')
161 codepoint += ((current - 0x57) << factor);
// Four hex digits can only produce a value in the 16-bit range.
169 assert(0x0000 <= codepoint and codepoint <= 0xFFFF);
// Checks bytes against a flat list of inclusive [low, high] bounds.
// `ranges` holds the bounds as consecutive pairs: {lo1, hi1, lo2, hi2, ...}.
// NOTE(review): the statements that read the next byte, store it, and return
// the result are not visible in this excerpt.
188 bool next_byte_in_range(std::initializer_list<int> ranges)
// Exactly one, two or three [low, high] pairs are supported.
190 assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6);
// Each iteration consumes one pair: the loop header advances past the upper
// bound after the condition below has advanced from the lower to the upper.
193 for (
auto range = ranges.begin(); range != ranges.end(); ++range)
// In range when low <= current <= high; `++range` steps to the upper bound.
196 if (JSON_LIKELY(*range <= current and current <= *(++range)))
// Message recorded for a byte outside its permitted range.
202 error_message =
"invalid string: ill-formed UTF-8 byte";
// Fragment of the string scanner (called as scan_string() elsewhere in this
// file; its signature and most case labels are outside this excerpt).
// Precondition: the opening quote is the current character.
231 assert(current ==
'\"');
// Input ended before a closing quote was seen.
239 case std::char_traits<char>::eof():
241 error_message =
"invalid string: missing closing quote";
// Successful completion of the string token.
248 return token_type::value_string;
// --- \uXXXX escape handling ---
// codepoint1 keeps the first decoded unit; codepoint is the value that is
// finally encoded as UTF-8 below.
292 const int codepoint1 = get_codepoint();
293 int codepoint = codepoint1;
// -1 signals that the four characters after \u were not all hex digits.
295 if (JSON_UNLIKELY(codepoint1 == -1))
297 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
298 return token_type::parse_error;
// High (leading) surrogate: a second \uXXXX escape must follow immediately.
302 if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF)
305 if (JSON_LIKELY(get() ==
'\\' and get() ==
'u'))
307 const int codepoint2 = get_codepoint();
309 if (JSON_UNLIKELY(codepoint2 == -1))
311 error_message =
"invalid string: '\\u' must be followed by 4 hex digits";
312 return token_type::parse_error;
// The second unit has to be a low (trailing) surrogate.
316 if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF))
// Second escape present but not a low surrogate. The message names the
// leading range U+D800..U+DBFF, matching the test on codepoint1 above.
331 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
332 return token_type::parse_error;
// No second \u escape followed the high surrogate at all.
337 error_message =
"invalid string: surrogate U+D800..U+DBFF must be followed by U+DC00..U+DFFF";
338 return token_type::parse_error;
// A low surrogate that was not preceded by a high surrogate is rejected.
343 if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF))
345 error_message =
"invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF";
346 return token_type::parse_error;
// --- UTF-8 encoding of the decoded code point ---
351 assert(0x00 <= codepoint and codepoint <= 0x10FFFF);
// Below 0x80: single-byte form (the statement emitting it is not visible here).
354 if (codepoint < 0x80)
// Up to 0x7FF: two bytes, 110xxxxx 10xxxxxx.
359 else if (codepoint <= 0x7FF)
362 add(0xC0 | (codepoint >> 6));
363 add(0x80 | (codepoint & 0x3F));
// Up to 0xFFFF: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.
365 else if (codepoint <= 0xFFFF)
368 add(0xE0 | (codepoint >> 12));
369 add(0x80 | ((codepoint >> 6) & 0x3F));
370 add(0x80 | (codepoint & 0x3F));
// Remaining values: four bytes, 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
375 add(0xF0 | (codepoint >> 18));
376 add(0x80 | ((codepoint >> 12) & 0x3F));
377 add(0x80 | ((codepoint >> 6) & 0x3F));
378 add(0x80 | (codepoint & 0x3F));
// Backslash followed by a character that does not form a known escape.
386 error_message =
"invalid string: forbidden character after backslash";
387 return token_type::parse_error;
// Raw control character inside the string.
427 error_message =
"invalid string: control character must be escaped";
428 return token_type::parse_error;
// --- validation of multi-byte UTF-8 sequences in the raw input ---
// Each check lists the permitted range of every continuation byte; the lead
// byte case labels selecting these checks are outside this excerpt.
// One continuation byte, 80..BF.
563 if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF})))
565 return token_type::parse_error;
// Two continuation bytes, first restricted to A0..BF.
573 if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF}))))
575 return token_type::parse_error;
// Two continuation bytes, both 80..BF.
597 if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF}))))
599 return token_type::parse_error;
// Two continuation bytes, first restricted to 80..9F.
607 if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF}))))
609 return token_type::parse_error;
// Three continuation bytes, first restricted to 90..BF.
617 if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
619 return token_type::parse_error;
// Three continuation bytes, all 80..BF.
629 if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF}))))
631 return token_type::parse_error;
// Three continuation bytes, first restricted to 80..8F.
639 if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF}))))
641 return token_type::parse_error;
// Any other byte cannot start a well-formed sequence.
649 error_message =
"invalid string: ill-formed UTF-8 byte";
650 return token_type::parse_error;
/// Parse a `float` from a C string (overload selected by the type of `f`).
///
/// @param[out] f       receives the value returned by std::strtof
/// @param[in]  str     null-terminated text to parse
/// @param[out] endptr  forwarded to std::strtof, which stores the address of
///                     the first character it did not consume
static void strtof(float& f, const char* str, char** endptr) noexcept
{
    f = std::strtof(str, endptr);
}
/// Parse a `double` from a C string (overload selected by the type of `f`).
///
/// @param[out] f       receives the value returned by std::strtod
/// @param[in]  str     null-terminated text to parse
/// @param[out] endptr  forwarded to std::strtod, which stores the address of
///                     the first character it did not consume
static void strtof(double& f, const char* str, char** endptr) noexcept
{
    f = std::strtod(str, endptr);
}
/// Parse a `long double` from a C string (overload selected by the type of `f`).
///
/// @param[out] f       receives the value returned by std::strtold
/// @param[in]  str     null-terminated text to parse
/// @param[out] endptr  forwarded to std::strtold, which stores the address of
///                     the first character it did not consume
static void strtof(long double& f, const char* str, char** endptr) noexcept
{
    f = std::strtold(str, endptr);
}
// Scans a number token with a goto-driven state machine and then converts the
// collected text. NOTE(review): most state labels and the switch statements
// that choose each jump are not visible in this excerpt.
711 token_type scan_number()
// Start by assuming an unsigned integer; the type is changed below once a
// different form is recognised.
718 token_type number_type = token_type::value_unsigned;
726 goto scan_number_minus;
732 goto scan_number_zero;
746 goto scan_number_any1;
// Switch to the signed integer type (presumably after a leading '-';
// the label is not visible here).
758 number_type = token_type::value_integer;
764 goto scan_number_zero;
778 goto scan_number_any1;
783 error_message =
"invalid number; expected digit after '-'";
784 return token_type::parse_error;
// The locale's decimal point character is appended to the buffer here.
// NOTE(review): presumably so the locale-aware conversion functions used
// further down accept the text - confirm.
794 add(decimal_point_char);
795 goto scan_number_decimal1;
802 goto scan_number_exponent;
806 goto scan_number_done;
825 goto scan_number_any1;
830 add(decimal_point_char);
831 goto scan_number_decimal1;
838 goto scan_number_exponent;
842 goto scan_number_done;
// State: a decimal point was just read; the number is now a float.
845 scan_number_decimal1:
847 number_type = token_type::value_float;
862 goto scan_number_decimal2;
867 error_message =
"invalid number; expected digit after '.'";
868 return token_type::parse_error;
// State: inside the fractional digits.
872 scan_number_decimal2:
888 goto scan_number_decimal2;
895 goto scan_number_exponent;
899 goto scan_number_done;
// State: an exponent marker was just read; the number is now a float.
902 scan_number_exponent:
904 number_type = token_type::value_float;
911 goto scan_number_sign;
926 goto scan_number_any2;
// Message text for a missing sign/digit after the exponent marker (the
// assignment that precedes this literal is not visible in this excerpt).
932 "invalid number; expected '+', '-', or digit after exponent";
933 return token_type::parse_error;
953 goto scan_number_any2;
958 error_message =
"invalid number; expected digit after exponent sign";
959 return token_type::parse_error;
979 goto scan_number_any2;
983 goto scan_number_done;
// --- conversion of the collected text ---
// endptr receives the position where each conversion function stopped.
991 char* endptr =
nullptr;
995 if (number_type == token_type::value_unsigned)
997 const auto x = std::strtoull(token_buffer.data(), &endptr, 10);
// The whole buffer must have been consumed.
1000 assert(endptr == token_buffer.data() + token_buffer.size());
// Accept only if the value survives the cast to number_unsigned_t unchanged.
1004 value_unsigned = static_cast<number_unsigned_t>(x);
1005 if (value_unsigned == x)
1007 return token_type::value_unsigned;
1011 else if (number_type == token_type::value_integer)
1013 const auto x = std::strtoll(token_buffer.data(), &endptr, 10);
// The whole buffer must have been consumed.
1016 assert(endptr == token_buffer.data() + token_buffer.size());
// Accept only if the value survives the cast to number_integer_t unchanged.
1020 value_integer = static_cast<number_integer_t>(x);
1021 if (value_integer == x)
1023 return token_type::value_integer;
// Floating-point conversion; the strtof overload is chosen by the type of
// value_float. NOTE(review): this looks like it is also the fallback for
// integers rejected above - the control flow in between is not visible.
1030 strtof(value_float, token_buffer.data(), &endptr);
1033 assert(endptr == token_buffer.data() + token_buffer.size());
1035 return token_type::value_float;
// Matches the remaining characters of a fixed literal.
// literal_text: the expected characters; length: how many of them to match;
// return_type: token kind associated with this literal.
// NOTE(review): the statement executed after a full match is not visible in
// this excerpt.
1043 token_type scan_literal(
const char* literal_text,
const std::size_t length,
1044 token_type return_type)
// The first character of the literal is already the current character.
1046 assert(current == literal_text[0]);
// Compare from index 1: read one input character per remaining position.
1047 for (std::size_t i = 1; i < length; ++i)
1049 if (JSON_UNLIKELY(get() != literal_text[i]))
1051 error_message =
"invalid literal";
1052 return token_type::parse_error;
1063 void reset() noexcept
1065 token_buffer.clear();
1066 token_string.clear();
1067 token_string.push_back(std::char_traits<char>::to_char_type(current));
// Fetches the next character from the input adapter into `current`.
// NOTE(review): further statements of this function (including its return)
// are not visible in this excerpt.
1080 std::char_traits<char>::int_type get()
1083 current = ia->get_character();
// Every real character (anything but EOF) is also recorded in the raw token
// text.
1084 if (JSON_LIKELY(current != std::char_traits<char>::eof()))
1086 token_string.push_back(std::char_traits<char>::to_char_type(current));
// Fragments of several small members whose signatures are outside this
// excerpt.
// Push-back of the current character: only done when it is not EOF; the
// adapter is told to un-read it and it is removed from the raw token text.
1095 if (JSON_LIKELY(current != std::char_traits<char>::eof()))
1097 ia->unget_character();
1098 assert(token_string.size() != 0);
1099 token_string.pop_back();
// Append the character `c` to the value buffer.
1106 token_buffer.push_back(std::char_traits<char>::to_char_type(c));
// Accessors for the most recently stored integer / unsigned values.
1117 return value_integer;
1123 return value_unsigned;
// Hands the value buffer over by move.
// NOTE(review): whether std::move is required here depends on the declared
// return type, which is not visible - confirm.
1135 return std::move(token_buffer);
// Fragment that builds a printable copy of the raw token text (the signature
// and the declaration of `result` are outside this excerpt).
1155 for (
const auto c : token_string)
// Characters 0x00..0x1F are rendered in escaped form instead of verbatim.
1157 if (
'\x00' <= c and c <=
'\x1F')
// Format: "<U+" + four upper-case, zero-padded hex digits + ">".
1160 std::stringstream ss;
1161 ss <<
"<U+" << std::setw(4) << std::uppercase << std::setfill(
'0')
1162 << std::hex << static_cast<int>(c) <<
">";
// Every other character is copied unchanged.
1168 result.push_back(c);
// Separate accessor fragment: returns the current error message pointer.
1178 return error_message;
// Fragment of the main token dispatcher (signature and case labels are
// outside this excerpt).
// Loop condition: keep going while the current character is a space, tab,
// line feed or carriage return.
1192 while (current ==
' ' or current ==
'\t' or current ==
'\n' or current ==
'\r');
// Structural tokens.
1198 return token_type::begin_array;
1200 return token_type::end_array;
1202 return token_type::begin_object;
1204 return token_type::end_object;
1206 return token_type::name_separator;
1208 return token_type::value_separator;
// Keyword literals: expected text, its length, and the token to report.
1212 return scan_literal(
"true", 4, token_type::literal_true);
1214 return scan_literal(
"false", 5, token_type::literal_false);
1216 return scan_literal(
"null", 4, token_type::literal_null);
// Strings and numbers are delegated to their dedicated scanners.
1220 return scan_string();
1234 return scan_number();
// End of input is reported as its own token kind.
1239 case std::char_traits<char>::eof():
1240 return token_type::end_of_input;
// Anything else cannot start a token.
1244 error_message =
"invalid literal";
1245 return token_type::parse_error;
// Most recently read character; starts out as EOF.
1254 std::char_traits<char>::int_type current = std::char_traits<char>::eof();
// Counter that starts at zero (presumably the number of characters read; the
// statements updating it are not visible in this excerpt).
1257 std::size_t chars_read = 0;
// Raw characters of the token being scanned.
1260 std::vector<char> token_string {};
// Description of the last error; empty string until an error is recorded.
1266 const char* error_message =
"";
// Storage for the value of the last scanned number, one slot per number kind.
1269 number_integer_t value_integer = 0;
1270 number_unsigned_t value_unsigned = 0;
1271 number_float_t value_float = 0;
// Decimal point character used when buffering numbers; '.' is only the
// in-class default.
1274 const char decimal_point_char =
'.';