Horizon
serializer.hpp
1 #pragma once
2 
3 #include <algorithm> // reverse, remove, fill, find, none_of
4 #include <array> // array
5 #include <cassert> // assert
6 #include <ciso646> // and, or
7 #include <clocale> // localeconv, lconv
8 #include <cmath> // labs, isfinite, isnan, signbit
9 #include <cstddef> // size_t, ptrdiff_t
10 #include <cstdint> // uint8_t
11 #include <cstdio> // snprintf
12 #include <iomanip> // setfill
13 #include <iterator> // next
14 #include <limits> // numeric_limits
15 #include <string> // string
16 #include <sstream> // stringstream
17 #include <type_traits> // is_same
18 
19 #include <nlohmann/detail/exceptions.hpp>
20 #include <nlohmann/detail/conversions/to_chars.hpp>
21 #include <nlohmann/detail/macro_scope.hpp>
22 #include <nlohmann/detail/meta.hpp>
23 #include <nlohmann/detail/output/output_adapters.hpp>
24 #include <nlohmann/detail/value_t.hpp>
25 
26 namespace nlohmann
27 {
28 namespace detail
29 {
31 // serialization //
33 
34 template<typename BasicJsonType>
36 {
37  using string_t = typename BasicJsonType::string_t; // string type of the JSON value being serialized
38  using number_float_t = typename BasicJsonType::number_float_t; // floating-point type handled by dump_float
39  using number_integer_t = typename BasicJsonType::number_integer_t; // signed integer type handled by dump_integer
40  using number_unsigned_t = typename BasicJsonType::number_unsigned_t; // unsigned integer type handled by dump_integer
41  static constexpr uint8_t UTF8_ACCEPT = 0; // decode() state: a complete code point has been decoded
42  static constexpr uint8_t UTF8_REJECT = 1; // decode() state: an invalid UTF-8 byte was encountered
43 
44  public:
49  serializer(output_adapter_t<char> s, const char ichar)
50  : o(std::move(s)), loc(std::localeconv()),
51  thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)),
52  decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)),
53  indent_char(ichar), indent_string(512, indent_char)
54  {}
55 
56  // delete because of pointer members
57  serializer(const serializer&) = delete;
58  serializer& operator=(const serializer&) = delete;
59 
77  void dump(const BasicJsonType& val, const bool pretty_print,
78  const bool ensure_ascii,
79  const unsigned int indent_step,
80  const unsigned int current_indent = 0)
81  {
82  switch (val.m_type)
83  {
84  case value_t::object:
85  {
86  if (val.m_value.object->empty())
87  {
88  o->write_characters("{}", 2);
89  return;
90  }
91 
92  if (pretty_print)
93  {
94  o->write_characters("{\n", 2);
95 
96  // variable to hold indentation for recursive calls
97  const auto new_indent = current_indent + indent_step;
98  if (JSON_UNLIKELY(indent_string.size() < new_indent))
99  {
100  indent_string.resize(indent_string.size() * 2, ' ');
101  }
102 
103  // first n-1 elements
104  auto i = val.m_value.object->cbegin();
105  for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
106  {
107  o->write_characters(indent_string.c_str(), new_indent);
108  o->write_character('\"');
109  dump_escaped(i->first, ensure_ascii);
110  o->write_characters("\": ", 3);
111  dump(i->second, true, ensure_ascii, indent_step, new_indent);
112  o->write_characters(",\n", 2);
113  }
114 
115  // last element
116  assert(i != val.m_value.object->cend());
117  assert(std::next(i) == val.m_value.object->cend());
118  o->write_characters(indent_string.c_str(), new_indent);
119  o->write_character('\"');
120  dump_escaped(i->first, ensure_ascii);
121  o->write_characters("\": ", 3);
122  dump(i->second, true, ensure_ascii, indent_step, new_indent);
123 
124  o->write_character('\n');
125  o->write_characters(indent_string.c_str(), current_indent);
126  o->write_character('}');
127  }
128  else
129  {
130  o->write_character('{');
131 
132  // first n-1 elements
133  auto i = val.m_value.object->cbegin();
134  for (std::size_t cnt = 0; cnt < val.m_value.object->size() - 1; ++cnt, ++i)
135  {
136  o->write_character('\"');
137  dump_escaped(i->first, ensure_ascii);
138  o->write_characters("\":", 2);
139  dump(i->second, false, ensure_ascii, indent_step, current_indent);
140  o->write_character(',');
141  }
142 
143  // last element
144  assert(i != val.m_value.object->cend());
145  assert(std::next(i) == val.m_value.object->cend());
146  o->write_character('\"');
147  dump_escaped(i->first, ensure_ascii);
148  o->write_characters("\":", 2);
149  dump(i->second, false, ensure_ascii, indent_step, current_indent);
150 
151  o->write_character('}');
152  }
153 
154  return;
155  }
156 
157  case value_t::array:
158  {
159  if (val.m_value.array->empty())
160  {
161  o->write_characters("[]", 2);
162  return;
163  }
164 
165  if (pretty_print)
166  {
167  o->write_characters("[\n", 2);
168 
169  // variable to hold indentation for recursive calls
170  const auto new_indent = current_indent + indent_step;
171  if (JSON_UNLIKELY(indent_string.size() < new_indent))
172  {
173  indent_string.resize(indent_string.size() * 2, ' ');
174  }
175 
176  // first n-1 elements
177  for (auto i = val.m_value.array->cbegin();
178  i != val.m_value.array->cend() - 1; ++i)
179  {
180  o->write_characters(indent_string.c_str(), new_indent);
181  dump(*i, true, ensure_ascii, indent_step, new_indent);
182  o->write_characters(",\n", 2);
183  }
184 
185  // last element
186  assert(not val.m_value.array->empty());
187  o->write_characters(indent_string.c_str(), new_indent);
188  dump(val.m_value.array->back(), true, ensure_ascii, indent_step, new_indent);
189 
190  o->write_character('\n');
191  o->write_characters(indent_string.c_str(), current_indent);
192  o->write_character(']');
193  }
194  else
195  {
196  o->write_character('[');
197 
198  // first n-1 elements
199  for (auto i = val.m_value.array->cbegin();
200  i != val.m_value.array->cend() - 1; ++i)
201  {
202  dump(*i, false, ensure_ascii, indent_step, current_indent);
203  o->write_character(',');
204  }
205 
206  // last element
207  assert(not val.m_value.array->empty());
208  dump(val.m_value.array->back(), false, ensure_ascii, indent_step, current_indent);
209 
210  o->write_character(']');
211  }
212 
213  return;
214  }
215 
216  case value_t::string:
217  {
218  o->write_character('\"');
219  dump_escaped(*val.m_value.string, ensure_ascii);
220  o->write_character('\"');
221  return;
222  }
223 
224  case value_t::boolean:
225  {
226  if (val.m_value.boolean)
227  {
228  o->write_characters("true", 4);
229  }
230  else
231  {
232  o->write_characters("false", 5);
233  }
234  return;
235  }
236 
238  {
239  dump_integer(val.m_value.number_integer);
240  return;
241  }
242 
244  {
245  dump_integer(val.m_value.number_unsigned);
246  return;
247  }
248 
250  {
251  dump_float(val.m_value.number_float);
252  return;
253  }
254 
255  case value_t::discarded:
256  {
257  o->write_characters("<discarded>", 11);
258  return;
259  }
260 
261  case value_t::null:
262  {
263  o->write_characters("null", 4);
264  return;
265  }
266  }
267  }
268 
269  private:
284  void dump_escaped(const string_t& s, const bool ensure_ascii)
285  {
286  uint32_t codepoint;
287  uint8_t state = UTF8_ACCEPT;
288  std::size_t bytes = 0; // number of bytes written to string_buffer
289 
290  for (std::size_t i = 0; i < s.size(); ++i)
291  {
292  const auto byte = static_cast<uint8_t>(s[i]);
293 
294  switch (decode(state, codepoint, byte))
295  {
296  case UTF8_ACCEPT: // decode found a new code point
297  {
298  switch (codepoint)
299  {
300  case 0x08: // backspace
301  {
302  string_buffer[bytes++] = '\\';
303  string_buffer[bytes++] = 'b';
304  break;
305  }
306 
307  case 0x09: // horizontal tab
308  {
309  string_buffer[bytes++] = '\\';
310  string_buffer[bytes++] = 't';
311  break;
312  }
313 
314  case 0x0A: // newline
315  {
316  string_buffer[bytes++] = '\\';
317  string_buffer[bytes++] = 'n';
318  break;
319  }
320 
321  case 0x0C: // formfeed
322  {
323  string_buffer[bytes++] = '\\';
324  string_buffer[bytes++] = 'f';
325  break;
326  }
327 
328  case 0x0D: // carriage return
329  {
330  string_buffer[bytes++] = '\\';
331  string_buffer[bytes++] = 'r';
332  break;
333  }
334 
335  case 0x22: // quotation mark
336  {
337  string_buffer[bytes++] = '\\';
338  string_buffer[bytes++] = '\"';
339  break;
340  }
341 
342  case 0x5C: // reverse solidus
343  {
344  string_buffer[bytes++] = '\\';
345  string_buffer[bytes++] = '\\';
346  break;
347  }
348 
349  default:
350  {
351  // escape control characters (0x00..0x1F) or, if
352  // ensure_ascii parameter is used, non-ASCII characters
353  if ((codepoint <= 0x1F) or (ensure_ascii and (codepoint >= 0x7F)))
354  {
355  if (codepoint <= 0xFFFF)
356  {
357  std::snprintf(string_buffer.data() + bytes, 7, "\\u%04x",
358  static_cast<uint16_t>(codepoint));
359  bytes += 6;
360  }
361  else
362  {
363  std::snprintf(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
364  static_cast<uint16_t>(0xD7C0 + (codepoint >> 10)),
365  static_cast<uint16_t>(0xDC00 + (codepoint & 0x3FF)));
366  bytes += 12;
367  }
368  }
369  else
370  {
371  // copy byte to buffer (all previous bytes
372  // been copied have in default case above)
373  string_buffer[bytes++] = s[i];
374  }
375  break;
376  }
377  }
378 
379  // write buffer and reset index; there must be 13 bytes
380  // left, as this is the maximal number of bytes to be
381  // written ("\uxxxx\uxxxx\0") for one code point
382  if (string_buffer.size() - bytes < 13)
383  {
384  o->write_characters(string_buffer.data(), bytes);
385  bytes = 0;
386  }
387  break;
388  }
389 
390  case UTF8_REJECT: // decode found invalid UTF-8 byte
391  {
392  std::stringstream ss;
393  ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << static_cast<int>(byte);
394  JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + ss.str()));
395  }
396 
397  default: // decode found yet incomplete multi-byte code point
398  {
399  if (not ensure_ascii)
400  {
401  // code point will not be escaped - copy byte to buffer
402  string_buffer[bytes++] = s[i];
403  }
404  break;
405  }
406  }
407  }
408 
409  if (JSON_LIKELY(state == UTF8_ACCEPT))
410  {
411  // write buffer
412  if (bytes > 0)
413  {
414  o->write_characters(string_buffer.data(), bytes);
415  }
416  }
417  else
418  {
419  // we finish reading, but do not accept: string was incomplete
420  std::stringstream ss;
421  ss << std::setw(2) << std::uppercase << std::setfill('0') << std::hex << static_cast<int>(static_cast<uint8_t>(s.back()));
422  JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + ss.str()));
423  }
424  }
425 
435  template<typename NumberType, detail::enable_if_t<
436  std::is_same<NumberType, number_unsigned_t>::value or
437  std::is_same<NumberType, number_integer_t>::value,
438  int> = 0>
439  void dump_integer(NumberType x)
440  {
441  // special case for "0"
442  if (x == 0)
443  {
444  o->write_character('0');
445  return;
446  }
447 
448  const bool is_negative = (x <= 0) and (x != 0); // see issue #755
449  std::size_t i = 0;
450 
451  while (x != 0)
452  {
453  // spare 1 byte for '\0'
454  assert(i < number_buffer.size() - 1);
455 
456  const auto digit = std::labs(static_cast<long>(x % 10));
457  number_buffer[i++] = static_cast<char>('0' + digit);
458  x /= 10;
459  }
460 
461  if (is_negative)
462  {
463  // make sure there is capacity for the '-'
464  assert(i < number_buffer.size() - 2);
465  number_buffer[i++] = '-';
466  }
467 
468  std::reverse(number_buffer.begin(), number_buffer.begin() + i);
469  o->write_characters(number_buffer.data(), i);
470  }
471 
480  void dump_float(number_float_t x)
481  {
482  // NaN / inf
483  if (not std::isfinite(x))
484  {
485  o->write_characters("null", 4);
486  return;
487  }
488 
489  // If number_float_t is an IEEE-754 single or double precision number,
490  // use the Grisu2 algorithm to produce short numbers which are
491  // guaranteed to round-trip, using strtof and strtod, resp.
492  //
493  // NB: The test below works if <long double> == <double>.
494  static constexpr bool is_ieee_single_or_double
495  = (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 24 and std::numeric_limits<number_float_t>::max_exponent == 128) or
496  (std::numeric_limits<number_float_t>::is_iec559 and std::numeric_limits<number_float_t>::digits == 53 and std::numeric_limits<number_float_t>::max_exponent == 1024);
497 
498  dump_float(x, std::integral_constant<bool, is_ieee_single_or_double>());
499  }
500 
501  void dump_float(number_float_t x, std::true_type /*is_ieee_single_or_double*/)
502  {
503  char* begin = number_buffer.data();
504  char* end = ::nlohmann::detail::to_chars(begin, begin + number_buffer.size(), x);
505 
506  o->write_characters(begin, static_cast<size_t>(end - begin));
507  }
508 
509  void dump_float(number_float_t x, std::false_type /*is_ieee_single_or_double*/)
510  {
511  // get number of digits for a float -> text -> float round-trip
512  static constexpr auto d = std::numeric_limits<number_float_t>::max_digits10;
513 
514  // the actual conversion
515  std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), "%.*g", d, x);
516 
517  // negative value indicates an error
518  assert(len > 0);
519  // check if buffer was large enough
520  assert(static_cast<std::size_t>(len) < number_buffer.size());
521 
522  // erase thousands separator
523  if (thousands_sep != '\0')
524  {
525  const auto end = std::remove(number_buffer.begin(),
526  number_buffer.begin() + len, thousands_sep);
527  std::fill(end, number_buffer.end(), '\0');
528  assert((end - number_buffer.begin()) <= len);
529  len = (end - number_buffer.begin());
530  }
531 
532  // convert decimal point to '.'
533  if (decimal_point != '\0' and decimal_point != '.')
534  {
535  const auto dec_pos = std::find(number_buffer.begin(), number_buffer.end(), decimal_point);
536  if (dec_pos != number_buffer.end())
537  {
538  *dec_pos = '.';
539  }
540  }
541 
542  o->write_characters(number_buffer.data(), static_cast<std::size_t>(len));
543 
544  // determine if need to append ".0"
545  const bool value_is_int_like =
546  std::none_of(number_buffer.begin(), number_buffer.begin() + len + 1,
547  [](char c)
548  {
549  return (c == '.' or c == 'e');
550  });
551 
552  if (value_is_int_like)
553  {
554  o->write_characters(".0", 2);
555  }
556  }
557 
579  static uint8_t decode(uint8_t& state, uint32_t& codep, const uint8_t byte) noexcept
580  {
581  static const std::array<uint8_t, 400> utf8d =
582  {
583  {
584  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1F
585  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20..3F
586  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 40..5F
587  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 60..7F
588  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 80..9F
589  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // A0..BF
590  8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // C0..DF
591  0xA, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x4, 0x3, 0x3, // E0..EF
592  0xB, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8, // F0..FF
593  0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4, 0x6, 0x1, 0x1, 0x1, 0x1, // s0..s0
594  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, // s1..s2
595  1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // s3..s4
596  1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, // s5..s6
597  1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // s7..s8
598  }
599  };
600 
601  const uint8_t type = utf8d[byte];
602 
603  codep = (state != UTF8_ACCEPT)
604  ? (byte & 0x3fu) | (codep << 6)
605  : static_cast<uint32_t>(0xff >> type) & (byte);
606 
607  state = utf8d[256u + state * 16u + type];
608  return state;
609  }
610 
611  private:
613  output_adapter_t<char> o = nullptr; // output adapter all serialized text is written to
614 
616  std::array<char, 64> number_buffer{{}}; // scratch buffer used by dump_integer and dump_float to format numbers
617 
619  const std::lconv* loc = nullptr; // locale data obtained from std::localeconv() in the constructor
621  const char thousands_sep = '\0'; // the locale's thousands separator; '\0' if the locale provides none
623  const char decimal_point = '\0'; // the locale's decimal point; '\0' if the locale provides none
624 
626  std::array<char, 512> string_buffer{{}}; // staging buffer dump_escaped fills before writing to the output adapter
627 
629  const char indent_char; // indentation character passed to the constructor
631  string_t indent_string; // run of indentation characters written when pretty-printing; initially 512 long
632 };
633 }
634 }
nlohmann::detail::serializer::serializer
serializer(output_adapter_t< char > s, const char ichar)
Definition: serializer.hpp:49
nlohmann::detail::value_t::null
null value
nlohmann::detail::value_t::object
object (unordered set of name/value pairs)
nlohmann::detail::serializer::dump
void dump(const BasicJsonType &val, const bool pretty_print, const bool ensure_ascii, const unsigned int indent_step, const unsigned int current_indent=0)
internal implementation of the serialization function
Definition: serializer.hpp:77
libzip::uint8_t
zip_uint8_t uint8_t
zip_uint8_t typedef.
Definition: zip.hpp:78
nlohmann
namespace for Niels Lohmann
Definition: adl_serializer.hpp:8
nlohmann::detail::value_t::number_float
number value (floating-point)
nlohmann::detail::value_t::number_integer
number value (signed integer)
nlohmann::detail::value_t::string
string value
nlohmann::detail::output_adapter_t
std::shared_ptr< output_adapter_protocol< CharType > > output_adapter_t
a type to simplify interfaces
Definition: output_adapters.hpp:26
nlohmann::detail::value_t::number_unsigned
number value (unsigned integer)
nlohmann::detail::value_t::array
array (ordered collection of values)
libzip::uint32_t
zip_uint32_t uint32_t
zip_uint32_t typedef.
Definition: zip.hpp:98
nlohmann::detail::value_t::discarded
discarded by the parser callback function
nlohmann::detail::value_t::boolean
boolean value
nlohmann::detail::serializer
Definition: serializer.hpp:35