#include "string_representation.h"
#include "parser.h"
// NOTE(review): the targets of the next two #include directives are missing in
// this copy of the file, and so are the template arguments of std::unique_ptr,
// std::vector and std::make_unique further down. They look like they were
// stripped by a text-extraction step; restore them from version control.
// They are left untouched here rather than guessed.
#include
#include
#include "utf8.h"

namespace json {
    /* Default step of a parsing call: produces no child call.
     * parse_str_flawless() treats a null result as "this call is finished". */
    std::unique_ptr ParsingCall::here(ParserContext &pctx) {
        return nullptr;
    }

    /* Binds the call to the JSON value it will fill in.
     * The destination is expected to be null on entry (checked by assert). */
    ValueParseCall::ValueParseCall(JSON &result) : result(result) {
        assert(result.isNull());
    }

    /* True for the ASCII decimal digits '0'..'9'. */
    bool isDigit(int ch) {
        return ('0' <= ch && ch <= '9');
    }

    /* True if ch can begin a JSON number: a digit or a minus sign. */
    bool isIntegerStart(int ch) {
        return isDigit(ch) || ch == '-';
    }

    /* True for ASCII lowercase letters, the only characters that occur in
     * the symbols null / false / true. */
    bool isSymbolConstituent(int ch) {
        return 'a' <= ch && ch <= 'z';
    }

    /* Consumes an optional leading '-' and reports whether it was present. */
    void read_int_minus_part(ParserContext& pctx, bool& mantis_minus) {
        mantis_minus = false;
        if (peep(pctx) == '-') {
            skip(pctx);
            mantis_minus = true;
        }
    }

    /* Consumes the integer part of a number.
     *
     * mantis_max18 receives the value of the first (at most 18) digits;
     * 18 decimal digits always fit into int64_t. If more digits follow they
     * are consumed but not accumulated, and is_terrifying is set to true so
     * that the caller falls back to parsing the raw text. is_terrifying is
     * never reset to false here.
     *
     * A leading '0' ends the integer part immediately (a single zero digit).
     * Throws bad_syntax if no digit is present. */
    void read_int_int_part(ParserContext& pctx, int64_t& mantis_max18, bool& is_terrifying) {
        mantis_max18 = 0;
        int d = 0;  // number of digits accumulated into mantis_max18
        while (true) {
            int ch = peep(pctx);
            if (!isDigit(ch))
                break;
            skip(pctx);
            if (ch == '0' && d == 0)
                return;
            if (d < 18) {
                mantis_max18 = mantis_max18 * 10 + (ch - '0');
                d++;
            } else {
                is_terrifying = true;
            }
        }
        if (d == 0)
            throw bad_syntax();
    }

    /* Consumes a run of one or more digits; throws bad_syntax if the next
     * character is not a digit. */
    void read_that_int_part_with_at_least_one_digit(ParserContext& pctx) {
        if (!isDigit(peep(pctx)))
            throw bad_syntax();
        skip(pctx);
        while (isDigit(peep(pctx)))
            skip(pctx);
    }

    /* Consumes the optional fraction (".digits") and exponent
     * ("e" / "E", optional sign, digits) of a number. Either part sets
     * is_terrifying, because such a number cannot be represented by the
     * plain 18-digit mantissa path. */
    void read_int_frac_exp_part(ParserContext& pctx, bool& is_terrifying) {
        if (peep(pctx) == '.') {
            is_terrifying = true;
            skip(pctx);
            read_that_int_part_with_at_least_one_digit(pctx);
        }
        if (peep(pctx) == 'e' || peep(pctx) == 'E') {
            is_terrifying = true;
            skip(pctx);
            if (peep(pctx) == '+' || peep(pctx) == '-')
                skip(pctx);
            read_that_int_part_with_at_least_one_digit(pctx);
        }
    }

    /* Reads the "uXXXX" tail of a \uXXXX escape, starting at the 'u', and
     * returns the 16-bit value of the four hexadecimal digits (either case).
     * Throws json::bad_syntax on bad syntax. */
    uint32_t read_4nibbles(ParserContext& pctx) {
        uint32_t result = 0;
        demandSkip(pctx, 'u');
        for (int i = 0; i < 4; i++) {
            int ch = peep(pctx);
            result <<= 4;
            if (isDigit(ch)) {
                result += (ch - '0');
            } else if ('a' <= ch && ch <= 'f') {
                result += (ch - 'a') + 10;
            } else if ('A' <= ch && ch <= 'F') {
                result += (ch - 'A') + 10;
            } else
                throw bad_syntax();
            skip(pctx);
        }
        return result;
    }

    /* True if v is a UTF-16 high (leading) surrogate, U+D800..U+DBFF. */
    bool is_utf16_2bp_high_surrogate(uint32_t v) {
        return 0xD800 <= v && v <= 0xDBFF;
    }

    /* True if v is a UTF-16 low (trailing) surrogate, U+DC00..U+DFFF.
     * The upper bound is 0xDFFF: U+E000 is an ordinary code point and must
     * not be classified as a surrogate. */
    bool is_utf16_2bp_low_surrogate(uint32_t v) {
        return 0xDC00 <= v && v <= 0xDFFF;
    }

    /* Single-character escapes: {character after the backslash, resulting
     * character}. The table is terminated by a {0, 0} entry. */
    constexpr char escaping_rules[][2] = {
        {'"', '"'}, {'\\', '\\'}, {'/', '/'}, {'b', '\b'}, {'f', '\f'},
        {'n', '\n'}, {'r', '\r'}, {'t', '\t'}, {0, 0}
    };

    /* Appends to str the character denoted by the one-character escape
     * `leader`. Throws bad_syntax if leader is not in escaping_rules. */
    void resert_to_one_char_escape(int leader, std::string& str) {
        for (int i = 0; escaping_rules[i][0] != 0; i++) {
            if (escaping_rules[i][0] == leader) {
                str += escaping_rules[i][1];
                return;
            }
        }
        throw bad_syntax();
    }

    /* Skips whitespace, then reads a double-quoted JSON string and returns
     * its decoded contents.
     *
     * Handles one-character escapes and \uXXXX escapes, combining a high
     * surrogate with the low surrogate that must follow it. Throws
     * bad_syntax on: raw characters 0x00..0x1F, end of input inside the
     * string, an unknown escape, an unpaired surrogate, or a result that
     * isUtf8String() rejects. */
    std::string demandStringJson(ParserContext &pctx) {
        skipWhitespaces(pctx);
        std::string str;
        demandSkip(pctx, '"');
        int ch;
        while ((ch = peep(pctx)) != '"') {
            if ((0 <= ch && ch <= 0x1f) || ch == endOfFile)
                throw bad_syntax();
            skip(pctx);
            if (ch == '\\') {
                int leader = peep(pctx);
                if (leader == 'u') {
                    uint32_t first_utf16 = read_4nibbles(pctx);
                    // A low surrogate may only appear after a high one.
                    if (is_utf16_2bp_low_surrogate(first_utf16))
                        throw bad_syntax();
                    if (!is_utf16_2bp_high_surrogate(first_utf16)) {
                        codepoint_to_utf8(first_utf16, str);
                    } else {
                        // High surrogate: a second \uXXXX escape holding the
                        // low surrogate must follow immediately.
                        demandSkip(pctx, '\\');
                        uint32_t second_utf16 = read_4nibbles(pctx);
                        if (!is_utf16_2bp_low_surrogate(second_utf16))
                            throw bad_syntax();
                        uint32_t cp = 0x10000 + ((first_utf16 - 0xD800) << 10) + (second_utf16 - 0xDC00);
                        codepoint_to_utf8(cp, str);
                    }
                } else {
                    resert_to_one_char_escape(leader, str);
                    skip(pctx); // Skipping leader
                }
            } else {
                str += static_cast<char>(ch);
            }
        }
        skip(pctx);  // closing quote
        if (!isUtf8String(str))
            throw bad_syntax();
        return str;
    }

    /* Parses one JSON value at the current position into `result`.
     *
     * Strings, numbers and the symbols null / false / true are parsed
     * completely and a null pointer is returned. For '[' and '{' the
     * opening bracket is consumed, `result` becomes an empty array or
     * dictionary, and a child parsing call for its contents is returned.
     *
     * A second invocation on the same object returns a null pointer right
     * away (got_him), which lets parse_str_flawless() pop this call once the
     * child call has finished. Throws bad_syntax on malformed input. */
    std::unique_ptr ValueParseCall::here(ParserContext &pctx) {
        if (got_him)
            return nullptr;
        got_him = true;

        skipWhitespaces(pctx);
        int herald = peep(pctx);
        if (herald == '"') {
            result.asString() = demandStringJson(pctx);
        } else if (isIntegerStart(herald)) {
            size_t pos_beg = pctx.pos;
            bool terrifying = false;
            bool mantis_minus;
            read_int_minus_part(pctx, mantis_minus);
            int64_t mantis_abs_max18;
            read_int_int_part(pctx, mantis_abs_max18, terrifying);
            read_int_frac_exp_part(pctx, terrifying);
            if (terrifying) {
                // Too long, fractional or with an exponent: hand the exact
                // number text to Integer. substr() takes (offset, length),
                // so the length is the distance travelled since pos_beg.
                result.asInteger() = Integer(pctx.text.substr(pos_beg, pctx.pos - pos_beg).c_str());
            } else if (mantis_minus) {
                result.asInteger() = Integer(-mantis_abs_max18);
            } else {
                result.asInteger() = Integer(mantis_abs_max18);
            }
        } else if (isSymbolConstituent(herald)) {
            std::string sym;
            while (isSymbolConstituent(peep(pctx))) {
                sym += static_cast<char>(static_cast<uint8_t>(peep(pctx)));
                skip(pctx);
            }
            if (sym == "null") {
                result = JSON(null_symbol);
            } else if (sym == "false") {
                result = JSON(false_symbol);
            } else if (sym == "true") {
                result = JSON(true_symbol);
            } else
                throw bad_syntax();
        } else if (herald == '[') {
            skip(pctx);
            result = JSON(array);
            return std::make_unique(result.asArray());
        } else if (herald == '{') {
            skip(pctx);
            result = JSON(dictionary);
            return std::make_unique(result.asDictionary());
        } else
            throw bad_syntax();
        return nullptr;
    }

    /* Parses `text` as a single JSON document and returns the value.
     *
     * Nesting is handled with an explicit stack of parsing calls instead of
     * recursion: each call's here() either returns a child call, which is
     * pushed, or a null pointer, which pops the call. After the value only
     * whitespace may remain; anything else throws bad_syntax. */
    JSON parse_str_flawless(const std::string &text) {
        // NOTE(review): wsErr is not referenced in this function. It is kept
        // because WrongSyntax is declared elsewhere and its construction may
        // matter; confirm and remove if it does not.
        WrongSyntax wsErr;
        ParserContext pctx(text);
        JSON result;
        std::vector> callStack;
        callStack.push_back(std::make_unique(result));
        while (!callStack.empty()) {
            std::unique_ptr rt = callStack.back()->here(pctx);
            if (rt) {
                callStack.push_back(std::move(rt));
            } else {
                callStack.pop_back();
            }
        }
        skipWhitespaces(pctx);
        if (!isEof(pctx))
            throw bad_syntax();
        return result;
    }
}