233 lines
7.4 KiB
C++
233 lines
7.4 KiB
C++
#include "string_representation.h"
|
|
#include "parser.h"
|
|
#include <memory>
|
|
#include <assert.h>
|
|
|
|
#include "utf8.h"
|
|
|
|
namespace json {
|
|
/* Base-class step: does nothing with the parser context and hands back a
 * null pointer. The driver loop in parse_str_flawless() pops a call from its
 * stack when here() returns null. */
std::unique_ptr<ParsingCall> ParsingCall::here(ParserContext &pctx) {
    static_cast<void>(pctx);  // intentionally unused in the base implementation
    return nullptr;
}
|
|
|
|
/* Binds this call to the JSON slot it will fill in.
 * The slot is expected to still be null; this is checked with assert(), so
 * the check disappears when NDEBUG is defined. */
ValueParseCall::ValueParseCall(JSON &result)
    : result(result)
{
    assert(result.isNull());
}
|
|
|
|
/* True when ch is one of the ASCII decimal digits '0'..'9'. */
bool isDigit(int ch) {
    if (ch < '0')
        return false;
    return ch <= '9';
}
|
|
|
|
bool isIntegerStart(int ch) {
|
|
return isDigit(ch) || ch == '-';
|
|
}
|
|
|
|
/* True when ch is a lowercase ASCII letter 'a'..'z'. The value parser uses it
 * to collect the bare words it then compares with "null", "false" and "true". */
bool isSymbolConstituent(int ch) {
    if (ch < 'a')
        return false;
    return ch <= 'z';
}
|
|
|
|
/* Consumes an optional leading '-' of a number.
 * mantis_minus is always overwritten: true if a '-' was present (and
 * skipped), false otherwise (nothing is consumed). */
void read_int_minus_part(ParserContext& pctx, bool& mantis_minus) {
    mantis_minus = (peep(pctx) == '-');
    if (mantis_minus)
        skip(pctx);
}
|
|
|
|
/* Reads the integer part (the digits before any '.' or exponent) of a number.
 *
 * mantis_max18   - reset to 0, then built from at most the first 18 digits.
 * is_terrifying  - set to true when there are more than 18 digits; it is
 *                  never cleared here, so the caller must initialise it.
 *
 * A leading '0' ends the integer part immediately (exactly one digit is
 * consumed). Throws bad_syntax when no digit is present at all. */
void read_int_int_part(ParserContext& pctx, int64_t& mantis_max18, bool& is_terrifying) {
    mantis_max18 = 0;
    int digits_kept = 0;
    for (int ch = peep(pctx); isDigit(ch); ch = peep(pctx)) {
        skip(pctx);
        // A zero in first position is a complete integer part on its own.
        if (digits_kept == 0 && ch == '0')
            return;
        if (digits_kept >= 18) {
            // Too many digits to accumulate: keep consuming, flag it only.
            is_terrifying = true;
            continue;
        }
        mantis_max18 = mantis_max18 * 10 + (ch - '0');
        ++digits_kept;
    }
    if (digits_kept == 0)
        throw bad_syntax();
}
|
|
|
|
/* Consumes a run of one or more decimal digits and discards their value.
 * Throws bad_syntax when the next character is not a digit. */
void read_that_int_part_with_at_least_one_digit(ParserContext& pctx) {
    if (!isDigit(peep(pctx)))
        throw bad_syntax();
    do {
        skip(pctx);
    } while (isDigit(peep(pctx)));
}
|
|
|
|
/* Consumes the optional fraction ('.' digits) and the optional exponent
 * ('e' or 'E', optional sign, digits) that may follow an integer part.
 *
 * is_terrifying is set to true when either part is present and is left
 * untouched otherwise. The digits are only validated and skipped here.
 * Throws bad_syntax (via the digit reader) when '.' or the exponent marker is
 * not followed by at least one digit. */
void read_int_frac_exp_part(ParserContext& pctx, bool& is_terrifying) {
    if (peep(pctx) == '.') {
        is_terrifying = true;
        skip(pctx);
        read_that_int_part_with_at_least_one_digit(pctx);
    }

    const int exp_marker = peep(pctx);
    if (exp_marker != 'e' && exp_marker != 'E')
        return;

    is_terrifying = true;
    skip(pctx);
    const int sign = peep(pctx);
    if (sign == '+' || sign == '-')
        skip(pctx);
    read_that_int_part_with_at_least_one_digit(pctx);
}
|
|
|
|
/* Starts with reading u. Throws json::bad_syntax on bad syntax */
|
|
uint32_t read_4nibbles(ParserContext& pctx) {
|
|
uint32_t result = 0;
|
|
demandSkip(pctx, 'u');
|
|
for (int i = 0; i < 4; i++) {
|
|
int ch = peep(pctx);
|
|
result <<= 4;
|
|
if (isDigit(ch)) {
|
|
result += (ch - '0');
|
|
} else if ('a' <= ch && ch <= 'f') {
|
|
result += (ch - 'a') + 10;
|
|
} else if ('A' <= ch && ch <= 'F') {
|
|
result += (ch - 'A') + 10;
|
|
} else
|
|
throw bad_syntax();
|
|
skip(pctx);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/* True when v lies in 0xD800..0xDBFF, i.e. it is the first (high) half of a
 * UTF-16 surrogate pair. All values in that range share the same upper bits,
 * so masking off the low 10 bits and comparing is equivalent to the range
 * check. */
bool is_utf16_2bp_high_surrogate(uint32_t v) {
    return (v & 0xFFFFFC00u) == 0xD800u;
}
|
|
|
|
/* True when v lies in 0xDC00..0xDFFF, i.e. it is the second (low) half of a
 * UTF-16 surrogate pair.
 *
 * The upper bound is 0xDFFF, not 0xE000: U+E000 is an ordinary (private use)
 * code point. Treating it as a surrogate would make "\uE000" look like a lone
 * low surrogate and would let it complete a pair after a high surrogate. */
bool is_utf16_2bp_low_surrogate(uint32_t v) {
    return 0xDC00 <= v && v <= 0xDFFF;
}
|
|
|
|
/* Single-character escapes: {character after the backslash, character it
 * stands for}. The table ends with a {0, 0} sentinel row. */
constexpr char escaping_rules[][2] = {
    {'"',  '"'},
    {'\\', '\\'},
    {'/',  '/'},
    {'b',  '\b'},
    {'f',  '\f'},
    {'n',  '\n'},
    {'r',  '\r'},
    {'t',  '\t'},
    {0,    0},
};
|
|
|
|
void resert_to_one_char_escape(int leader, std::string& str) {
|
|
for (int i = 0; escaping_rules[i][0] != 0; i++) {
|
|
if (escaping_rules[i][0] == leader) {
|
|
str += escaping_rules[i][1];
|
|
return;
|
|
}
|
|
}
|
|
throw bad_syntax();
|
|
}
|
|
|
|
/* Parses a double-quoted JSON string at the current position (leading
 * whitespace is skipped first) and returns its decoded contents.
 *
 * - Characters 0x00..0x1f and end of input inside the string are rejected.
 * - "\uXXXX" escapes are decoded; a high surrogate must be followed directly
 *   by a second "\uXXXX" holding a low surrogate, and the pair is combined
 *   into one code point. A low surrogate that comes first is rejected.
 * - Any other escape goes through resert_to_one_char_escape().
 * - The finished string must pass isUtf8String().
 *
 * Throws bad_syntax on any violation. */
std::string demandStringJson(ParserContext &pctx) {
    skipWhitespaces(pctx);
    std::string decoded;
    demandSkip(pctx, '"');

    for (;;) {
        const int ch = peep(pctx);
        if (ch == '"')
            break;
        if (ch == endOfFile || (0 <= ch && ch <= 0x1f))
            throw bad_syntax();
        skip(pctx);

        // Ordinary character: copy it through unchanged.
        if (ch != '\\') {
            decoded += static_cast<char>(ch);
            continue;
        }

        const int leader = peep(pctx);
        if (leader != 'u') {
            resert_to_one_char_escape(leader, decoded);
            skip(pctx);  // the leader itself has not been consumed yet
            continue;
        }

        uint32_t first_unit = read_4nibbles(pctx);
        if (is_utf16_2bp_low_surrogate(first_unit))
            throw bad_syntax();
        if (!is_utf16_2bp_high_surrogate(first_unit)) {
            codepoint_to_utf8(first_unit, decoded);
            continue;
        }

        // High surrogate: the second half of the pair must follow at once.
        demandSkip(pctx, '\\');
        uint32_t second_unit = read_4nibbles(pctx);
        if (!is_utf16_2bp_low_surrogate(second_unit))
            throw bad_syntax();
        uint32_t cp = 0x10000 + ((first_unit - 0xD800) << 10) + (second_unit - 0xDC00);
        codepoint_to_utf8(cp, decoded);
    }

    skip(pctx);  // closing quote
    if (!isUtf8String(decoded))
        throw bad_syntax();
    return decoded;
}
|
|
|
|
/* Parses one JSON value into the bound result slot.
 *
 * On the first call it skips whitespace and dispatches on the next character:
 *   '"'           - string, stored through result.asString()
 *   '-' or digit  - number, stored through result.asInteger()
 *   'a'..'z'      - one of the words null / false / true
 *   '['           - result becomes an array; an ArrayParseCall for it is
 *                   returned so the driver can run it next
 *   '{'           - same for a dictionary / DictionaryParseCall
 * Scalars are parsed completely and a null pointer is returned.
 * On every later call (got_him already set) it returns a null pointer at
 * once, which the driver loop takes as "finished".
 *
 * Throws bad_syntax when the input does not start a valid value, or from the
 * helpers it calls. */
std::unique_ptr<ParsingCall> ValueParseCall::here(ParserContext &pctx) {
    if (got_him)
        return nullptr;
    got_him = true;

    skipWhitespaces(pctx);
    const int herald = peep(pctx);

    if (herald == '"') {
        result.asString() = demandStringJson(pctx);
    } else if (isIntegerStart(herald)) {
        const size_t pos_beg = pctx.pos;
        bool terrifying = false;
        bool mantis_minus = false;
        read_int_minus_part(pctx, mantis_minus);
        int64_t mantis_abs_max18 = 0;
        read_int_int_part(pctx, mantis_abs_max18, terrifying);
        read_int_frac_exp_part(pctx, terrifying);
        if (terrifying) {
            // Too long for the fast path, or has a fraction/exponent: hand
            // the exact source text of the number to Integer. substr() takes
            // (position, count), so the length is the distance travelled,
            // not the end offset.
            result.asInteger() = Integer(pctx.text.substr(pos_beg, pctx.pos - pos_beg).c_str());
        } else if (mantis_minus) {
            result.asInteger() = Integer(-mantis_abs_max18);
        } else {
            result.asInteger() = Integer(mantis_abs_max18);
        }
    } else if (isSymbolConstituent(herald)) {
        // Collect the whole run of lowercase letters, then match it.
        std::string sym;
        for (int ch = peep(pctx); isSymbolConstituent(ch); ch = peep(pctx)) {
            sym += static_cast<char>(ch);
            skip(pctx);
        }
        if (sym == "null") {
            result = JSON(null_symbol);
        } else if (sym == "false") {
            result = JSON(false_symbol);
        } else if (sym == "true") {
            result = JSON(true_symbol);
        } else {
            throw bad_syntax();
        }
    } else if (herald == '[') {
        skip(pctx);
        result = JSON(array);
        return std::make_unique<ArrayParseCall>(result.asArray());
    } else if (herald == '{') {
        skip(pctx);
        result = JSON(dictionary);
        return std::make_unique<DictionaryParseCall>(result.asDictionary());
    } else {
        throw bad_syntax();
    }
    return nullptr;
}
|
|
|
|
/* Parses text as a single JSON document and returns the resulting value.
 *
 * Nesting is handled with an explicit stack of ParsingCall objects instead
 * of recursion: the call on top is asked to proceed via here(); a non-null
 * return is a nested call that is pushed on top, a null return means the top
 * call is finished and it is popped.
 *
 * Once the stack is empty, whitespace is skipped and the input must be at its
 * end; otherwise bad_syntax is thrown. Exceptions from the parsing calls
 * propagate to the caller. */
JSON parse_str_flawless(const std::string &text) {
    // NOTE(review): wsErr is never used in this function; kept as-is because
    // its constructor is not visible here - confirm it can be removed.
    WrongSyntax wsErr;
    ParserContext pctx(text);
    JSON root;

    std::vector<std::unique_ptr<ParsingCall>> pending;
    pending.push_back(std::make_unique<ValueParseCall>(root));
    while (!pending.empty()) {
        if (auto nested = pending.back()->here(pctx))
            pending.push_back(std::move(nested));
        else
            pending.pop_back();
    }

    skipWhitespaces(pctx);
    if (!isEof(pctx))
        throw bad_syntax();

    return root;
}
|
|
}
|