libjsonincpp/src/library/jsonincpp/parser.cpp

233 lines
7.4 KiB
C++

#include "string_representation.h"
#include "parser.h"
#include <memory>
#include <assert.h>
#include "utf8.h"
namespace json {
/* Base-class parsing step: performs no work and hands back an empty pointer,
 * i.e. it never asks for a nested parsing call. */
std::unique_ptr<ParsingCall> ParsingCall::here(ParserContext &pctx) {
    return nullptr;
}
/* Binds this parsing call to the JSON slot it is going to fill.
 * The slot is asserted to be null on entry (the check is compiled out when NDEBUG is defined). */
ValueParseCall::ValueParseCall(JSON &result) : result(result) {
assert(result.isNull());
}
/* Tells whether ch is one of the ASCII decimal digits '0'..'9'. */
bool isDigit(int ch) {
    return !(ch < '0' || ch > '9');
}
/* Tells whether ch can open a number: a minus sign or a decimal digit. */
bool isIntegerStart(int ch) {
    if (ch == '-')
        return true;
    return isDigit(ch);
}
/* Tells whether ch is a lowercase ASCII letter 'a'..'z' (the alphabet used by
 * the keyword symbols null / true / false). */
bool isSymbolConstituent(int ch) {
    return !(ch < 'a' || ch > 'z');
}
/* Consumes an optional leading '-' of a number.
 * mantis_minus is set to true when a minus sign was consumed, false otherwise. */
void read_int_minus_part(ParserContext& pctx, bool& mantis_minus) {
    mantis_minus = false;
    if (peep(pctx) != '-')
        return;
    skip(pctx);
    mantis_minus = true;
}
/* Consumes the integer part of a number.
 *
 * mantis_max18   receives the value built from at most the first 18 digits.
 * is_terrifying  is set to true when more than 18 digits are present; it is
 *                never reset to false here.
 *
 * A '0' seen as the very first digit ends the integer part immediately.
 * Throws bad_syntax when no digit is present at all. */
void read_int_int_part(ParserContext& pctx, int64_t& mantis_max18, bool& is_terrifying) {
    mantis_max18 = 0;
    int digits_kept = 0;
    for (int ch = peep(pctx); isDigit(ch); ch = peep(pctx)) {
        skip(pctx);
        // A leading zero stands alone: stop right after it.
        if (digits_kept == 0 && ch == '0')
            return;
        if (digits_kept >= 18) {
            // Digits beyond the 18th are consumed but not accumulated.
            is_terrifying = true;
            continue;
        }
        mantis_max18 = mantis_max18 * 10 + (ch - '0');
        ++digits_kept;
    }
    if (digits_kept == 0)
        throw bad_syntax();
}
/* Consumes a run of one or more decimal digits.
 * Throws bad_syntax when the next character is not a digit. */
void read_that_int_part_with_at_least_one_digit(ParserContext& pctx) {
    if (!isDigit(peep(pctx)))
        throw bad_syntax();
    do {
        skip(pctx);
    } while (isDigit(peep(pctx)));
}
/* Consumes the optional fraction (".digits") and the optional exponent
 * ("e" or "E", optional sign, digits) of a number.
 * is_terrifying is set to true when either part is present; it is never reset
 * to false here. Missing digits after '.' or after the exponent marker are
 * reported by read_that_int_part_with_at_least_one_digit. */
void read_int_frac_exp_part(ParserContext& pctx, bool& is_terrifying) {
    if (peep(pctx) == '.') {
        is_terrifying = true;
        skip(pctx);
        read_that_int_part_with_at_least_one_digit(pctx);
    }

    const int marker = peep(pctx);
    if (marker != 'e' && marker != 'E')
        return;

    is_terrifying = true;
    skip(pctx);
    const int sign = peep(pctx);
    if (sign == '+' || sign == '-')
        skip(pctx);
    read_that_int_part_with_at_least_one_digit(pctx);
}
/* Reads the tail of a \uXXXX escape: demands a 'u', then consumes exactly four
 * hexadecimal digits (upper or lower case) and returns the 16-bit value they
 * spell. Throws json::bad_syntax when one of the four characters is not a hex
 * digit. */
uint32_t read_4nibbles(ParserContext& pctx) {
    demandSkip(pctx, 'u');
    uint32_t value = 0;
    for (int remaining = 4; remaining > 0; --remaining) {
        const int ch = peep(pctx);
        uint32_t nibble = 0;
        if (isDigit(ch))
            nibble = static_cast<uint32_t>(ch - '0');
        else if (ch >= 'a' && ch <= 'f')
            nibble = static_cast<uint32_t>(ch - 'a' + 10);
        else if (ch >= 'A' && ch <= 'F')
            nibble = static_cast<uint32_t>(ch - 'A' + 10);
        else
            throw bad_syntax();
        value = (value << 4) + nibble;
        // The character is consumed only after it proved to be a hex digit.
        skip(pctx);
    }
    return value;
}
/* Tells whether v is a UTF-16 high (leading) surrogate, i.e. lies in
 * 0xD800..0xDBFF. All values of that range, and only those, share the same
 * bits above the low ten. */
bool is_utf16_2bp_high_surrogate(uint32_t v) {
    return (v >> 10) == (0xD800u >> 10);
}
/* Tells whether v is a UTF-16 low (trailing) surrogate, i.e. lies in
 * 0xDC00..0xDFFF inclusive. 0xE000 is NOT a surrogate: it is an ordinary
 * code point and must be accepted as a standalone \uE000 escape. */
bool is_utf16_2bp_low_surrogate(uint32_t v) {
    return 0xDC00 <= v && v <= 0xDFFF;
}
/* Single-character escapes: each row is {character following the backslash,
 * character it stands for}. The {0, 0} row marks the end of the table. */
constexpr char escaping_rules[][2] = {
    {'"', '"'}, {'\\', '\\'}, {'/', '/'}, {'b', '\b'}, {'f', '\f'},
    {'n', '\n'}, {'r', '\r'}, {'t', '\t'}, {0, 0}};

/* Appends to str the character denoted by the one-character escape `leader`.
 * Throws bad_syntax when leader is not listed in escaping_rules. */
void resert_to_one_char_escape(int leader, std::string& str) {
    for (const auto& rule : escaping_rules) {
        if (rule[0] == 0)
            break;  // reached the sentinel row: no rule matched
        if (rule[0] == leader) {
            str += rule[1];
            return;
        }
    }
    throw bad_syntax();
}
/* Reads a double-quoted JSON string (after calling skipWhitespaces) and returns
 * its decoded contents.
 *
 * - Raw characters in 0x00..0x1f and end of input inside the string are rejected.
 * - \uXXXX escapes are converted to UTF-8; a high surrogate must be followed by
 *   a second \uXXXX escape holding a low surrogate, and a lone low surrogate is
 *   rejected.
 * - Any other escape is resolved by resert_to_one_char_escape.
 * - The finished string must pass isUtf8String.
 *
 * Throws bad_syntax on every violation listed above. */
std::string demandStringJson(ParserContext &pctx) {
    skipWhitespaces(pctx);
    std::string decoded;
    demandSkip(pctx, '"');

    for (int cur = peep(pctx); cur != '"'; cur = peep(pctx)) {
        const bool is_control = (0 <= cur && cur <= 0x1f);
        if (is_control || cur == endOfFile)
            throw bad_syntax();
        skip(pctx);

        // Ordinary character: copy it through unchanged.
        if (cur != '\\') {
            decoded += static_cast<char>(cur);
            continue;
        }

        const int leader = peep(pctx);
        if (leader != 'u') {
            resert_to_one_char_escape(leader, decoded);
            skip(pctx);  // the escape character itself
            continue;
        }

        uint32_t first_unit = read_4nibbles(pctx);
        if (is_utf16_2bp_low_surrogate(first_unit))
            throw bad_syntax();
        if (!is_utf16_2bp_high_surrogate(first_unit)) {
            codepoint_to_utf8(first_unit, decoded);
            continue;
        }

        // High surrogate: the matching low surrogate must follow as another \u escape.
        demandSkip(pctx, '\\');
        uint32_t second_unit = read_4nibbles(pctx);
        if (!is_utf16_2bp_low_surrogate(second_unit))
            throw bad_syntax();
        uint32_t cp = 0x10000 + ((first_unit - 0xD800) << 10) + (second_unit - 0xDC00);
        codepoint_to_utf8(cp, decoded);
    }

    skip(pctx);  // closing quote
    if (!isUtf8String(decoded))
        throw bad_syntax();
    return decoded;
}
/* Parses one JSON value into `result`.
 *
 * The call is one-shot: the first invocation does the parsing, every later
 * invocation returns an empty pointer right away.
 *
 * Returns an empty pointer when the value was parsed completely (string,
 * number, null / false / true). For '[' and '{' the container is created in
 * `result` and a nested ArrayParseCall / DictionaryParseCall bound to it is
 * returned for the caller to run.
 *
 * Throws bad_syntax when the input does not start a value or spells an
 * unknown symbol; helpers may throw it as well. */
std::unique_ptr<ParsingCall> ValueParseCall::here(ParserContext &pctx) {
    if (got_him)
        return nullptr;
    got_him = true;
    skipWhitespaces(pctx);
    int herald = peep(pctx);
    if (herald == '"') {
        result.asString() = demandStringJson(pctx);
    } else if (isIntegerStart(herald)) {
        size_t pos_beg = pctx.pos;
        bool terrifying = false;
        bool mantis_minus;
        read_int_minus_part(pctx, mantis_minus);
        int64_t mantis_abs_max18;
        read_int_int_part(pctx, mantis_abs_max18, terrifying);
        read_int_frac_exp_part(pctx, terrifying);
        if (terrifying) {
            // Numbers with more than 18 digits, a fraction or an exponent are
            // handed over as text. substr takes (start, count), so the length
            // of the number is pctx.pos - pos_beg; passing pctx.pos itself
            // would drag the characters after the number along whenever the
            // number does not start at offset 0.
            result.asInteger() = Integer(pctx.text.substr(pos_beg, pctx.pos - pos_beg).c_str());
        } else if (mantis_minus) {
            result.asInteger() = Integer(-mantis_abs_max18);
        } else {
            result.asInteger() = Integer(mantis_abs_max18);
        }
    } else if (isSymbolConstituent(herald)) {
        // Collect the whole run of lowercase letters, then match it as a keyword.
        std::string sym;
        while (isSymbolConstituent(peep(pctx))) {
            sym += static_cast<char>(static_cast<uint8_t>(peep(pctx)));
            skip(pctx);
        }
        if (sym == "null") {
            result = JSON(null_symbol);
        } else if (sym == "false") {
            result = JSON(false_symbol);
        } else if (sym == "true") {
            result = JSON(true_symbol);
        } else
            throw bad_syntax();
    } else if (herald == '[') {
        skip(pctx);
        result = JSON(array);
        return std::make_unique<ArrayParseCall>(result.asArray());
    } else if (herald == '{') {
        skip(pctx);
        result = JSON(dictionary);
        return std::make_unique<DictionaryParseCall>(result.asDictionary());
    } else
        throw bad_syntax();
    return nullptr;
}
/* Parses `text` as exactly one JSON value and returns it.
 *
 * Nesting is handled with an explicit stack of ParsingCall objects instead of
 * recursion: the top call is run; if it hands back a nested call that one is
 * pushed, otherwise the top call is finished and popped.
 *
 * Throws bad_syntax when anything but whitespace follows the value; the
 * parsing calls may throw it for malformed input as well. */
JSON parse_str_flawless(const std::string &text) {
    // NOTE(review): wsErr is never read in this function.
    WrongSyntax wsErr;
    ParserContext pctx(text);
    JSON result;

    std::vector<std::unique_ptr<ParsingCall>> callStack;
    callStack.push_back(std::make_unique<ValueParseCall>(result));
    while (!callStack.empty()) {
        std::unique_ptr<ParsingCall> nested = callStack.back()->here(pctx);
        if (!nested) {
            callStack.pop_back();
            continue;
        }
        callStack.push_back(std::move(nested));
    }

    skipWhitespaces(pctx);
    if (!isEof(pctx))
        throw bad_syntax();
    return result;
}
}