diff --git a/src/l1/core/VecU8_as_str.h b/src/l1/core/VecU8_as_str.h
index 4ed346e..9e89880 100644
--- a/src/l1/core/VecU8_as_str.h
+++ b/src/l1/core/VecU8_as_str.h
@@ -317,9 +317,10 @@ char digit_to_small_hex(U32 d){
     return d >= 10 ? (char)('a' + d - 10) : (char)('0' + d);
 }
 
-static_assert(0b11000000 == 0xC0 && 0b10000000 == 0x80 && 0b00111111 == 0x3F, "asdasda");
+static_assert(0b11000000 == 0xC0 && 0b10000000 == 0x80 && 0b00111111 == 0x3F &&
+    0b11100000 == 0xE0 && 0b11110000 == 0xF0, "asdasda");
 
-/* Some bytes (encoding codepoint U) will be appended to str. Utf-8 works only with codepoints below (1u << 24) */
+/* Some bytes (encoding codepoint U) will be appended to str. Utf-8 works only with codepoints below (1u << 21) */
 void VecU8_encode_as_utf8(VecU8* str, U32 U){
     if (U < (1u << 7)) {
         VecU8_append(str, (U8)U);
@@ -327,14 +328,14 @@ void VecU8_encode_as_utf8(VecU8* str, U32 U){
         VecU8_append_span(str, (SpanU8){(U8[]){
             0xC0 | (U8)(U >> 6), 0x80 | (U8)(U & 0x3F)
         }, 2});
-    } else if (U < (1u << 17)) {
+    } else if (U < (1u << 16)) {
         VecU8_append_span(str, (SpanU8){(U8[]){
-            0xC0 | (U8)(U >> 12), 0x80 | (U8)((U >> 6) & 0x3F), (U8)(U & 0x3F)
+            0xE0 | (U8)(U >> 12), 0x80 | (U8)((U >> 6) & 0x3F), 0x80 | (U8)(U & 0x3F)
         }, 3});
     } else {
-        /* U < (1u << 24) */
+        /* U < (1u << 21) */
         VecU8_append_span(str, (SpanU8){(U8[]){
-            0xC0 | (U8)(U >> 18), 0x80 | (U8)((U >> 12) & 0x3F), 0x80 | (U8)((U >> 6) & 0x3F), (U8)(U & 0x3F)
+            0xF0 | (U8)(U >> 18), 0x80 | (U8)((U >> 12) & 0x3F), 0x80 | (U8)((U >> 6) & 0x3F), 0x80 | (U8)(U & 0x3F)
         }, 4});
     }
 }
diff --git a/src/l1_5/core/parsing_string.h b/src/l1_5/core/parsing_string.h
index 8780817..568e9ef 100644
--- a/src/l1_5/core/parsing_string.h
+++ b/src/l1_5/core/parsing_string.h
@@ -44,8 +44,8 @@ void SpanU8_parsing_skip_entire_line(SpanU8* rem){
     }
 }
 
-bool SpanU8_parsing_is_char_ahead(SpanU8* rem, char ch){
-    return rem->len > 0 ? rem->data[0] == (U8)ch : false;
+bool SpanU8_parsing_is_char_ahead(SpanU8 rem, char ch){
+    return rem.len > 0 ? rem.data[0] == (U8)ch : false;
 }
 
 /* Time to learn how to read integers */
@@ -82,14 +82,6 @@ int SpanU8_read_U64(SpanU8* rem_ret, U64* res_ret){
     return 0;
 }
 
-U64 SpanU64_expect_read_U64(SpanU8* rem){
-    U64 x;
-    int code = SpanU8_read_U64(rem, &x);
-    if (code)
-        abortf("Failed to read U64. Syntax error\n");
-    return x;
-}
-
 /* Returns positive on error, 0 on success, rem_ret is untouched on error */
 int SpanU8_read_S64(SpanU8* rem_ret, S64* ret){
     SpanU8 rem = *rem_ret;
@@ -144,6 +136,26 @@ int SpanU8_read_S64(SpanU8* rem_ret, S64* ret){
     return 0;
 }
 
+/* Helper: returns 10^p as float for -50 <= p <= 50, multiplying repeated squares of 10 selected by the bits of |p| */
+float SpanU8_read_float_h_pow10(S64 p){
+    assert(-50 <= p && p <= 50);
+    float twopowers[6] = {10.f};
+    U32 ap = (U32)labs(p);
+    for (int i = 0; i + 1 < 6; i++) {
+        twopowers[i + 1] = twopowers[i] * twopowers[i];
+    }
+    float rc = 1.f;
+    for (int i = 0; i < 6; i++) {
+        if (ap & 1) {
+            rc *= twopowers[i];
+        }
+        ap >>= 1;
+    }
+    if (p < 0) {
+        rc = 1.f / rc;
+    }
+    return rc;
+}
 
 /* returns positive int on error, 0 on success, rem_ret is untouched on error */
 int SpanU8_read_float(SpanU8* rem_ret, float* res_ret){
@@ -178,17 +190,18 @@ int SpanU8_read_float(SpanU8* rem_ret, float* res_ret){
             saw_digit = true;
         } else if (ch == 'e' || ch == 'E') {
             SpanU8_parsing_skip_char(&rem);
-            if (SpanU8_parsing_is_char_ahead(&rem, '+'))
+            if (SpanU8_parsing_is_char_ahead(rem, '+'))
                 SpanU8_parsing_skip_char(&rem);
             S64 exp;
             int ret = SpanU8_read_S64(&rem, &exp);
             if (ret)
                 return ret;
-            if (exp > 1000 || exp < -999) {
+            if (res == 0.f)
+                break;
+            if (exp > 50 || exp < -50) {
                 return 2;
             }
-            /* If compiler won't perform pow optimization here, I will throw my chair out of the window */
-            res = res * powf(10.f, (float)exp);
+            res *= SpanU8_read_float_h_pow10(exp);
             break;
         } else {
             break;
@@ -210,14 +223,6 @@ int SpanU8_read_float(SpanU8* rem_ret, float* res_ret){
     return 0;
 }
 
-float SpanU8_expect_read_float(SpanU8* rem){
-    float x;
-    int code = SpanU8_read_float(rem, &x);
-    if (code)
-        abortf("Failed to read float. Syntax error\n");
-    return x;
-}
-
 void SpanU8_parsing_skip_spaces(SpanU8* rem){
     while (rem->len) {
         U8 ch = *rem->data;
diff --git a/src/l2/core/json_encoded.h b/src/l2/core/json_encoded.h
index 156a875..c18ad2b 100644
--- a/src/l2/core/json_encoded.h
+++ b/src/l2/core/json_encoded.h
@@ -12,10 +12,10 @@ void json_encoding_append_utf16(VecU8* res, U8 codepoint){
 }
 
 /* Str is being encoded as JSON string literal */
-void json_encoding_append_string(VecU8* res, const VecU8* str){
+void json_encoding_append_string(VecU8* res, SpanU8 str){
     VecU8_append(res, '"');
-    for (size_t i = 0; i < str->len; i++) {
-        U8 ch = str->buf[i];
+    for (size_t i = 0; i < str.len; i++) {
+        U8 ch = str.data[i];
         if (ch == '\t') {
             VecU8_append_span(res, cstr("\\t"));
         } else if (ch == '\n') {
@@ -48,7 +48,7 @@ void json_encoding_append_to_str(const Json* obj, VecU8* res){
     } else if (obj->variant == Json_float) {
         VecU8_append_vec(res, VecU8_format("%f", obj->float_num));
     } else if (obj->variant == Json_str) {
-        json_encoding_append_string(res, &obj->str);
+        json_encoding_append_string(res, VecU8_to_span(&obj->str));
     } else if (obj->variant == Json_arr) {
         VecU8_append(res, '[');
         const VecJson* arr = &obj->arr;
@@ -66,7 +66,7 @@ void json_encoding_append_to_str(const Json* obj, VecU8* res){
             if (was) {
                 VecU8_append_span(res, cstr(", "));
             }
-            json_encoding_append_string(res, &it->key);
+            json_encoding_append_string(res, VecU8_to_span(&it->key));
             VecU8_append_span(res, cstr(": "));
             json_encoding_append_to_str(&it->value, res);
             was = true;
@@ -191,7 +191,7 @@ OptionJson json_decoding_h_no_spaces(SpanU8* rem, U32 depth_rem){
     bool none_code = SpanU8_parsing_try_read_prefix(rem, cstr("none"));
     if (none_code)
         return Some_Json(Json_None);
-    if (SpanU8_parsing_is_char_ahead(rem, '\"')) {
+    if (SpanU8_parsing_is_char_ahead(*rem, '\"')) {
         VecU8 str;
         int str_code = json_decoding_parse_string(rem, &str);
         if (str_code) {
diff --git a/src/l2/tests/t_parsing.c b/src/l2/tests/t_parsing.c
index 80221ad..ed5301e 100644
--- a/src/l2/tests/t_parsing.c
+++ b/src/l2/tests/t_parsing.c
@@ -1,6 +1,296 @@
 #include "../core/json_encoded.h"
+#include <math.h>
+
+/* Identity check: true only when a and b have the same start pointer and the same length. Contents are not compared (SpanU8_cont_equal does that) */
+bool SpanU8_equal(SpanU8 a, SpanU8 b){
+    return a.data == b.data && a.len == b.len;
+}
+
+bool vec_matches_cstr(VecU8 a, const char* b){
+    bool ans = SpanU8_cont_equal(VecU8_to_span(&a), SpanU8_from_cstr(b));
+    VecU8_drop(a);
+    return ans;
+}
+
+void tt1(){
+    SpanU8 str = cstr("ABC=");
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_prefix(&rem, cstr("ABC"));
+    check(c);
+    check(rem.data == str.data + 3);
+    check(rem.len == 1);
+}
+
+void tt2(){
+    SpanU8 str = cstr("ABC==");
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_prefix(&rem, cstr("ABD"));
+    check(!c);
+    check(SpanU8_equal(rem, str));
+}
+
+void tt3(){
+    SpanU8 str = (SpanU8){.data = (U8*)"ABC", .len = 2};
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_prefix(&rem, cstr("ABC"));
+    check(!c);
+    check(SpanU8_equal(rem, str));
+}
+
+void tt4(){
+    SpanU8 str = cstr("ABC");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_char(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 1, str.len - 1}));
+}
+
+void tt5(){
+    SpanU8 str = cstr("AB");
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_char(&rem, 'A');
+    check(c && SpanU8_equal(rem, (SpanU8){str.data + 1, str.len - 1}));
+}
+
+void tt6(){
+    SpanU8 str = cstr("ABC");
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_char(&rem, 'B');
+    check(!c && SpanU8_equal(rem, str));
+}
+
+void tt7(){
+    SpanU8 str = (SpanU8){.data = (U8*)"AB", .len = 0};
+    SpanU8 rem = str;
+    bool c = SpanU8_parsing_try_read_char(&rem, 'A');
+    check(!c && SpanU8_equal(rem, str));
+}
+
+void tt8(){
+    SpanU8 str = cstr("abcd\n\n");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_entire_line(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 5, str.len - 5}));
+}
+
+void tt9(){
+    SpanU8 str = cstr("abcdef");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_entire_line(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 6, 0}));
+}
+
+void tt10(){
+    SpanU8 str = cstr("\nab\n\n");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_entire_line(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 1, str.len - 1}));
+}
+
+void tt11(){
+    SpanU8 str = cstr("ABC");
+    check(SpanU8_parsing_is_char_ahead(str, 'A'));
+    check(!SpanU8_parsing_is_char_ahead(str, 'B'));
+    str.len = 0;
+    check(!SpanU8_parsing_is_char_ahead(str, 'A'));
+}
+
+void test_u64_reading_with_good_inp(SpanU8 str, U64 right_val, U64 leftovers){
+    SpanU8 rem = str;
+    U64 val;
+    int c = SpanU8_read_U64(&rem, &val);
+    check(c == 0);
+    check(rem.data == str.data + str.len - leftovers && rem.len == leftovers);
+    check(val == right_val);
+}
+
+void tt12(){
+    test_u64_reading_with_good_inp(cstr("18446744073709551615--"), 18446744073709551615ul, 2);
+    test_u64_reading_with_good_inp(cstr("69+"), 69, 1);
+    test_u64_reading_with_good_inp(cstr("0"), 0, 0);
+}
+
+void test_u64_reading_with_ill_formed_inp(SpanU8 str){
+    SpanU8 rem = str;
+    U64 val;
+    int c = SpanU8_read_U64(&rem, &val);
+    check(c > 0);
+    check(SpanU8_equal(str, rem));
+}
+
+void tt13(){
+    test_u64_reading_with_ill_formed_inp(cstr("18446744073709551616"));
+    test_u64_reading_with_ill_formed_inp(cstr("01"));
+    test_u64_reading_with_ill_formed_inp(cstr("-123"));
+}
+
+void test_s64_reading_with_ill_formed_inp(SpanU8 str){
+    SpanU8 rem = str;
+    S64 val;
+    int c = SpanU8_read_S64(&rem, &val);
+    check(c > 0);
+    check(SpanU8_equal(str, rem));
+}
+
+void tt14(){
+    test_s64_reading_with_ill_formed_inp(cstr("9223372036854775808"));
+    test_s64_reading_with_ill_formed_inp(cstr("92233720368547758000"));
+    test_s64_reading_with_ill_formed_inp(cstr("+9223372036854775808"));
+    test_s64_reading_with_ill_formed_inp(cstr("-9223372036854775809"));
+    test_s64_reading_with_ill_formed_inp(cstr("-92233720368547758000"));
+    test_s64_reading_with_ill_formed_inp(cstr("-09"));
+    test_s64_reading_with_ill_formed_inp(cstr("09"));
+    test_s64_reading_with_ill_formed_inp(cstr("00"));
+    test_s64_reading_with_ill_formed_inp(cstr("-00"));
+    test_s64_reading_with_ill_formed_inp(cstr("+"));
+    test_s64_reading_with_ill_formed_inp(cstr("-"));
+    test_s64_reading_with_ill_formed_inp(cstr("+123"));
+}
+
+void test_s64_reading_with_good_inp(SpanU8 str, S64 right_val, U64 leftovers){
+    SpanU8 rem = str;
+    S64 val;
+    int c = SpanU8_read_S64(&rem, &val);
+    check(c == 0);
+    check(rem.data == str.data + str.len - leftovers && rem.len == leftovers);
+    check(val == right_val);
+}
+
+void tt15(){
+    test_s64_reading_with_good_inp(cstr("9223372036854775807--"), 9223372036854775807l, 2);
+    test_s64_reading_with_good_inp(cstr("9223372036854775807"), 9223372036854775807l, 0);
+    test_s64_reading_with_good_inp(cstr("9223372036854775805"), 9223372036854775805l, 0);
+    test_s64_reading_with_good_inp(cstr("-9223372036854775808++"), -9223372036854775807l-1, 2);
+    test_s64_reading_with_good_inp(cstr("-9223372036854775808"), -9223372036854775807l-1, 0);
+    test_s64_reading_with_good_inp(cstr("413-"), 413, 1);
+    test_s64_reading_with_good_inp(cstr("0-"), 0, 1);
+}
+
+void test_float_reading_with_good_inp(SpanU8 str, float right_val, float eps, U64 lo){
+    SpanU8 rem = str;
+    float val;
+    int c = SpanU8_read_float(&rem, &val);
+    check(c == 0);
+    check(rem.data == str.data + str.len - lo && rem.len == lo);
+    check((right_val >= +1e60 && val >= +1e60) || (right_val <= -1e60 && val <= -1e60) || fabsf(right_val - val) < eps);
+}
+
+void tt16(){
+    test_float_reading_with_good_inp(cstr("1022.312"), 1022.312f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("1022.1"), 1022.1f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("0.0000"), 0.f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("0.0e100"), 0.f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("1.2e3"), 1200.f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("101"), 101.f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("-123"), -123.f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("-123e-3"), -0.123f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("98e-3"), 0.098f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("1e50"), 1.e50f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("1e-50"), 1.e-50f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("-1e-50"), -1e-50f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("-1e50"), -1e50f, 0.00001f, 0);
+    test_float_reading_with_good_inp(cstr("-15e+4"), -150000, 0.00001f, 0);
+}
+
+void test_float_reading_with_ill_formed_inp(SpanU8 str){
+    SpanU8 rem = str;
+    float val;
+    int c = SpanU8_read_float(&rem, &val);
+    check(c > 0);
+    check(SpanU8_equal(str, rem));
+}
+
+void tt17(){
+    test_float_reading_with_ill_formed_inp(cstr("+123"));
+    test_float_reading_with_ill_formed_inp(cstr("1e51"));
+    test_float_reading_with_ill_formed_inp(cstr("1e9223372036854775807"));
+    test_float_reading_with_ill_formed_inp(cstr("1e9223372036854775808"));
+    test_float_reading_with_ill_formed_inp(cstr("1e-51"));
+    test_float_reading_with_ill_formed_inp(cstr("1e-9223372036854775808"));
+    test_float_reading_with_ill_formed_inp(cstr("1e-9223372036854775809"));
+}
+
+void tt18(){
+    SpanU8 str = cstr("   1");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_spaces(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 3, str.len - 3}));
+}
+
+void tt19(){
+    SpanU8 str = cstr("\t\r\n ");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_spaces(&rem);
+    check(SpanU8_equal(rem, (SpanU8){str.data + 4, 0}));
+}
+
+void tt20(){
+    SpanU8 str = cstr("1213");
+    SpanU8 rem = str;
+    SpanU8_parsing_skip_spaces(&rem);
+    check(SpanU8_equal(rem, str));
+}
+
+void tt21(){
+    check(is_hex_char('0'));
+    check(is_hex_char('9'));
+    check(is_hex_char('A'));
+    check(is_hex_char('F'));
+    check(is_hex_char('a'));
+    check(is_hex_char('f'));
+    check(!is_hex_char('_') && !is_hex_char('*') && !is_hex_char('~') && !is_hex_char('-') && !is_hex_char('(')
+        && !is_hex_char('/') && !is_hex_char(':') && !is_hex_char('G') && !is_hex_char('g') && !is_hex_char('['));
+    check(char_to_hex_digit('a') == 10 && char_to_hex_digit('A') == 10 && char_to_hex_digit('9') == 9);
+
+    SpanU8 str = cstr("E=");
+    SpanU8 rem = str;
+    U32 d;
+    bool c = SpanU8_parsing_try_read_hex_digit(&rem, &d);
+    check(c);
+    check(rem.data == str.data + 1 && rem.len == 1);
+    check(d == 0xE);
+
+    check(!SpanU8_parsing_try_read_hex_digit(&rem, &d));
+    check(rem.data == str.data + 1 && rem.len == 1);
+    rem.len = 0;
+    check(!SpanU8_parsing_try_read_hex_digit(&rem, &d));
+    check(rem.data == str.data + 1 && rem.len == 0);
+}
+
+/* Json kicks in */
+
+void tt22(){
+    VecU8 buf = VecU8_new();
+    json_encoding_append_utf16(&buf, 17);
+    check(SpanU8_cont_equal(VecU8_to_span(&buf), cstr("\\u0011")));
+    json_encoding_append_utf16(&buf, 7);
+    check(vec_matches_cstr(buf, "\\u0011\\u0007"));
+}
+
+void tt23(){
+    VecU8 buf = VecU8_new();
+    json_encoding_append_string(&buf, cstr(""));
+    check(vec_matches_cstr(buf, "\"\""));
+}
+
+void tt24(){
+    VecU8 buf = VecU8_new();
+    json_encoding_append_string(&buf, cstr("\"\n\t\r\\ AB"));
+    check(vec_matches_cstr(buf, "\"" "\\\"\\n\\t\\r\\\\ AB" "\""));
+}
+
+void tt25(){
+    Json x = Json_from_VecJson(VecJson_new());
+    VecU8 my_ans = json_encode(&x);
+    Json_drop(x);
+    check(vec_matches_cstr(my_ans, "[]"));
+}
+
+
+
 
 int main(){
-
+    tt1(); tt2(); tt3(); tt4(); tt5(); tt6(); tt7(); tt8(); tt9(); tt10(); tt11(); tt12(); tt13(); tt14(); tt15();
+    tt16(); tt17(); tt18(); tt19();
+    tt20(); tt21(); tt22(); tt23(); tt24(); tt25();
     return 0;
 }