#include #include #include namespace regexis024 { uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){ uint32_t res = 0; for (int i = 0; i < sz; i++){ int32_t ch = peep(ctx); if ('0' <= ch && ch <= '9') res = ((res << 4) | ((uint32_t)ch - '0')); else if ('a' <= ch && ch <= 'z') res = ((res << 4) | ((uint32_t)ch - 'a' + 10)); else if ('A' <= ch && ch <= 'Z') res = ((res << 4) | ((uint32_t)ch - 'A' + 10)); else{ report(ctx, "escape backslash expression: bad unicode code"); return 0; } readChar(ctx); } return res; } void unicode_in_bs_case(REGEX_IS024_MeaningContext &ctx, bool &ret_is_multicode, codeset_t &ret_set, int sz){ ret_is_multicode = false; readChar(ctx); uint32_t hc = read_hex(ctx, sz); // Might create an error ret_set = codeset_of_one_char(hc); } void backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc, bool &ret_is_multicode, codeset_t &ret_set) { int32_t leader = peep(ctx); if (ctx.error) return; #define block(l, b, E) case l: ret_is_multicode = b; ret_set = E; readChar(ctx); break; switch (leader) { block('s', false, codeset_of_one_char(U' ')) block('t', false, codeset_of_one_char(U'\t')) block('n', false, codeset_of_one_char(U'\n')) block('r', false, codeset_of_one_char(U'\r')) block('e', true, cc.spaces); block('E', true, invert_set(cc.spaces)) block('w', true, cc.word_constituents); block('W', true, invert_set(cc.word_constituents)); case 'u': unicode_in_bs_case(ctx, ret_is_multicode, ret_set, 4); break; case 'U': unicode_in_bs_case(ctx, ret_is_multicode, ret_set, 8); break; default: if (leader >= 0){ ret_is_multicode = false; ret_set = codeset_of_one_char(leader); } else { report(ctx, "backslash in the wrong place"); } } } }