From d4c48ea269cc4533e46021be8a1a919b2c432730 Mon Sep 17 00:00:00 2001 From: Andreev Gregory Date: Wed, 21 Aug 2024 09:50:46 +0300 Subject: [PATCH] Moved utf8.h/cpp to namespace json:: --- src/library/jsonincpp/utf8.cpp | 154 +++++++++++++++++---------------- src/library/jsonincpp/utf8.h | 14 +-- 2 files changed, 86 insertions(+), 82 deletions(-) diff --git a/src/library/jsonincpp/utf8.cpp b/src/library/jsonincpp/utf8.cpp index a6cb591..a1a924d 100644 --- a/src/library/jsonincpp/utf8.cpp +++ b/src/library/jsonincpp/utf8.cpp @@ -1,82 +1,84 @@ #include "utf8.h" #include -int _utf8_retrieve_size(uint8_t firstByte) { - if (!(firstByte & 0b10000000)) - return 1; - uint8_t a = 0b11000000; - uint8_t b = 0b00100000; - for (int i = 2; i <= 4; i++){ - if ((firstByte & (a | b)) == a) - return i; - a |= b; - b >>= 1; - } - return -1; -} - -int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string) { - if (sz == 1) - return (int32_t)string[pos]; - uint32_t v = ((uint8_t)string[pos]) & (0b01111111 >> sz); - pos++; - for (int i = 1; i < sz; i++){ - uint32_t th = (uint8_t)string[pos]; - if ((th & 0b11000000) != 0b10000000) - return -1; - v <<= 6; - v |= (th & 0b00111111); - pos++; - } - assert(v <= INT32_MAX); - return static_cast(v); -} - -void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) { - if (pos >= string_size) {cp = -1; return;} - adj = _utf8_retrieve_size((uint8_t)string[pos]); - if (adj < 0 || pos + adj > string_size) {cp = -1; return;} - if ((cp = _utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;} -} - -bool isUtf8String(const std::string &str) { - size_t N = str.size(); - size_t cpos = 0; - while (cpos < N) { - int32_t codepoint; - size_t adj; - utf8_string_iterat(codepoint, adj, cpos, str.data(), N); - if (codepoint < 0) - return false; - cpos += adj; - } - return true; -} - -int codepoint_to_utf8(uint32_t cp, std::string &out) { - size_t N = out.size(); - auto make_compl = [cp](int imp) -> char { - return (char)(((cp >> imp) & 0x3f) | 0x80); - }; - if (cp > 0x10FFFF) +namespace json { + int utf8_retrieve_size(uint8_t firstByte) { + if (!(firstByte & 0b10000000)) + return 1; + uint8_t a = 0b11000000; + uint8_t b = 0b00100000; + for (int i = 2; i <= 4; i++){ + if ((firstByte & (a | b)) == a) + return i; + a |= b; + b >>= 1; + } return -1; - if (cp <= 0x7F) { - out += (char)cp; - } else if (cp <= 0x7ff) { - out.resize(N + 2); - out[N] = (char)((cp >> 6) | 0xc0); - out[N + 1] = make_compl(0); - } else if (cp <= 0xffff) { - out.resize(N + 3); - out[N] = (char)((cp >> 12) | 0xe0); - out[N + 1] = make_compl(6); - out[N + 2] = make_compl(0); - } else { - out.resize(N + 4); - out[N] = (char)((cp >> 18) | 0xf0); - out[N + 1] = make_compl(12); - out[N + 2] = make_compl(6); - out[N + 3] = make_compl(0); } - return 0; + + int32_t utf8_retrieve_character(int sz, size_t pos, const char *string) { + if (sz == 1) + return (int32_t)string[pos]; + uint32_t v = ((uint8_t)string[pos]) & (0b01111111 >> sz); + pos++; + for (int i = 1; i < sz; i++){ + uint32_t th = (uint8_t)string[pos]; + if ((th & 0b11000000) != 0b10000000) + return -1; + v <<= 6; + v |= (th & 0b00111111); + pos++; + } + assert(v <= INT32_MAX); + return static_cast(v); + } + + void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) { + if (pos >= string_size) {cp = -1; return;} + adj = utf8_retrieve_size((uint8_t)string[pos]); + if (adj < 0 || pos + adj > string_size) {cp = -1; return;} + if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;} + } + + bool isUtf8String(const std::string &str) { + size_t N = str.size(); + size_t cpos = 0; + while (cpos < N) { + int32_t codepoint; + size_t adj; + utf8_string_iterat(codepoint, adj, cpos, str.data(), N); + if (codepoint < 0) + return false; + cpos += adj; + } + return true; + } + + int codepoint_to_utf8(uint32_t cp, std::string &out) { + size_t N = out.size(); + auto make_compl = [cp](int imp) -> char { + return (char)(((cp >> imp) & 0x3f) | 0x80); + }; + if (cp > 0x10FFFF) + return -1; + if (cp <= 0x7F) { + out += (char)cp; + } else if (cp <= 0x7ff) { + out.resize(N + 2); + out[N] = (char)((cp >> 6) | 0xc0); + out[N + 1] = make_compl(0); + } else if (cp <= 0xffff) { + out.resize(N + 3); + out[N] = (char)((cp >> 12) | 0xe0); + out[N + 1] = make_compl(6); + out[N + 2] = make_compl(0); + } else { + out.resize(N + 4); + out[N] = (char)((cp >> 18) | 0xf0); + out[N + 1] = make_compl(12); + out[N + 2] = make_compl(6); + out[N + 3] = make_compl(0); + } + return 0; + } } diff --git a/src/library/jsonincpp/utf8.h b/src/library/jsonincpp/utf8.h index 5a84ebb..e07c79e 100644 --- a/src/library/jsonincpp/utf8.h +++ b/src/library/jsonincpp/utf8.h @@ -4,15 +4,17 @@ #include #include -int _utf8_retrieve_size(uint8_t firstByte); +namespace json { + int utf8_retrieve_size(uint8_t firstByte); -int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string); + int32_t utf8_retrieve_character(int sz, size_t pos, const char *string); -void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size); + void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size); -bool isUtf8String(const std::string& str); + bool isUtf8String(const std::string& str); -/* Returns -1 if cp is not in 0-0x10FFFF range */ -int codepoint_to_utf8(uint32_t cp, std::string& out); + /* Returns -1 if cp is not in 0-0x10FFFF range */ + int codepoint_to_utf8(uint32_t cp, std::string& out); +} #endif