Moved utf8.h/cpp to namespace json::

This commit is contained in:
Андреев Григорий 2024-08-21 09:50:46 +03:00
parent 1c2e479012
commit d4c48ea269
2 changed files with 86 additions and 82 deletions

View File

@ -1,7 +1,8 @@
#include "utf8.h" #include "utf8.h"
#include <assert.h> #include <assert.h>
int _utf8_retrieve_size(uint8_t firstByte) { namespace json {
int utf8_retrieve_size(uint8_t firstByte) {
if (!(firstByte & 0b10000000)) if (!(firstByte & 0b10000000))
return 1; return 1;
uint8_t a = 0b11000000; uint8_t a = 0b11000000;
@ -13,9 +14,9 @@ int _utf8_retrieve_size(uint8_t firstByte) {
b >>= 1; b >>= 1;
} }
return -1; return -1;
} }
int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string) { int32_t utf8_retrieve_character(int sz, size_t pos, const char *string) {
if (sz == 1) if (sz == 1)
return (int32_t)string[pos]; return (int32_t)string[pos];
uint32_t v = ((uint8_t)string[pos]) & (0b01111111 >> sz); uint32_t v = ((uint8_t)string[pos]) & (0b01111111 >> sz);
@ -30,16 +31,16 @@ int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string) {
} }
assert(v <= INT32_MAX); assert(v <= INT32_MAX);
return static_cast<int32_t>(v); return static_cast<int32_t>(v);
} }
void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) { void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) {
if (pos >= string_size) {cp = -1; return;} if (pos >= string_size) {cp = -1; return;}
adj = _utf8_retrieve_size((uint8_t)string[pos]); adj = utf8_retrieve_size((uint8_t)string[pos]);
if (adj < 0 || pos + adj > string_size) {cp = -1; return;} if (adj < 0 || pos + adj > string_size) {cp = -1; return;}
if ((cp = _utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;} if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1; return;}
} }
bool isUtf8String(const std::string &str) { bool isUtf8String(const std::string &str) {
size_t N = str.size(); size_t N = str.size();
size_t cpos = 0; size_t cpos = 0;
while (cpos < N) { while (cpos < N) {
@ -51,9 +52,9 @@ bool isUtf8String(const std::string &str) {
cpos += adj; cpos += adj;
} }
return true; return true;
} }
int codepoint_to_utf8(uint32_t cp, std::string &out) { int codepoint_to_utf8(uint32_t cp, std::string &out) {
size_t N = out.size(); size_t N = out.size();
auto make_compl = [cp](int imp) -> char { auto make_compl = [cp](int imp) -> char {
return (char)(((cp >> imp) & 0x3f) | 0x80); return (char)(((cp >> imp) & 0x3f) | 0x80);
@ -79,4 +80,5 @@ int codepoint_to_utf8(uint32_t cp, std::string &out) {
out[N + 3] = make_compl(0); out[N + 3] = make_compl(0);
} }
return 0; return 0;
}
} }

View File

@ -4,15 +4,17 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
int _utf8_retrieve_size(uint8_t firstByte); namespace json {
int utf8_retrieve_size(uint8_t firstByte);
int32_t _utf8_retrieve_character(int sz, size_t pos, const char *string); int32_t utf8_retrieve_character(int sz, size_t pos, const char *string);
void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size); void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size);
bool isUtf8String(const std::string& str); bool isUtf8String(const std::string& str);
/* Returns -1 if cp is not in 0-0x10FFFF range */ /* Returns -1 if cp is not in 0-0x10FFFF range */
int codepoint_to_utf8(uint32_t cp, std::string& out); int codepoint_to_utf8(uint32_t cp, std::string& out);
}
#endif #endif