diff --git a/deps/limonp/NonCopyable.hpp b/deps/limonp/NonCopyable.hpp
index 5cdcf37..145400f 100644
--- a/deps/limonp/NonCopyable.hpp
+++ b/deps/limonp/NonCopyable.hpp
@@ -1,3 +1,5 @@
+/************************************
+ ************************************/
 #ifndef LIMONP_NONCOPYABLE_H
 #define LIMONP_NONCOPYABLE_H
 
diff --git a/deps/limonp/StringUtil.hpp b/deps/limonp/StringUtil.hpp
index 44fef06..41d451e 100644
--- a/deps/limonp/StringUtil.hpp
+++ b/deps/limonp/StringUtil.hpp
@@ -185,11 +185,97 @@ bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
   }
   return true;
 }
+
 template <class Uint16Container>
 bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
   return Utf8ToUnicode(str.c_str(), str.size(), vec);
 }
 
+/**
+ * Decodes a UTF-8 string into a sequence of 32-bit code points.
+ *
+ * Handles 1- to 4-byte sequences. vec is cleared first and then
+ * receives one element per decoded sequence via push_back().
+ *
+ * Returns false as soon as a byte is not a valid lead byte, a
+ * sequence is truncated, or a continuation byte does not have the
+ * form 10xxxxxx; vec then holds the code points decoded so far.
+ * Overlong encodings and surrogate values are not rejected.
+ */
+template <class Uint32Container>
+bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
+  vec.clear();
+  const size_t len = str.size();
+  for (size_t i = 0; i < len;) {
+    const uint8_t lead = (uint8_t)str[i];
+    uint32_t cp;
+    size_t trail; // continuation bytes that must follow the lead
+    if (lead < 0x80) { // 0xxxxxxx
+      cp = lead;
+      trail = 0;
+    } else if ((lead & 0xe0) == 0xc0) { // 110xxxxx
+      cp = lead & 0x1f;
+      trail = 1;
+    } else if ((lead & 0xf0) == 0xe0) { // 1110xxxx
+      cp = lead & 0x0f;
+      trail = 2;
+    } else if ((lead & 0xf8) == 0xf0) { // 11110xxx
+      cp = lead & 0x07;
+      trail = 3;
+    } else {
+      // Stray continuation byte (10xxxxxx) or a byte >= 0xf8.
+      return false;
+    }
+    if (trail >= len - i) { // sequence runs past the end of str
+      return false;
+    }
+    for (size_t k = 1; k <= trail; k++) {
+      const uint8_t next = (uint8_t)str[i + k];
+      if ((next & 0xc0) != 0x80) {
+        return false;
+      }
+      cp = (cp << 6) | (next & 0x3f);
+    }
+    vec.push_back(cp);
+    i += trail + 1;
+  }
+  return true;
+}
+
+/**
+ * Encodes the code points in [begin, end) as UTF-8 into res.
+ *
+ * res is cleared first. Values up to 0x7f, 0x7ff and 0xffff yield
+ * 1, 2 and 3 bytes; every larger value yields 4 bytes carrying its
+ * low 21 bits. Surrogates and values above 0x10ffff are not
+ * rejected.
+ */
+template <class Uint32ContainerConIter>
+void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
+  res.clear();
+  for (; begin != end; ++begin) {
+    const uint32_t ui = *begin;
+    if (ui <= 0x7f) {
+      res += char(ui);
+    } else if (ui <= 0x7ff) {
+      res += char(((ui >> 6) & 0x1f) | 0xc0);
+      res += char((ui & 0x3f) | 0x80);
+    } else if (ui <= 0xffff) {
+      res += char(((ui >> 12) & 0x0f) | 0xe0);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    } else {
+      // The lead byte 11110xxx carries three payload bits, so the
+      // mask must be 0x07; 0x03 would drop bit 20 and corrupt code
+      // points at or above 0x100000.
+      res += char(((ui >> 18) & 0x07) | 0xf0);
+      res += char(((ui >> 12) & 0x3f) | 0x80);
+      res += char(((ui >> 6) & 0x3f) | 0x80);
+      res += char((ui & 0x3f) | 0x80);
+    }
+  }
+}
+
 template <class Uint16ContainerConIter>
 void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
   res.clear();