mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
upgrade limonp to v0.5.4
This commit is contained in:
parent
239d025cd8
commit
8d66b1f1fa
2
deps/limonp/NonCopyable.hpp
vendored
2
deps/limonp/NonCopyable.hpp
vendored
@ -1,3 +1,5 @@
|
||||
/************************************
|
||||
************************************/
|
||||
#ifndef LIMONP_NONCOPYABLE_H
|
||||
#define LIMONP_NONCOPYABLE_H
|
||||
|
||||
|
81
deps/limonp/StringUtil.hpp
vendored
81
deps/limonp/StringUtil.hpp
vendored
@ -185,11 +185,92 @@ bool Utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec) {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint16Container>
|
||||
bool Utf8ToUnicode(const string& str, Uint16Container& vec) {
|
||||
return Utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||
}
|
||||
|
||||
template <class Uint32Container>
|
||||
bool Utf8ToUnicode32(const string& str, Uint32Container& vec) {
|
||||
uint32_t tmp;
|
||||
vec.clear();
|
||||
for(size_t i = 0; i < str.size();) {
|
||||
if(!(str[i] & 0x80)) { // 0xxxxxxx
|
||||
// 7bit, total 7bit
|
||||
tmp = (uint8_t)(str[i]) & 0x7f;
|
||||
i++;
|
||||
} else if ((uint8_t)str[i] <= 0xdf && i + 1 < str.size()) { // 110xxxxxx
|
||||
// 5bit, total 5bit
|
||||
tmp = (uint8_t)(str[i]) & 0x1f;
|
||||
|
||||
// 6bit, total 11bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
i += 2;
|
||||
} else if((uint8_t)str[i] <= 0xef && i + 2 < str.size()) { // 1110xxxxxx
|
||||
// 4bit, total 4bit
|
||||
tmp = (uint8_t)(str[i]) & 0x0f;
|
||||
|
||||
// 6bit, total 10bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 16bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||
|
||||
i += 3;
|
||||
} else if((uint8_t)str[i] <= 0xf7 && i + 3 < str.size()) { // 11110xxxx
|
||||
// 3bit, total 3bit
|
||||
tmp = (uint8_t)(str[i]) & 0x07;
|
||||
|
||||
// 6bit, total 9bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+1]) & 0x3f;
|
||||
|
||||
// 6bit, total 15bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+2]) & 0x3f;
|
||||
|
||||
// 6bit, total 21bit
|
||||
tmp <<= 6;
|
||||
tmp |= (uint8_t)(str[i+3]) & 0x3f;
|
||||
|
||||
i += 4;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
vec.push_back(tmp);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Uint32ContainerConIter>
|
||||
void Unicode32ToUtf8(Uint32ContainerConIter begin, Uint32ContainerConIter end, string& res) {
|
||||
res.clear();
|
||||
uint32_t ui;
|
||||
while(begin != end) {
|
||||
ui = *begin;
|
||||
if(ui <= 0x7f) {
|
||||
res += char(ui);
|
||||
} else if(ui <= 0x7ff) {
|
||||
res += char(((ui >> 6) & 0x1f) | 0xc0);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else if(ui <= 0xffff) {
|
||||
res += char(((ui >> 12) & 0x0f) | 0xe0);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
} else {
|
||||
res += char(((ui >> 18) & 0x03) | 0xf0);
|
||||
res += char(((ui >> 12) & 0x3f) | 0x80);
|
||||
res += char(((ui >> 6) & 0x3f) | 0x80);
|
||||
res += char((ui & 0x3f) | 0x80);
|
||||
}
|
||||
begin ++;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Uint16ContainerConIter>
|
||||
void UnicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res) {
|
||||
res.clear();
|
||||
|
Loading…
x
Reference in New Issue
Block a user