mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
update enc
This commit is contained in:
parent
1576d15b2f
commit
1bdce8904f
@ -98,6 +98,7 @@ namespace CppJieba
|
|||||||
return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER);
|
return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -218,30 +218,29 @@ namespace Limonp
|
|||||||
return str.find(ch) != string::npos;
|
return str.find(ch) != string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec)
|
inline bool utf8ToUnicode(const char * const str, uint len, vector<uint16_t>& vec)
|
||||||
{
|
{
|
||||||
char ch1, ch2;
|
char ch1, ch2;
|
||||||
if(str.empty())
|
if(!str)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
vec.clear();
|
vec.clear();
|
||||||
size_t siz = str.size();
|
for(uint i = 0;i < len;)
|
||||||
for(uint i = 0;i < siz;)
|
|
||||||
{
|
{
|
||||||
if(!(str[i] & 0x80)) // 0xxxxxxx
|
if(!(str[i] & 0x80)) // 0xxxxxxx
|
||||||
{
|
{
|
||||||
vec.push_back(str[i]);
|
vec.push_back(str[i]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx
|
else if ((unsigned char)str[i] <= 0xdf && i + 1 < len) // 110xxxxxx
|
||||||
{
|
{
|
||||||
ch1 = (str[i] >> 2) & 0x07;
|
ch1 = (str[i] >> 2) & 0x07;
|
||||||
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
||||||
vec.push_back(twocharToUint16(ch1, ch2));
|
vec.push_back(twocharToUint16(ch1, ch2));
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
else if((unsigned char)str[i] <= 0xef && i + 2 < siz)
|
else if((unsigned char)str[i] <= 0xef && i + 2 < len)
|
||||||
{
|
{
|
||||||
ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
|
ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
|
||||||
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
||||||
@ -255,6 +254,10 @@ namespace Limonp
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec)
|
||||||
|
{
|
||||||
|
return utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||||
|
}
|
||||||
|
|
||||||
inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||||
{
|
{
|
||||||
@ -287,15 +290,16 @@ namespace Limonp
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
|
|
||||||
|
inline bool gbkTrans(const char* const str, uint len, vector<uint16_t>& vec)
|
||||||
{
|
{
|
||||||
vec.clear();
|
vec.clear();
|
||||||
if(str.empty())
|
if(!str)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
uint i = 0;
|
uint i = 0;
|
||||||
while(i < str.size())
|
while(i < len)
|
||||||
{
|
{
|
||||||
if(0 == (str[i] & 0x80))
|
if(0 == (str[i] & 0x80))
|
||||||
{
|
{
|
||||||
@ -304,7 +308,7 @@ namespace Limonp
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if(i + 1 < str.size()) //&& (str[i+1] & 0x80))
|
if(i + 1 < len) //&& (str[i+1] & 0x80))
|
||||||
{
|
{
|
||||||
vec.push_back(twocharToUint16(str[i], str[i + 1]));
|
vec.push_back(twocharToUint16(str[i], str[i + 1]));
|
||||||
i += 2;
|
i += 2;
|
||||||
@ -317,6 +321,11 @@ namespace Limonp
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
|
||||||
|
{
|
||||||
|
return gbkTrans(str.c_str(), str.size(), vec);
|
||||||
|
}
|
||||||
|
|
||||||
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||||
{
|
{
|
||||||
if(begin >= end)
|
if(begin >= end)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user