mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
little change
This commit is contained in:
parent
4e5c48962a
commit
62efd1fca4
5
.gitignore
vendored
5
.gitignore
vendored
@ -7,3 +7,8 @@ tags
|
|||||||
log
|
log
|
||||||
main
|
main
|
||||||
lib*.a
|
lib*.a
|
||||||
|
*_demo
|
||||||
|
segdict*
|
||||||
|
prior.gbk
|
||||||
|
tmp
|
||||||
|
t.*
|
||||||
|
3
demo/testlines.utf8
Normal file
3
demo/testlines.utf8
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
我来到北京清华大学
|
||||||
|
他来到了网易杭研大厦
|
||||||
|
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
@ -51,23 +51,24 @@ namespace CppJieba
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
vec.clear();
|
vec.clear();
|
||||||
for(uint i = 0;i < str.size();)
|
size_t siz = str.size();
|
||||||
|
for(uint i = 0;i < siz;)
|
||||||
{
|
{
|
||||||
if((unsigned char)str[i] <= 0x7f) // 0xxxxxxx
|
if(!(str[i] & 0x80)) // 0xxxxxxx
|
||||||
{
|
{
|
||||||
vec.push_back(str[i]);
|
vec.push_back(str[i]);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
else if ((unsigned char)str[i] <= 0xdf && i + 1 < str.size()) // 110xxxxxx
|
else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx
|
||||||
{
|
{
|
||||||
ch1 = ((unsigned char)str[i] >> 2) & 0x07;
|
ch1 = (str[i] >> 2) & 0x07;
|
||||||
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
|
||||||
vec.push_back(twocharToUint16(ch1, ch2));
|
vec.push_back(twocharToUint16(ch1, ch2));
|
||||||
i += 2;
|
i += 2;
|
||||||
}
|
}
|
||||||
else if((unsigned char)str[i] <= 0xef && i + 2 < str.size())
|
else if((unsigned char)str[i] <= 0xef && i + 2 < siz)
|
||||||
{
|
{
|
||||||
ch1 = ((unsigned char)str[i] << 4) | (((unsigned char)str[i+1] >> 2) & 0x0f );
|
ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
|
||||||
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
|
||||||
vec.push_back(twocharToUint16(ch1, ch2));
|
vec.push_back(twocharToUint16(ch1, ch2));
|
||||||
i += 3;
|
i += 3;
|
||||||
@ -178,13 +179,8 @@ namespace CppJieba
|
|||||||
|
|
||||||
size_t TransCode::getWordLength(const string& str)
|
size_t TransCode::getWordLength(const string& str)
|
||||||
{
|
{
|
||||||
if(NULL == _pf_strToVec)
|
|
||||||
{
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
vector<uint16_t> vec;
|
vector<uint16_t> vec;
|
||||||
bool ret = strToVec(str, vec);
|
if(!strToVec(str, vec))
|
||||||
if(!ret)
|
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -222,7 +218,7 @@ int main()
|
|||||||
//vector<uint16_t> vec;
|
//vector<uint16_t> vec;
|
||||||
//tmp("1",vec);
|
//tmp("1",vec);
|
||||||
|
|
||||||
string a("严");
|
string a("abd你好世界!a");
|
||||||
vector<uint16_t> vec;
|
vector<uint16_t> vec;
|
||||||
//TransCode::setUtf8Enc();
|
//TransCode::setUtf8Enc();
|
||||||
cout<<TransCode::strToVec(a, vec)<<endl;
|
cout<<TransCode::strToVec(a, vec)<<endl;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user