mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
fix bugs in SegmentBase when using filterAscii
This commit is contained in:
parent
8e8a68352b
commit
45a5df5856
@ -40,12 +40,14 @@ namespace CppJieba
|
||||
Unicode unico;
|
||||
while(offset < size)
|
||||
{
|
||||
if(-1 == (ret = filterAscii(cstr + offset, size, len)))
|
||||
const char * const nstr = cstr + offset;
|
||||
uint nsize = size - offset;
|
||||
if(-1 == (ret = filterAscii(nstr, nsize, len)) || 0 == len || len > nsize)
|
||||
{
|
||||
LogFatal("str[%s] illegal.", cstr);
|
||||
return false;
|
||||
}
|
||||
subs.assign(cstr + offset, len);
|
||||
subs.assign(nstr, len);
|
||||
if(!ret)
|
||||
{
|
||||
res.push_back(subs);
|
||||
@ -60,32 +62,9 @@ namespace CppJieba
|
||||
}
|
||||
cut(unico.begin(), unico.end(), res);
|
||||
}
|
||||
if(len == 0)
|
||||
{
|
||||
LogFatal("str[%s] illegal.", cstr);
|
||||
return false;
|
||||
}
|
||||
offset += len;
|
||||
}
|
||||
return true;
|
||||
//ChineseFilter filter;
|
||||
//filter.feed(str);
|
||||
//for(ChineseFilter::iterator it = filter.begin(); it != filter.end(); it++)
|
||||
//{
|
||||
// if(it.charType == CHWORD)
|
||||
// {
|
||||
// cut(it.begin, it.end, res);
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// string tmp;
|
||||
// if(TransCode::encode(it.begin, it.end, tmp))
|
||||
// {
|
||||
// res.push_back(tmp);
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
//return true;
|
||||
}
|
||||
|
||||
};
|
||||
|
Loading…
x
Reference in New Issue
Block a user