fix bugs in SegmentBase when using filterAscii

This commit is contained in:
wyy 2013-12-07 15:22:00 +08:00
parent 8e8a68352b
commit 45a5df5856

View File

@ -40,12 +40,14 @@ namespace CppJieba
Unicode unico;
while(offset < size)
{
if(-1 == (ret = filterAscii(cstr + offset, size, len)))
const char * const nstr = cstr + offset;
uint nsize = size - offset;
if(-1 == (ret = filterAscii(nstr, nsize, len)) || 0 == len || len > nsize)
{
LogFatal("str[%s] illegal.", cstr);
return false;
}
subs.assign(cstr + offset, len);
subs.assign(nstr, len);
if(!ret)
{
res.push_back(subs);
@ -60,32 +62,9 @@ namespace CppJieba
}
cut(unico.begin(), unico.end(), res);
}
if(len == 0)
{
LogFatal("str[%s] illegal.", cstr);
return false;
}
offset += len;
}
return true;
//ChineseFilter filter;
//filter.feed(str);
//for(ChineseFilter::iterator it = filter.begin(); it != filter.end(); it++)
//{
// if(it.charType == CHWORD)
// {
// cut(it.begin, it.end, res);
// }
// else
// {
// string tmp;
// if(TransCode::encode(it.begin, it.end, tmp))
// {
// res.push_back(tmp);
// }
// }
//}
//return true;
}
};