use filterSpecialChars in segmentbase.hpp

This commit is contained in:
wyy 2014-04-14 22:21:09 +08:00
parent 59dae88689
commit 76d640b26e
2 changed files with 27 additions and 10 deletions

View File

@ -11,6 +11,9 @@
namespace CppJieba
{
using namespace Limonp;
// Bytes that filterSpecialChars() classifies as "special": space, tab and newline.
const char* const SPECIAL_CHARS = " \t\n";
class SegmentBase: public ISegment, public InitOnOff
{
public:
@ -24,14 +27,6 @@ namespace CppJieba
assert(_getInitFlag());
Unicode unico;
res.clear();
#ifdef NO_FILTER
if(!TransCode::decode(str, unico))
{
LogFatal("str[%s] decode failed.", str.c_str());
return false;
}
return cut(unico.begin(), unico.end(), res);
#else
const char * const cstr = str.c_str();
size_t size = str.size();
size_t offset = 0;
@ -42,7 +37,7 @@ namespace CppJieba
{
const char * const nstr = cstr + offset;
size_t nsize = size - offset;
if(-1 == (ret = filterAscii(nstr, nsize, len)) || 0 == len || len > nsize)
if(-1 == (ret = filterSpecialChars(nstr, nsize, len)) || 0 == len || len > nsize)
{
LogFatal("str[%s] illegal.", cstr);
return false;
@ -65,10 +60,31 @@ namespace CppJieba
offset += len;
}
return true;
#endif
}
public:
/*
 * Measure the leading run of `str` whose bytes all belong to the same class:
 * either every byte is one of SPECIAL_CHARS, or none of them is.
 *
 * str:    start of the buffer to scan; only the first `len` bytes are read,
 *         so it need not be NUL-terminated.
 * len:    number of readable bytes at `str`.
 * resLen: receives the length of the run (1..len) on success; left untouched
 *         on error.
 *
 * returns 0 if the run consists of SPECIAL_CHARS,
 *         1 if the run consists of non-SPECIAL_CHARS bytes,
 *        -1 if `str` is NULL or `len` is 0.
 * */
static int filterSpecialChars(const char* str, size_t len, size_t& resLen)
{
    if(!str || !len)
    {
        return -1;
    }
    // strchr() also matches the terminating '\0' of SPECIAL_CHARS, so a NUL
    // byte inside the input has to be excluded explicitly; otherwise it would
    // be misclassified as a special char.
    const bool firstIsSpecial = (*str != '\0' && strchr(SPECIAL_CHARS, *str) != NULL);
    size_t runLen = 1;
    while(runLen < len)
    {
        const char ch = str[runLen];
        const bool isSpecial = (ch != '\0' && strchr(SPECIAL_CHARS, ch) != NULL);
        if(isSpecial != firstIsSpecial)
        {
            break;
        }
        ++runLen;
    }
    resLen = runLen;
    return firstIsSpecial ? 0 : 1;
}
/*
* if char is ascii, count the ascii string's length and return 0;
* else count the nonascii string's length and return 1;

View File

@ -7,3 +7,4 @@
我来到南京市长江大桥
请在一米线外等候
人事处女干事
去医院做B超叫号123