use filterSpecialChars in segmentbase.hpp

This commit is contained in:
wyy 2014-04-14 22:21:09 +08:00
parent 59dae88689
commit 76d640b26e
2 changed files with 27 additions and 10 deletions

View File

@ -11,6 +11,9 @@
namespace CppJieba namespace CppJieba
{ {
using namespace Limonp; using namespace Limonp;
// Set of single-byte characters (space, tab, newline) that
// SegmentBase::filterSpecialChars() classifies as "special".
const char* const SPECIAL_CHARS = " \t\n";
class SegmentBase: public ISegment, public InitOnOff class SegmentBase: public ISegment, public InitOnOff
{ {
public: public:
@ -24,14 +27,6 @@ namespace CppJieba
assert(_getInitFlag()); assert(_getInitFlag());
Unicode unico; Unicode unico;
res.clear(); res.clear();
#ifdef NO_FILTER
if(!TransCode::decode(str, unico))
{
LogFatal("str[%s] decode failed.", str.c_str());
return false;
}
return cut(unico.begin(), unico.end(), res);
#else
const char * const cstr = str.c_str(); const char * const cstr = str.c_str();
size_t size = str.size(); size_t size = str.size();
size_t offset = 0; size_t offset = 0;
@ -42,7 +37,7 @@ namespace CppJieba
{ {
const char * const nstr = cstr + offset; const char * const nstr = cstr + offset;
size_t nsize = size - offset; size_t nsize = size - offset;
if(-1 == (ret = filterAscii(nstr, nsize, len)) || 0 == len || len > nsize) if(-1 == (ret = filterSpecialChars(nstr, nsize, len)) || 0 == len || len > nsize)
{ {
LogFatal("str[%s] illegal.", cstr); LogFatal("str[%s] illegal.", cstr);
return false; return false;
@ -65,10 +60,31 @@ namespace CppJieba
offset += len; offset += len;
} }
return true; return true;
#endif
} }
public: public:
/*
 * Measure the leading run of `str` whose bytes all belong to the same class
 * as the first byte, where the class is either "listed in SPECIAL_CHARS" or
 * "not listed in SPECIAL_CHARS".
 *
 * str    : start of the buffer to scan; it does not need to be NUL-terminated.
 * len    : number of readable bytes at `str`.
 * resLen : on success receives the length of the leading run
 *          (1 <= resLen <= len); left untouched on error.
 *
 * return  0 if the run is made of SPECIAL_CHARS bytes;
 * return  1 if the run is made of bytes that are not in SPECIAL_CHARS;
 * return -1 if `str` is null or `len` is 0.
 *
 * A '\0' byte is never treated as special: strchr() matches the terminator
 * of the searched set when asked to look for '\0', so that byte has to be
 * excluded explicitly.
 * */
static int filterSpecialChars(const char* str, size_t len, size_t& resLen)
{
    if(!str || !len)
    {
        return -1;
    }
    // Class of the first byte decides which kind of run is being measured.
    const bool firstIsSpecial = ('\0' != str[0]) && (NULL != strchr(SPECIAL_CHARS, str[0]));
    size_t runLen = 1;
    while(runLen < len)
    {
        const char ch = str[runLen];
        const bool isSpecial = ('\0' != ch) && (NULL != strchr(SPECIAL_CHARS, ch));
        if(isSpecial != firstIsSpecial)
        {
            // First byte of the other class ends the run.
            break;
        }
        ++runLen;
    }
    resLen = runLen;
    return firstIsSpecial ? 0 : 1;
}
/* /*
* if char is ascii, count the ascii string's length and return 0; * if char is ascii, count the ascii string's length and return 0;
* else count the nonascii string's length and return 1; * else count the nonascii string's length and return 1;

View File

@ -7,3 +7,4 @@
我来到南京市长江大桥 我来到南京市长江大桥
请在一米线外等候 请在一米线外等候
人事处女干事 人事处女干事
去医院做B超叫号123