From 8e8a68352b3b8376313610e9fc06450fe4897655 Mon Sep 17 00:00:00 2001 From: wyy Date: Fri, 6 Dec 2013 22:52:59 -0800 Subject: [PATCH] Make GBK encoding handling more robust --- src/ChineseFilter.hpp | 10 +++++++++- src/SegmentBase.hpp | 7 ++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/ChineseFilter.hpp b/src/ChineseFilter.hpp index c969607..6a5dd66 100644 --- a/src/ChineseFilter.hpp +++ b/src/ChineseFilter.hpp @@ -112,7 +112,7 @@ namespace CppJieba } char x = 0x80; int resFlag = (str[0] & x ? 1 : 0); - resLen = 1; + resLen = 0; if(!resFlag) { while(resLen < len && !(str[resLen] & x)) @@ -124,9 +124,17 @@ namespace CppJieba { while(resLen < len && (str[resLen] & x)) { +#ifdef CPPJIEBA_GBK + resLen += 2; +#else resLen ++; +#endif } } + if(resLen > len) + { + return -1; + } return resFlag; } } diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 5536047..114f2f3 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -31,7 +31,7 @@ namespace CppJieba LogError("not inited."); return false; } - const char * cstr = str.c_str(); + const char * const cstr = str.c_str(); uint size = str.size(); uint offset = 0; string subs; @@ -60,6 +60,11 @@ namespace CppJieba } cut(unico.begin(), unico.end(), res); } + if(len == 0) + { + LogFatal("str[%s] illegal.", cstr); + return false; + } offset += len; } return true;