diff --git a/src/ChineseFilter.hpp b/src/ChineseFilter.hpp index 0c12f3f..3b1e163 100644 --- a/src/ChineseFilter.hpp +++ b/src/ChineseFilter.hpp @@ -98,6 +98,7 @@ namespace CppJieba return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER); } }; + } #endif diff --git a/src/Limonp/str_functs.hpp b/src/Limonp/str_functs.hpp index 5a480c9..a603922 100644 --- a/src/Limonp/str_functs.hpp +++ b/src/Limonp/str_functs.hpp @@ -218,30 +218,29 @@ namespace Limonp return str.find(ch) != string::npos; } - inline bool utf8ToUnicode(const string& str, vector& vec) + inline bool utf8ToUnicode(const char * const str, uint len, vector& vec) { char ch1, ch2; - if(str.empty()) + if(!str) { return false; } vec.clear(); - size_t siz = str.size(); - for(uint i = 0;i < siz;) + for(uint i = 0;i < len;) { if(!(str[i] & 0x80)) // 0xxxxxxx { vec.push_back(str[i]); i++; } - else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx + else if ((unsigned char)str[i] <= 0xdf && i + 1 < len) // 110xxxxxx { ch1 = (str[i] >> 2) & 0x07; ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 ); vec.push_back(twocharToUint16(ch1, ch2)); i += 2; } - else if((unsigned char)str[i] <= 0xef && i + 2 < siz) + else if((unsigned char)str[i] <= 0xef && i + 2 < len) { ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f ); ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f); @@ -255,6 +254,10 @@ namespace Limonp } return true; } + inline bool utf8ToUnicode(const string& str, vector& vec) + { + return utf8ToUnicode(str.c_str(), str.size(), vec); + } inline bool unicodeToUtf8(vector::const_iterator begin, vector::const_iterator end, string& res) { @@ -287,15 +290,16 @@ namespace Limonp return true; } - inline bool gbkTrans(const string& str, vector& vec) + + inline bool gbkTrans(const char* const str, uint len, vector& vec) { vec.clear(); - if(str.empty()) + if(!str) { return false; } uint i = 0; - while(i < str.size()) + while(i < len) { if(0 == (str[i] & 0x80)) { @@ -304,7 +308,7 @@ namespace Limonp } else { - if(i + 1 < str.size()) //&& (str[i+1] & 0x80)) + if(i + 1 < len) //&& (str[i+1] & 0x80)) { vec.push_back(twocharToUint16(str[i], str[i + 1])); i += 2; @@ -317,6 +321,11 @@ namespace Limonp } return true; } + inline bool gbkTrans(const string& str, vector& vec) + { + return gbkTrans(str.c_str(), str.size(), vec); + } + inline bool gbkTrans(vector::const_iterator begin, vector::const_iterator end, string& res) { if(begin >= end)