From fd7ff031d075f833fad0f3c2564b6794c9846b08 Mon Sep 17 00:00:00 2001 From: wyy Date: Wed, 4 Dec 2013 08:00:27 -0800 Subject: [PATCH] add gbk --- CMakeLists.txt | 2 +- src/Limonp/str_functs.hpp | 56 +++++++++++++++++++++++++++++++++++++++ src/TransCode.hpp | 4 +-- 3 files changed, 59 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dd751d5..cb09b96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ PROJECT(CPPJIEBA) SET(CMAKE_INSTALL_PREFIX /usr) ADD_DEFINITIONS(-std=c++0x -O3) -#ADD_DEFINITIONS(-DCPPJIEBA_GBK) +ADD_DEFINITIONS(-DCPPJIEBA_GBK) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(dicts) ADD_SUBDIRECTORY(scripts) diff --git a/src/Limonp/str_functs.hpp b/src/Limonp/str_functs.hpp index f572ee9..5a480c9 100644 --- a/src/Limonp/str_functs.hpp +++ b/src/Limonp/str_functs.hpp @@ -286,5 +286,61 @@ namespace Limonp } return true; } + + /* Decode overload: clears vec, then walks str one byte at a time. A byte with bit 0x80 clear is pushed as uint16_t(byte); any other byte is combined with the byte after it through twocharToUint16 and both bytes are consumed. Returns false for an empty str, or when a high-bit byte is the last byte of str (vec then keeps whatever was pushed so far); returns true otherwise. */ inline bool gbkTrans(const string& str, vector& vec) + { + vec.clear(); + if(str.empty()) + { + return false; + } + uint i = 0; + while(i < str.size()) + { + if(0 == (str[i] & 0x80)) + { + vec.push_back(uint16_t(str[i])); + i++; + } + else + { + if(i + 1 < str.size()) //&& (str[i+1] & 0x80)) + { + vec.push_back(twocharToUint16(str[i], str[i + 1])); + i += 2; + } + else + { + /* high-bit byte at the very end of str: nothing left to pair it with */ return false; + } + } + } + return true; + } + /* Encode overload: rebuilds a byte string in res from the elements in [begin, end). Each element goes through uint16ToChar2; when the first char of the returned pair has bit 0x80 set both chars are appended, otherwise only the second one. Returns false without touching res when begin >= end; otherwise clears res, appends, and returns true. */ inline bool gbkTrans(vector::const_iterator begin, vector::const_iterator end, string& res) + { + if(begin >= end) + { + return false; + } + res.clear(); + pair pa; + while(begin != end) + { + pa = uint16ToChar2(*begin); + if(pa.first & 0x80) + { + res += pa.first; + res += pa.second; + } + else + { + /* pa.first is dropped in this branch. NOTE(review): presumably it is a zero high byte for single-byte input; confirm against uint16ToChar2 */ res += pa.second; + } + begin++; + } + return true; + } + + } #endif diff --git a/src/TransCode.hpp b/src/TransCode.hpp index 4e9c926..e3bb891 100644 --- a/src/TransCode.hpp +++ b/src/TransCode.hpp @@ -18,7 +18,7 @@ namespace CppJieba inline bool decode(const string& str, vector& vec) { #ifdef CPPJIEBA_GBK - return false; + /* This branch is compiled when CPPJIEBA_GBK is defined, which this same patch switches on in CMakeLists.txt through ADD_DEFINITIONS. NOTE(review): confirm that enabling it for every build is intended. */ return gbkTrans(str, vec); #else return utf8ToUnicode(str, 
vec); #endif @@ -27,7 +27,7 @@ namespace CppJieba inline bool encode(vector::const_iterator begin, vector::const_iterator end, string& res) { #ifdef CPPJIEBA_GBK - return false; + /* CPPJIEBA_GBK build: delegate to the iterator overload of gbkTrans instead of failing unconditionally */ return gbkTrans(begin, end, res); #else return unicodeToUtf8(begin, end, res); #endif