From abb507a029727ab119d2de02cf5266e21032ea9b Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Sun, 18 Aug 2013 13:13:55 +0800 Subject: [PATCH] adding utf8 --- src/TransCode.cpp | 88 +++++++++++++++++++++++++++++++++-------------- src/TransCode.h | 18 ++++++++-- 2 files changed, 78 insertions(+), 28 deletions(-) diff --git a/src/TransCode.cpp b/src/TransCode.cpp index 26bc7d5..2eb35a3 100644 --- a/src/TransCode.cpp +++ b/src/TransCode.cpp @@ -5,12 +5,16 @@ namespace CppJieba string TransCode::_enc; vector TransCode::_encVec; bool TransCode::_isInitted = TransCode::init(); + TransCode::pf_strToVec_t TransCode::_pf_strToVec = NULL; + TransCode::pf_vecToStr_t TransCode::_pf_vecToStr = NULL; bool TransCode::init() { _encVec.push_back("utf-8"); _encVec.push_back("gbk"); - _enc = _encVec[0]; + _enc = _encVec[1]; + _pf_strToVec = gbkToVec; + _pf_vecToStr = vecToGbk; return true; } @@ -39,8 +43,27 @@ namespace CppJieba } return true; } + + bool TransCode::a(const string& str, vector& vec) + { + return true; + } bool TransCode::strToVec(const string& str, vector& vec) + { + if(NULL == _pf_strToVec) + { + return false; + } + return _pf_strToVec(str, vec); + } + + bool TransCode::utf8ToVec(const string& str, vector& vec) + { + return true; + } + + bool TransCode::gbkToVec(const string& str, vector& vec) { vec.clear(); if(str.empty()) @@ -72,6 +95,20 @@ namespace CppJieba } string TransCode::vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end) + { + if(NULL == _pf_vecToStr) + { + return ""; + } + return _pf_vecToStr(begin, end); + } + + string TransCode::vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end) + { + return ""; + } + + string TransCode::vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end) { if(begin >= end) { @@ -95,17 +132,12 @@ namespace CppJieba return res; } - string TransCode::vecToStr(const vector& vec) - { - if(vec.empty()) - { - return ""; - } - return vecToStr(vec.begin(), vec.end()); - } - size_t TransCode::getWordLength(const string& str) 
{ + if(NULL == _pf_strToVec) + { + return 0; + } vector vec; bool ret = strToVec(str, vec); if(!ret) @@ -125,22 +157,26 @@ using namespace CPPCOMMON; using namespace CppJieba; int main() { - ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt"); - string line; - VUINT16 vec; - while(getline(ifile, line)) - { - - cout<&); + pf tmp = TransCode::a; + vector vec; + tmp("1",vec); return 0; } #endif diff --git a/src/TransCode.h b/src/TransCode.h index 1d741aa..e5467e8 100644 --- a/src/TransCode.h +++ b/src/TransCode.h @@ -15,10 +15,17 @@ namespace CppJieba using namespace CPPCOMMON; class TransCode { + public: + typedef bool (*pf_strToVec_t)(const string&, vector&); + typedef string (*pf_vecToStr_t)(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); + typedef size_t (*pf_getWordLength_t)(const string& str); private: static string _enc; static vector _encVec; static bool _isInitted; + static pf_strToVec_t _pf_strToVec; + static pf_vecToStr_t _pf_vecToStr; + static pf_getWordLength_t _pf_getWordLength; public: static bool setEnc(const string& enc); @@ -29,11 +36,18 @@ namespace CppJieba public: static bool init(); public: + static bool a(const string& str, vector& vec); static bool strToVec(const string& str, vector& vec); - static string vecToStr(const vector& vec); static string vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); static size_t getWordLength(const string& str); - + public: + static bool gbkToVec(const string& str, vector& vec); + static string vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); + static size_t getGbkLength(const string& str); + public: + static bool utf8ToVec(const string& str, vector& vec); + static string vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); + static size_t getUtf8Length(const string& str); }; }