adding utf8

This commit is contained in:
gwdwyy 2013-08-18 13:13:55 +08:00
parent 9302f54e04
commit abb507a029
2 changed files with 78 additions and 28 deletions

View File

@ -5,12 +5,16 @@ namespace CppJieba
// Definitions of TransCode's static data members.
// Order matters: _enc and _encVec are defined before _isInitted because the
// initializer of _isInitted calls TransCode::init(), which writes to both.
string TransCode::_enc; string TransCode::_enc;
vector<string> TransCode::_encVec; vector<string> TransCode::_encVec;
bool TransCode::_isInitted = TransCode::init(); bool TransCode::_isInitted = TransCode::init();
// Converter function pointers that strToVec()/vecToStr() dispatch through.
// They start out as NULL here; init() assigns the gbk converters to them.
TransCode::pf_strToVec_t TransCode::_pf_strToVec = NULL;
TransCode::pf_vecToStr_t TransCode::_pf_vecToStr = NULL;
// Registers the known encoding names ("utf-8" at index 0, "gbk" at index 1),
// selects the current encoding and installs the matching converter functions.
// Always returns true. It is invoked from the initializer of _isInitted.
bool TransCode::init() bool TransCode::init()
{ {
_encVec.push_back("utf-8"); _encVec.push_back("utf-8");
_encVec.push_back("gbk"); _encVec.push_back("gbk");
// NOTE(review): the two statements on this line pick different entries
// (index 0 = "utf-8", index 1 = "gbk"); the gbk choice is the one that agrees
// with the gbk converters installed just below.
_enc = _encVec[0]; _enc = _encVec[1];
_pf_strToVec = gbkToVec;
_pf_vecToStr = vecToGbk;
return true; return true;
} }
@ -40,7 +44,26 @@ namespace CppJieba
return true; return true;
} }
// Placeholder converter: ignores both arguments, leaves vec untouched and
// always returns true. Its only visible use is in main(), where it is called
// through a plain function pointer with the same signature as pf_strToVec_t.
bool TransCode::a(const string& str, vector<uint16_t>& vec)
{
return true;
}
// Converts str into 16-bit units by forwarding to the converter currently
// stored in _pf_strToVec.
// Returns false when no converter is installed (pointer is NULL); otherwise
// returns whatever the installed converter returns.
bool TransCode::strToVec(const string& str, vector<uint16_t>& vec) bool TransCode::strToVec(const string& str, vector<uint16_t>& vec)
{
if(NULL == _pf_strToVec)
{
return false;
}
return _pf_strToVec(str, vec);
}
bool TransCode::utf8ToVec(const string& str, vector<uint16_t>& vec)
{
return true;
}
bool TransCode::gbkToVec(const string& str, vector<uint16_t>& vec)
{ {
vec.clear(); vec.clear();
if(str.empty()) if(str.empty())
@ -72,6 +95,20 @@ namespace CppJieba
} }
// Converts the 16-bit units in [begin, end) back into a byte string by
// forwarding to the converter currently stored in _pf_vecToStr.
// Returns an empty string when no converter is installed (pointer is NULL);
// otherwise returns whatever the installed converter returns.
string TransCode::vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end) string TransCode::vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
{
if(NULL == _pf_vecToStr)
{
return "";
}
return _pf_vecToStr(begin, end);
}
// Encodes the 16-bit code points in [begin, end) as a UTF-8 byte string.
//
// begin/end: iterator range over uint16_t values (VUINT16_CONST_ITER is
//            presumably vector<uint16_t>::const_iterator -- confirm against
//            its typedef). An empty or reversed range yields "".
//
// Each value is emitted as a 1-, 2- or 3-byte sequence depending on its
// magnitude. Values are treated as standalone code points; surrogate pairs
// are not combined.
string TransCode::vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
{
    string res;
    // `<` rather than `!=` so that a reversed range terminates immediately.
    for(VUINT16_CONST_ITER it = begin; it < end; ++it)
    {
        const uint16_t unit = *it;
        if(unit < 0x80)
        {
            // 0xxxxxxx
            res += static_cast<char>(unit);
        }
        else if(unit < 0x800)
        {
            // 110xxxxx 10xxxxxx
            res += static_cast<char>(0xC0 | (unit >> 6));
            res += static_cast<char>(0x80 | (unit & 0x3F));
        }
        else
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            res += static_cast<char>(0xE0 | (unit >> 12));
            res += static_cast<char>(0x80 | ((unit >> 6) & 0x3F));
            res += static_cast<char>(0x80 | (unit & 0x3F));
        }
    }
    return res;
}
string TransCode::vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
{ {
if(begin >= end) if(begin >= end)
{ {
@ -95,17 +132,12 @@ namespace CppJieba
return res; return res;
} }
// Convenience overload: converts an entire vector of 16-bit units to a string.
// Returns "" for an empty vector; otherwise forwards to the iterator-range
// overload with vec.begin() and vec.end().
string TransCode::vecToStr(const vector<uint16_t>& vec)
{
if(vec.empty())
{
return "";
}
return vecToStr(vec.begin(), vec.end());
}
size_t TransCode::getWordLength(const string& str) size_t TransCode::getWordLength(const string& str)
{ {
if(NULL == _pf_strToVec)
{
return 0;
}
vector<uint16_t> vec; vector<uint16_t> vec;
bool ret = strToVec(str, vec); bool ret = strToVec(str, vec);
if(!ret) if(!ret)
@ -125,22 +157,26 @@ using namespace CPPCOMMON;
using namespace CppJieba; using namespace CppJieba;
// Ad-hoc test driver for TransCode.
// The file-reading loop converts each line of a log file with strToVec(),
// prints the resulting units and converts them back with vecToStr(); the same
// statements also appear commented out alongside.
// The statements after the loop call TransCode::a through a function pointer.
int main() int main()
{ {
ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt"); //ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt");
string line; //string line;
VUINT16 vec; //VUINT16 vec;
while(getline(ifile, line)) //while(getline(ifile, line))
{ //{
//
cout<<line<<endl; // cout<<line<<endl;
cout<<line.size()<<endl; // cout<<line.size()<<endl;
if(!TransCode::strToVec(line, vec)) // if(!TransCode::strToVec(line, vec))
{ // {
cout<<"error"<<endl; // cout<<"error"<<endl;
} // }
PRINT_VECTOR(vec); // PRINT_VECTOR(vec);
cout<<TransCode::vecToStr(vec)<<endl; // cout<<TransCode::vecToStr(vec)<<endl;
} //}
ifile.close(); //ifile.close();
// Checks that a static member function can be stored in, and invoked
// through, an ordinary function pointer of the converter signature.
typedef bool (* pf)(const string& , vector<uint16_t>&);
pf tmp = TransCode::a;
vector<uint16_t> vec;
tmp("1",vec);
return 0; return 0;
} }
#endif #endif

View File

@ -15,10 +15,17 @@ namespace CppJieba
using namespace CPPCOMMON; using namespace CPPCOMMON;
class TransCode class TransCode
{ {
public:
typedef bool (*pf_strToVec_t)(const string&, vector<uint16_t>&);
typedef string (*pf_vecToStr_t)(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
typedef size_t (*pf_getWordLength_t)(const string& str);
private: private:
static string _enc; static string _enc;
static vector<string> _encVec; static vector<string> _encVec;
static bool _isInitted; static bool _isInitted;
static pf_strToVec_t _pf_strToVec;
static pf_vecToStr_t _pf_vecToStr;
static pf_getWordLength_t _pf_getWordLength;
public: public:
static bool setEnc(const string& enc); static bool setEnc(const string& enc);
@ -29,11 +36,18 @@ namespace CppJieba
public: public:
static bool init(); static bool init();
public: public:
static bool a(const string& str, vector<uint16_t>& vec);
static bool strToVec(const string& str, vector<uint16_t>& vec); static bool strToVec(const string& str, vector<uint16_t>& vec);
static string vecToStr(const vector<uint16_t>& vec);
static string vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); static string vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
static size_t getWordLength(const string& str); static size_t getWordLength(const string& str);
public:
static bool gbkToVec(const string& str, vector<uint16_t>& vec);
static string vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
static size_t getGbkLength(const string& str);
public:
static bool utf8ToVec(const string& str, vector<uint16_t>& vec);
static string vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
static size_t getUtf8Length(const string& str);
}; };
} }