mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
addding utf8
This commit is contained in:
parent
9302f54e04
commit
abb507a029
@ -5,12 +5,16 @@ namespace CppJieba
|
|||||||
string TransCode::_enc;
|
string TransCode::_enc;
|
||||||
vector<string> TransCode::_encVec;
|
vector<string> TransCode::_encVec;
|
||||||
bool TransCode::_isInitted = TransCode::init();
|
bool TransCode::_isInitted = TransCode::init();
|
||||||
|
TransCode::pf_strToVec_t TransCode::_pf_strToVec = NULL;
|
||||||
|
TransCode::pf_vecToStr_t TransCode::_pf_vecToStr = NULL;
|
||||||
|
|
||||||
bool TransCode::init()
|
bool TransCode::init()
|
||||||
{
|
{
|
||||||
_encVec.push_back("utf-8");
|
_encVec.push_back("utf-8");
|
||||||
_encVec.push_back("gbk");
|
_encVec.push_back("gbk");
|
||||||
_enc = _encVec[0];
|
_enc = _encVec[1];
|
||||||
|
_pf_strToVec = gbkToVec;
|
||||||
|
_pf_vecToStr = vecToGbk;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +44,26 @@ namespace CppJieba
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TransCode::a(const string& str, vector<uint16_t>& vec)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
bool TransCode::strToVec(const string& str, vector<uint16_t>& vec)
|
bool TransCode::strToVec(const string& str, vector<uint16_t>& vec)
|
||||||
|
{
|
||||||
|
if(NULL == _pf_strToVec)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return _pf_strToVec(str, vec);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TransCode::utf8ToVec(const string& str, vector<uint16_t>& vec)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TransCode::gbkToVec(const string& str, vector<uint16_t>& vec)
|
||||||
{
|
{
|
||||||
vec.clear();
|
vec.clear();
|
||||||
if(str.empty())
|
if(str.empty())
|
||||||
@ -72,6 +95,20 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
|
|
||||||
string TransCode::vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
|
string TransCode::vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
|
||||||
|
{
|
||||||
|
if(NULL == _pf_vecToStr)
|
||||||
|
{
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
return _pf_vecToStr(begin, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
string TransCode::vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
|
||||||
|
{
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
string TransCode::vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end)
|
||||||
{
|
{
|
||||||
if(begin >= end)
|
if(begin >= end)
|
||||||
{
|
{
|
||||||
@ -95,17 +132,12 @@ namespace CppJieba
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
string TransCode::vecToStr(const vector<uint16_t>& vec)
|
|
||||||
{
|
|
||||||
if(vec.empty())
|
|
||||||
{
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
return vecToStr(vec.begin(), vec.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t TransCode::getWordLength(const string& str)
|
size_t TransCode::getWordLength(const string& str)
|
||||||
{
|
{
|
||||||
|
if(NULL == _pf_strToVec)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
vector<uint16_t> vec;
|
vector<uint16_t> vec;
|
||||||
bool ret = strToVec(str, vec);
|
bool ret = strToVec(str, vec);
|
||||||
if(!ret)
|
if(!ret)
|
||||||
@ -125,22 +157,26 @@ using namespace CPPCOMMON;
|
|||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt");
|
//ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt");
|
||||||
string line;
|
//string line;
|
||||||
VUINT16 vec;
|
//VUINT16 vec;
|
||||||
while(getline(ifile, line))
|
//while(getline(ifile, line))
|
||||||
{
|
//{
|
||||||
|
//
|
||||||
cout<<line<<endl;
|
// cout<<line<<endl;
|
||||||
cout<<line.size()<<endl;
|
// cout<<line.size()<<endl;
|
||||||
if(!TransCode::strToVec(line, vec))
|
// if(!TransCode::strToVec(line, vec))
|
||||||
{
|
// {
|
||||||
cout<<"error"<<endl;
|
// cout<<"error"<<endl;
|
||||||
}
|
// }
|
||||||
PRINT_VECTOR(vec);
|
// PRINT_VECTOR(vec);
|
||||||
cout<<TransCode::vecToStr(vec)<<endl;
|
// cout<<TransCode::vecToStr(vec)<<endl;
|
||||||
}
|
//}
|
||||||
ifile.close();
|
//ifile.close();
|
||||||
|
typedef bool (* pf)(const string& , vector<uint16_t>&);
|
||||||
|
pf tmp = TransCode::a;
|
||||||
|
vector<uint16_t> vec;
|
||||||
|
tmp("1",vec);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -15,10 +15,17 @@ namespace CppJieba
|
|||||||
using namespace CPPCOMMON;
|
using namespace CPPCOMMON;
|
||||||
class TransCode
|
class TransCode
|
||||||
{
|
{
|
||||||
|
public:
|
||||||
|
typedef bool (*pf_strToVec_t)(const string&, vector<uint16_t>&);
|
||||||
|
typedef string (*pf_vecToStr_t)(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
|
||||||
|
typedef size_t (*pf_getWordLength_t)(const string& str);
|
||||||
private:
|
private:
|
||||||
static string _enc;
|
static string _enc;
|
||||||
static vector<string> _encVec;
|
static vector<string> _encVec;
|
||||||
static bool _isInitted;
|
static bool _isInitted;
|
||||||
|
static pf_strToVec_t _pf_strToVec;
|
||||||
|
static pf_vecToStr_t _pf_vecToStr;
|
||||||
|
static pf_getWordLength_t _pf_getWordLength;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static bool setEnc(const string& enc);
|
static bool setEnc(const string& enc);
|
||||||
@ -29,11 +36,18 @@ namespace CppJieba
|
|||||||
public:
|
public:
|
||||||
static bool init();
|
static bool init();
|
||||||
public:
|
public:
|
||||||
|
static bool a(const string& str, vector<uint16_t>& vec);
|
||||||
static bool strToVec(const string& str, vector<uint16_t>& vec);
|
static bool strToVec(const string& str, vector<uint16_t>& vec);
|
||||||
static string vecToStr(const vector<uint16_t>& vec);
|
|
||||||
static string vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
|
static string vecToStr(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
|
||||||
static size_t getWordLength(const string& str);
|
static size_t getWordLength(const string& str);
|
||||||
|
public:
|
||||||
|
static bool gbkToVec(const string& str, vector<uint16_t>& vec);
|
||||||
|
static string vecToGbk(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
|
||||||
|
static size_t getGbkLength(const string& str);
|
||||||
|
public:
|
||||||
|
static bool utf8ToVec(const string& str, vector<uint16_t>& vec);
|
||||||
|
static string vecToUtf8(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end);
|
||||||
|
static size_t getUtf8Length(const string& str);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user