mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
rename some of TransCode's functions to encode and decode
This commit is contained in:
parent
e8a98d4e4d
commit
a10876f808
@ -113,7 +113,7 @@ namespace CppJieba
|
||||
vector<uint16_t> unico;
|
||||
vector<uint> status;
|
||||
vector<uint16_t>::iterator begin, left, right;
|
||||
if(!TransCode::strToVec(str, unico))
|
||||
if(!TransCode::decode(str, unico))
|
||||
|
||||
{
|
||||
LogError("TransCode failed.");
|
||||
@ -125,7 +125,7 @@ namespace CppJieba
|
||||
LogError("viterbi failed.");
|
||||
return false;
|
||||
}
|
||||
//cout<<vecToString(status)<<endl;
|
||||
//cout<<encodeing(status)<<endl;
|
||||
begin = unico.begin();
|
||||
left = begin;
|
||||
res.clear();
|
||||
@ -134,7 +134,7 @@ namespace CppJieba
|
||||
if(status[i] % 2) //if(E == status[i] || S == status[i])
|
||||
{
|
||||
right = begin + i + 1;
|
||||
res.push_back(TransCode::vecToStr(left, right));
|
||||
res.push_back(TransCode::encode(left, right));
|
||||
left = right;
|
||||
}
|
||||
|
||||
@ -279,7 +279,7 @@ namespace CppJieba
|
||||
bool HMMSegment::_decodeOne(const string& str, uint16_t& res)
|
||||
{
|
||||
vector<uint16_t> ui16;
|
||||
if(!TransCode::strToVec(str, ui16) || ui16.size() != 1)
|
||||
if(!TransCode::decode(str, ui16) || ui16.size() != 1)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -71,9 +71,9 @@ namespace CppJieba
|
||||
Unicode word;
|
||||
while(getline(ifile, line))
|
||||
{
|
||||
if(!TransCode::strToVec(line, word))
|
||||
if(!TransCode::decode(line, word))
|
||||
{
|
||||
LogError("strToVec failed .");
|
||||
LogError("decode failed .");
|
||||
return false;
|
||||
}
|
||||
_stopWords.insert(word);
|
||||
@ -142,9 +142,9 @@ namespace CppJieba
|
||||
for(uint i = 0; i < words.size(); i++)
|
||||
{
|
||||
Unicode uniWord;
|
||||
if(!TransCode::strToVec(words[i], uniWord))
|
||||
if(!TransCode::decode(words[i], uniWord))
|
||||
{
|
||||
LogError("strToVec failed");
|
||||
LogError("decode failed");
|
||||
return false;
|
||||
}
|
||||
keyWordInfos.push_back(uniWord);
|
||||
|
@ -46,7 +46,7 @@ namespace CppJieba
|
||||
res.clear();
|
||||
for(uint i = 0; i < segWordInfos.size(); i++)
|
||||
{
|
||||
res.push_back(TransCode::vecToStr(segWordInfos[i].word.begin(), segWordInfos[i].word.end()));
|
||||
res.push_back(TransCode::encode(segWordInfos[i].word.begin(), segWordInfos[i].word.end()));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -61,9 +61,9 @@ namespace CppJieba
|
||||
SegmentContext segContext;
|
||||
Unicode sentence;
|
||||
|
||||
if(!TransCode::strToVec(str, sentence))
|
||||
if(!TransCode::decode(str, sentence))
|
||||
{
|
||||
LogError("TransCode::strToVec failed.");
|
||||
LogError("TransCode::decode failed.");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -4,13 +4,13 @@ namespace CppJieba
|
||||
{
|
||||
vector<string> TransCode::_encVec;
|
||||
bool TransCode::_isInitted = TransCode::init();
|
||||
TransCode::pf_strToVec_t TransCode::_pf_strToVec = NULL;
|
||||
TransCode::pf_vecToStr_t TransCode::_pf_vecToStr = NULL;
|
||||
TransCode::pf_decode_t TransCode::_pf_decode = NULL;
|
||||
TransCode::pf_encode_t TransCode::_pf_encode = NULL;
|
||||
|
||||
bool TransCode::init()
|
||||
{
|
||||
_pf_strToVec = gbkToVec;
|
||||
_pf_vecToStr = vecToGbk;
|
||||
_pf_decode = gbkToVec;
|
||||
_pf_encode = vecToGbk;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -24,23 +24,23 @@ namespace CppJieba
|
||||
|
||||
void TransCode::setGbkEnc()
|
||||
{
|
||||
_pf_strToVec = gbkToVec;
|
||||
_pf_vecToStr = vecToGbk;
|
||||
_pf_decode = gbkToVec;
|
||||
_pf_encode = vecToGbk;
|
||||
}
|
||||
|
||||
void TransCode::setUtf8Enc()
|
||||
{
|
||||
_pf_strToVec = utf8ToVec;
|
||||
_pf_vecToStr = vecToUtf8;
|
||||
_pf_decode = utf8ToVec;
|
||||
_pf_encode = vecToUtf8;
|
||||
}
|
||||
|
||||
bool TransCode::strToVec(const string& str, vector<uint16_t>& vec)
|
||||
bool TransCode::decode(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
if(NULL == _pf_strToVec)
|
||||
if(NULL == _pf_decode)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return _pf_strToVec(str, vec);
|
||||
return _pf_decode(str, vec);
|
||||
}
|
||||
|
||||
bool TransCode::utf8ToVec(const string& str, vector<uint16_t>& vec)
|
||||
@ -112,13 +112,13 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
string TransCode::vecToStr(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
string TransCode::encode(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
{
|
||||
if(NULL == _pf_vecToStr)
|
||||
if(NULL == _pf_encode)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
return _pf_vecToStr(begin, end);
|
||||
return _pf_encode(begin, end);
|
||||
}
|
||||
|
||||
string TransCode::vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
@ -177,18 +177,18 @@ namespace CppJieba
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t TransCode::getWordLength(const string& str)
|
||||
{
|
||||
vector<uint16_t> vec;
|
||||
if(!strToVec(str, vec))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
return vec.size();
|
||||
}
|
||||
}
|
||||
//size_t TransCode::getWordLength(const string& str)
|
||||
//{
|
||||
// vector<uint16_t> vec;
|
||||
// if(!decode(str, vec))
|
||||
// {
|
||||
// return 0;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return vec.size();
|
||||
// }
|
||||
//}
|
||||
}
|
||||
|
||||
|
||||
@ -205,12 +205,12 @@ int main()
|
||||
//
|
||||
// cout<<line<<endl;
|
||||
// cout<<line.size()<<endl;
|
||||
// if(!TransCode::strToVec(line, vec))
|
||||
// if(!TransCode::decode(line, vec))
|
||||
// {
|
||||
// cout<<"error"<<endl;
|
||||
// }
|
||||
// PRINT_VECTOR(vec);
|
||||
// cout<<TransCode::vecToStr(vec)<<endl;
|
||||
// cout<<TransCode::encode(vec)<<endl;
|
||||
//}
|
||||
//ifile.close();
|
||||
//typedef bool (* pf)(const string& , vector<uint16_t>&);
|
||||
@ -221,10 +221,10 @@ int main()
|
||||
string a("abd你好世界!a");
|
||||
vector<uint16_t> vec;
|
||||
//TransCode::setUtf8Enc();
|
||||
cout<<TransCode::strToVec(a, vec)<<endl;
|
||||
cout<<TransCode::decode(a, vec)<<endl;
|
||||
PRINT_VECTOR(vec);
|
||||
|
||||
cout<<TransCode::vecToStr(vec.begin(), vec.end())<<endl;
|
||||
cout<<TransCode::encode(vec.begin(), vec.end())<<endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -16,14 +16,14 @@ namespace CppJieba
|
||||
class TransCode
|
||||
{
|
||||
public:
|
||||
typedef bool (*pf_strToVec_t)(const string&, vector<uint16_t>&);
|
||||
typedef string (*pf_vecToStr_t)(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
typedef bool (*pf_decode_t)(const string&, vector<uint16_t>&);
|
||||
typedef string (*pf_encode_t)(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
typedef size_t (*pf_getWordLength_t)(const string& str);
|
||||
private:
|
||||
static vector<string> _encVec;
|
||||
static bool _isInitted;
|
||||
static pf_strToVec_t _pf_strToVec;
|
||||
static pf_vecToStr_t _pf_vecToStr;
|
||||
static pf_decode_t _pf_decode;
|
||||
static pf_encode_t _pf_encode;
|
||||
static pf_getWordLength_t _pf_getWordLength;
|
||||
|
||||
public:
|
||||
@ -36,17 +36,17 @@ namespace CppJieba
|
||||
public:
|
||||
static bool init();
|
||||
public:
|
||||
static bool strToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToStr(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
static size_t getWordLength(const string& str);
|
||||
static bool decode(const string& str, vector<uint16_t>& vec);
|
||||
static string encode(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
//static size_t getWordLength(const string& str);
|
||||
public:
|
||||
static bool gbkToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
static size_t getGbkLength(const string& str);
|
||||
//static size_t getGbkLength(const string& str);
|
||||
public:
|
||||
static bool utf8ToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
static size_t getUtf8Length(const string& str);
|
||||
//static size_t getUtf8Length(const string& str);
|
||||
};
|
||||
}
|
||||
|
||||
|
10
src/Trie.cpp
10
src/Trie.cpp
@ -109,7 +109,7 @@ namespace CppJieba
|
||||
LogError(string_format("line[%s] illegal.", line.c_str()));
|
||||
return false;
|
||||
}
|
||||
if(!TransCode::strToVec(vecBuf[0], nodeInfo.word))
|
||||
if(!TransCode::decode(vecBuf[0], nodeInfo.word))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
@ -156,10 +156,10 @@ namespace CppJieba
|
||||
}
|
||||
Unicode uintVec;
|
||||
|
||||
bool retFlag = TransCode::strToVec(str, uintVec);
|
||||
bool retFlag = TransCode::decode(str, uintVec);
|
||||
if(retFlag)
|
||||
{
|
||||
LogError("TransCode::strToVec failed.");
|
||||
LogError("TransCode::decode failed.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -198,7 +198,7 @@ namespace CppJieba
|
||||
TrieNodeInfo* Trie::find(const string& str)
|
||||
{
|
||||
Unicode uintVec;
|
||||
bool retFlag = TransCode::strToVec(str, uintVec);
|
||||
bool retFlag = TransCode::decode(str, uintVec);
|
||||
if(!retFlag)
|
||||
{
|
||||
return NULL;
|
||||
@ -260,7 +260,7 @@ namespace CppJieba
|
||||
{
|
||||
|
||||
Unicode uintVec;
|
||||
TransCode::strToVec(str, uintVec);
|
||||
TransCode::decode(str, uintVec);
|
||||
return getWeight(uintVec);
|
||||
}
|
||||
|
||||
|
@ -84,7 +84,7 @@ namespace CppJieba
|
||||
}
|
||||
string toString() const
|
||||
{
|
||||
return string_format("{word:%s,weight:%lf, idf:%lf}", TransCode::vecToStr(word.begin(), word.end()).c_str(), weight, idf);
|
||||
return string_format("{word:%s,weight:%lf, idf:%lf}", TransCode::encode(word.begin(), word.end()).c_str(), weight, idf);
|
||||
}
|
||||
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user