mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
rewrite transcode to reduce time consumed, by moving some functions' return values into output arguments
This commit is contained in:
parent
d5b9bb21dd
commit
aefbbdf49d
@ -149,9 +149,13 @@ namespace CppJieba
|
||||
return false;
|
||||
}
|
||||
res.clear();
|
||||
string tmp;
|
||||
for(uint i = 0; i < words.size(); i++)
|
||||
{
|
||||
res.push_back(TransCode::encode(words[i].begin(), words[i].end()));
|
||||
if(TransCode::encode(words[i], tmp))
|
||||
{
|
||||
res.push_back(tmp);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -27,30 +27,6 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
//bool KeyWordExt::_loadPriorSubWords(const char * const filePath)
|
||||
//{
|
||||
// LogInfo(string_format("_loadPriorSubWords(%s) start", filePath));
|
||||
// if(!checkFileExist(filePath))
|
||||
// {
|
||||
// LogError(string_format("cann't find file[%s].",filePath));
|
||||
// return false;
|
||||
// }
|
||||
// if(!_priorSubWords.empty())
|
||||
// {
|
||||
// LogError("_priorSubWords has been initted before");
|
||||
// return false;
|
||||
// }
|
||||
// ifstream infile(filePath);
|
||||
// string subword;
|
||||
// while(getline(infile, subword))
|
||||
// {
|
||||
// _priorSubWords.push_back(subword);
|
||||
// }
|
||||
// LogInfo(string_format("_loadPriorSubWords(%s) end", filePath));
|
||||
// infile.close();
|
||||
// return true;
|
||||
//}
|
||||
|
||||
bool KeyWordExt::loadStopWords(const char * const filePath)
|
||||
{
|
||||
|
||||
|
@ -44,9 +44,17 @@ namespace CppJieba
|
||||
return false;
|
||||
}
|
||||
res.clear();
|
||||
string tmp;
|
||||
for(uint i = 0; i < segWordInfos.size(); i++)
|
||||
{
|
||||
res.push_back(TransCode::encode(segWordInfos[i].word.begin(), segWordInfos[i].word.end()));
|
||||
if(TransCode::encode(segWordInfos[i].word, tmp))
|
||||
{
|
||||
res.push_back(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
LogError("encode failed.");
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -43,6 +43,7 @@ namespace CppJieba
|
||||
res.clear();
|
||||
Unicode unico;
|
||||
vector<Unicode> hmmRes;
|
||||
string tmp;
|
||||
for(uint i= 0; i < infos.size(); i++)
|
||||
{
|
||||
if(1 == infos[i].word.size())
|
||||
@ -60,12 +61,14 @@ namespace CppJieba
|
||||
}
|
||||
for(uint j = 0; j < hmmRes.size(); j++)
|
||||
{
|
||||
res.push_back(TransCode::encode(hmmRes[j]));
|
||||
TransCode::encode(hmmRes[j], tmp);
|
||||
res.push_back(tmp);
|
||||
}
|
||||
}
|
||||
unico.clear();
|
||||
|
||||
res.push_back(TransCode::encode(infos[i].word));
|
||||
TransCode::encode(infos[i].word, tmp);
|
||||
res.push_back(tmp);
|
||||
}
|
||||
|
||||
}
|
||||
@ -78,7 +81,8 @@ namespace CppJieba
|
||||
}
|
||||
for(uint j = 0; j < hmmRes.size(); j++)
|
||||
{
|
||||
res.push_back(TransCode::encode(hmmRes[j]));
|
||||
TransCode::encode(hmmRes[j], tmp);
|
||||
res.push_back(tmp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -34,14 +34,6 @@ namespace CppJieba
|
||||
_pf_encode = vecToUtf8;
|
||||
}
|
||||
|
||||
// Decodes `str` into `vec` by delegating to the function pointer _pf_decode.
// Returns false without touching `vec` when _pf_decode is NULL; otherwise
// returns whatever _pf_decode(str, vec) returns.
bool TransCode::decode(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
if(NULL == _pf_decode)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return _pf_decode(str, vec);
|
||||
}
|
||||
|
||||
bool TransCode::utf8ToVec(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
@ -112,27 +104,14 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convenience overload: encodes the whole of `sentence` by forwarding its
// begin()/end() iterators to the iterator-range overload of encode.
string TransCode::encode(const Unicode& sentence)
|
||||
{
|
||||
return encode(sentence.begin(), sentence.end());
|
||||
}
|
||||
|
||||
// Encodes the range [begin, end) by delegating to the function pointer
// _pf_encode and returns its result. Returns an empty string when
// _pf_encode is null, so callers cannot tell that case apart from an
// encoder that itself returned "".
string TransCode::encode(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
{
|
||||
if(!_pf_encode)
|
||||
{
|
||||
return "";
|
||||
}
|
||||
return _pf_encode(begin, end);
|
||||
}
|
||||
|
||||
string TransCode::vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
bool TransCode::vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
|
||||
{
|
||||
if(begin >= end)
|
||||
{
|
||||
return "";
|
||||
return false;
|
||||
}
|
||||
string res;
|
||||
res.clear();
|
||||
uint16_t ui;
|
||||
while(begin != end)
|
||||
{
|
||||
@ -154,17 +133,17 @@ namespace CppJieba
|
||||
}
|
||||
begin ++;
|
||||
}
|
||||
return res;
|
||||
return true;
|
||||
}
|
||||
|
||||
string TransCode::vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
bool TransCode::vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
|
||||
{
|
||||
if(begin >= end)
|
||||
{
|
||||
return "";
|
||||
return false;
|
||||
}
|
||||
res.clear();
|
||||
pair<char, char> pa;
|
||||
string res;
|
||||
while(begin != end)
|
||||
{
|
||||
pa = uint16ToChar2(*begin);
|
||||
@ -179,21 +158,8 @@ namespace CppJieba
|
||||
}
|
||||
begin++;
|
||||
}
|
||||
return res;
|
||||
return true;
|
||||
}
|
||||
|
||||
//size_t TransCode::getWordLength(const string& str)
|
||||
//{
|
||||
// vector<uint16_t> vec;
|
||||
// if(!decode(str, vec))
|
||||
// {
|
||||
// return 0;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return vec.size();
|
||||
// }
|
||||
//}
|
||||
}
|
||||
|
||||
|
||||
@ -202,27 +168,6 @@ using namespace CPPCOMMON;
|
||||
using namespace CppJieba;
|
||||
int main()
|
||||
{
|
||||
//ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt");
|
||||
//string line;
|
||||
//Unicode vec;
|
||||
//while(getline(ifile, line))
|
||||
//{
|
||||
//
|
||||
// cout<<line<<endl;
|
||||
// cout<<line.size()<<endl;
|
||||
// if(!TransCode::decode(line, vec))
|
||||
// {
|
||||
// cout<<"error"<<endl;
|
||||
// }
|
||||
// PRINT_VECTOR(vec);
|
||||
// cout<<TransCode::encode(vec)<<endl;
|
||||
//}
|
||||
//ifile.close();
|
||||
//typedef bool (* pf)(const string& , vector<uint16_t>&);
|
||||
//pf tmp = TransCode::a;
|
||||
//vector<uint16_t> vec;
|
||||
//tmp("1",vec);
|
||||
|
||||
string a("abd你好世界!a");
|
||||
vector<uint16_t> vec;
|
||||
//TransCode::setUtf8Enc();
|
||||
|
@ -17,38 +17,56 @@ namespace CppJieba
|
||||
{
|
||||
public:
|
||||
typedef bool (*pf_decode_t)(const string&, vector<uint16_t>&);
|
||||
typedef string (*pf_encode_t)(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
typedef size_t (*pf_getWordLength_t)(const string& str);
|
||||
typedef bool (*pf_encode_t)(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
|
||||
private:
|
||||
static vector<string> _encVec;
|
||||
static bool _isInitted;
|
||||
static pf_decode_t _pf_decode;
|
||||
static pf_encode_t _pf_encode;
|
||||
static pf_getWordLength_t _pf_getWordLength;
|
||||
|
||||
public:
|
||||
static void setGbkEnc();
|
||||
static void setUtf8Enc();
|
||||
|
||||
public:
|
||||
private:
|
||||
TransCode();
|
||||
~TransCode();
|
||||
public:
|
||||
static bool init();
|
||||
public:
|
||||
static bool decode(const string& str, vector<uint16_t>& vec);
|
||||
static string encode(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
static string encode(const Unicode& sentence);
|
||||
//static size_t getWordLength(const string& str);
|
||||
static inline bool decode(const string& str, vector<uint16_t>& vec);
|
||||
static inline bool encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
|
||||
static inline bool encode(const Unicode& sentence, string& res);
|
||||
|
||||
public:
|
||||
static bool gbkToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
//static size_t getGbkLength(const string& str);
|
||||
static bool vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
|
||||
public:
|
||||
static bool utf8ToVec(const string& str, vector<uint16_t>& vec);
|
||||
static string vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
//static size_t getUtf8Length(const string& str);
|
||||
static bool vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
|
||||
};
|
||||
|
||||
// Decodes `str` into `vec` by delegating to the function pointer _pf_decode.
// Returns false without touching `vec` when _pf_decode is NULL; otherwise
// returns whatever _pf_decode(str, vec) returns.
inline bool TransCode::decode(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
if(NULL == _pf_decode)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return _pf_decode(str, vec);
|
||||
}
|
||||
// Convenience overload: encodes the whole of `sentence` into `res` by
// forwarding its begin()/end() iterators to the iterator-range overload,
// and returns that overload's result.
inline bool TransCode::encode(const Unicode& sentence, string& res)
|
||||
{
|
||||
return encode(sentence.begin(), sentence.end(), res);
|
||||
}
|
||||
|
||||
// Encodes the range [begin, end) into `res` by delegating to the function
// pointer _pf_encode. Returns false without touching `res` when _pf_encode
// is null; otherwise returns whatever _pf_encode(begin, end, res) returns.
inline bool TransCode::encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
|
||||
{
|
||||
if(!_pf_encode)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
return _pf_encode(begin, end, res);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -82,9 +82,11 @@ namespace CppJieba
|
||||
KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo)
|
||||
{
|
||||
}
|
||||
string toString() const
|
||||
inline string toString() const
|
||||
{
|
||||
return string_format("{word:%s,weight:%lf, idf:%lf}", TransCode::encode(word.begin(), word.end()).c_str(), weight, idf);
|
||||
string tmp;
|
||||
TransCode::encode(word, tmp);
|
||||
return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf);
|
||||
}
|
||||
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user