Rewrite TransCode to reduce time consumed: move some functions' return values into output arguments of those functions

This commit is contained in:
wyy 2013-09-09 22:55:55 +08:00
parent d5b9bb21dd
commit aefbbdf49d
7 changed files with 63 additions and 106 deletions

View File

@ -149,9 +149,13 @@ namespace CppJieba
return false;
}
res.clear();
string tmp;
for(uint i = 0; i < words.size(); i++)
{
res.push_back(TransCode::encode(words[i].begin(), words[i].end()));
if(TransCode::encode(words[i], tmp))
{
res.push_back(tmp);
}
}
return true;
}

View File

@ -27,30 +27,6 @@ namespace CppJieba
return true;
}
//bool KeyWordExt::_loadPriorSubWords(const char * const filePath)
//{
// LogInfo(string_format("_loadPriorSubWords(%s) start", filePath));
// if(!checkFileExist(filePath))
// {
// LogError(string_format("cann't find file[%s].",filePath));
// return false;
// }
// if(!_priorSubWords.empty())
// {
// LogError("_priorSubWords has been initted before");
// return false;
// }
// ifstream infile(filePath);
// string subword;
// while(getline(infile, subword))
// {
// _priorSubWords.push_back(subword);
// }
// LogInfo(string_format("_loadPriorSubWords(%s) end", filePath));
// infile.close();
// return true;
//}
bool KeyWordExt::loadStopWords(const char * const filePath)
{

View File

@ -44,9 +44,17 @@ namespace CppJieba
return false;
}
res.clear();
string tmp;
for(uint i = 0; i < segWordInfos.size(); i++)
{
res.push_back(TransCode::encode(segWordInfos[i].word.begin(), segWordInfos[i].word.end()));
if(TransCode::encode(segWordInfos[i].word, tmp))
{
res.push_back(tmp);
}
else
{
LogError("encode failed.");
}
}
return true;
}

View File

@ -43,6 +43,7 @@ namespace CppJieba
res.clear();
Unicode unico;
vector<Unicode> hmmRes;
string tmp;
for(uint i= 0; i < infos.size(); i++)
{
if(1 == infos[i].word.size())
@ -60,12 +61,14 @@ namespace CppJieba
}
for(uint j = 0; j < hmmRes.size(); j++)
{
res.push_back(TransCode::encode(hmmRes[j]));
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
unico.clear();
res.push_back(TransCode::encode(infos[i].word));
TransCode::encode(infos[i].word, tmp);
res.push_back(tmp);
}
}
@ -78,7 +81,8 @@ namespace CppJieba
}
for(uint j = 0; j < hmmRes.size(); j++)
{
res.push_back(TransCode::encode(hmmRes[j]));
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}

View File

@ -34,14 +34,6 @@ namespace CppJieba
_pf_encode = vecToUtf8;
}
// Forwards `str` and `vec` to the function pointer _pf_decode and returns
// whatever that call returns. If _pf_decode is NULL nothing is called and
// the result is false.
bool TransCode::decode(const string& str, vector<uint16_t>& vec)
{
    if(NULL != _pf_decode)
    {
        return _pf_decode(str, vec);
    }
    return false;
}
bool TransCode::utf8ToVec(const string& str, vector<uint16_t>& vec)
{
@ -112,27 +104,14 @@ namespace CppJieba
return true;
}
// Whole-container overload: takes the begin/end iterators of `sentence`
// and delegates to the iterator-range overload of encode().
string TransCode::encode(const Unicode& sentence)
{
    Unicode::const_iterator first = sentence.begin();
    Unicode::const_iterator last = sentence.end();
    return encode(first, last);
}
// Passes the range [begin, end) to the function pointer _pf_encode and
// returns its result. When _pf_encode is not set, returns an empty string
// without calling anything.
string TransCode::encode(Unicode::const_iterator begin, Unicode::const_iterator end)
{
    if(_pf_encode)
    {
        return _pf_encode(begin, end);
    }
    return "";
}
string TransCode::vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end)
bool TransCode::vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
{
if(begin >= end)
{
return "";
return false;
}
string res;
res.clear();
uint16_t ui;
while(begin != end)
{
@ -154,17 +133,17 @@ namespace CppJieba
}
begin ++;
}
return res;
return true;
}
string TransCode::vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end)
bool TransCode::vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
{
if(begin >= end)
{
return "";
return false;
}
res.clear();
pair<char, char> pa;
string res;
while(begin != end)
{
pa = uint16ToChar2(*begin);
@ -179,21 +158,8 @@ namespace CppJieba
}
begin++;
}
return res;
return true;
}
//size_t TransCode::getWordLength(const string& str)
//{
// vector<uint16_t> vec;
// if(!decode(str, vec))
// {
// return 0;
// }
// else
// {
// return vec.size();
// }
//}
}
@ -202,27 +168,6 @@ using namespace CPPCOMMON;
using namespace CppJieba;
int main()
{
//ifstream ifile("/home/wuyanyi/code/SevKeyword/log.2.txt");
//string line;
//Unicode vec;
//while(getline(ifile, line))
//{
//
// cout<<line<<endl;
// cout<<line.size()<<endl;
// if(!TransCode::decode(line, vec))
// {
// cout<<"error"<<endl;
// }
// PRINT_VECTOR(vec);
// cout<<TransCode::encode(vec)<<endl;
//}
//ifile.close();
//typedef bool (* pf)(const string& , vector<uint16_t>&);
//pf tmp = TransCode::a;
//vector<uint16_t> vec;
//tmp("1",vec);
string a("abd你好世界!a");
vector<uint16_t> vec;
//TransCode::setUtf8Enc();

View File

@ -17,38 +17,56 @@ namespace CppJieba
{
public:
typedef bool (*pf_decode_t)(const string&, vector<uint16_t>&);
typedef string (*pf_encode_t)(Unicode::const_iterator begin, Unicode::const_iterator end);
typedef size_t (*pf_getWordLength_t)(const string& str);
typedef bool (*pf_encode_t)(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
private:
static vector<string> _encVec;
static bool _isInitted;
static pf_decode_t _pf_decode;
static pf_encode_t _pf_encode;
static pf_getWordLength_t _pf_getWordLength;
public:
static void setGbkEnc();
static void setUtf8Enc();
public:
private:
TransCode();
~TransCode();
public:
static bool init();
public:
static bool decode(const string& str, vector<uint16_t>& vec);
static string encode(Unicode::const_iterator begin, Unicode::const_iterator end);
static string encode(const Unicode& sentence);
//static size_t getWordLength(const string& str);
static inline bool decode(const string& str, vector<uint16_t>& vec);
static inline bool encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
static inline bool encode(const Unicode& sentence, string& res);
public:
static bool gbkToVec(const string& str, vector<uint16_t>& vec);
static string vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end);
//static size_t getGbkLength(const string& str);
static bool vecToGbk(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
public:
static bool utf8ToVec(const string& str, vector<uint16_t>& vec);
static string vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end);
//static size_t getUtf8Length(const string& str);
static bool vecToUtf8(Unicode::const_iterator begin, Unicode::const_iterator end, string& res);
};
// Inline dispatcher: hands `str` and `vec` to the function pointer
// _pf_decode and returns that call's result. Returns false, calling
// nothing, when _pf_decode is NULL.
inline bool TransCode::decode(const string& str, vector<uint16_t>& vec)
{
    if(NULL != _pf_decode)
    {
        return _pf_decode(str, vec);
    }
    return false;
}
inline bool TransCode::encode(const Unicode& sentence, string& res)
{
return encode(sentence.begin(), sentence.end(), res);
}
// Passes the range [begin, end) and the output string `res` to the function
// pointer _pf_encode and returns its result. When _pf_encode is not set,
// returns false and leaves `res` untouched.
inline bool TransCode::encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res)
{
    if(_pf_encode)
    {
        return _pf_encode(begin, end, res);
    }
    return false;
}
}
#endif

View File

@ -82,9 +82,11 @@ namespace CppJieba
// Builds a KeyWordInfo from an existing TrieNodeInfo: the argument is handed
// to the TrieNodeInfo initializer and the constructor body does nothing else.
// NOTE(review): TrieNodeInfo is presumably the base class; the class header
// is not visible here, so confirm against the declaration.
KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo)
{
}
string toString() const
inline string toString() const
{
return string_format("{word:%s,weight:%lf, idf:%lf}", TransCode::encode(word.begin(), word.end()).c_str(), weight, idf);
string tmp;
TransCode::encode(word, tmp);
return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf);
}
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
{