rewriting ...

This commit is contained in:
gwdwyy 2013-07-21 16:47:58 +08:00
parent 196d2d563f
commit 44654205d4
4 changed files with 22 additions and 29 deletions

View File

@ -22,7 +22,22 @@ namespace CppJieba
{
bool retFlag;
retFlag = _trie.init();
return retFlag;
if(!retFlag)
{
LogError("_trie.init failed.");
return false;
}
return true;
}
// Selects the encoding used by the underlying trie.
// enc must be one of the entries of _encVec; any other value is logged
// and rejected with false. For an accepted value the result is whatever
// _trie.setEncoding(enc) returns.
bool Segment::setEncoding(const string& enc)
{
    const bool isSupported = isInVec<string>(_encVec, enc);
    if(isSupported)
    {
        // Known encoding: delegate to the trie and hand back its status.
        return _trie.setEncoding(enc);
    }

    // Unknown encoding: report it together with the accepted values.
    LogError(string_format("%s illegal: not in [\"%s\"]", enc.c_str(), joinStr(_encVec, ",").c_str()));
    return false;
}
bool Segment::loadSegDict(const string& filePath)
@ -59,7 +74,6 @@ namespace CppJieba
LogError("_calcDAG failed.");
return false;
}
//LogDebug("_calcDAG finished.");
vector<pair<int, double> > dp;
retFlag = _calcDP(uniStr, dag, dp);
@ -68,7 +82,6 @@ namespace CppJieba
LogError("_calcDP failed.");
return false;
}
//LogDebug("_calcDP finished.");
retFlag = _cutDAG(uniStr, dp, res);
if(!retFlag)
@ -76,33 +89,15 @@ namespace CppJieba
LogError("_cutDAG failed.");
return false;
}
//LogDebug("_cutDAG finished.");
return true;
}
double Segment::getUtf8WordWeight(const string& word)
{
return _trie.getWeight(utf8ToUnicode(word));
}
double Segment::getUniWordWeight(const string& word)
double Segment::getWordWeight(const string& word)
{
return _trie.getWeight(word);
}
// Converts utfStr with utf8ToUnicode and validates the result.
// Returns the converted string when it is non-empty and has an even
// number of bytes; otherwise logs the failing input and returns "".
// NOTE(review): the even-length check presumably reflects two bytes per
// converted character -- confirm against utf8ToUnicode.
string Segment::_utf8ToUni(const string& utfStr)
{
    string converted = utf8ToUnicode(utfStr);
    const bool wellFormed = !converted.empty() && (converted.size() % 2 == 0);
    if(wellFormed)
    {
        return converted;
    }

    LogError(string_format("utf8ToUnicode [%s] failed!", utfStr.c_str()));
    return "";
}
bool Segment::_calcDAG(const string& uniStr, vector<vector<uint> >& dag)
{
for(uint i = 0; i < uniStr.size(); i+=2)
@ -218,7 +213,6 @@ int main()
PRINT_VECTOR(res);
getchar();
}
cout<<__FILE__<<__LINE__<<endl;
segment.dispose();
return 0;

View File

@ -23,15 +23,14 @@ namespace CppJieba
~Segment();
public:
bool init();
bool setEncoding(const string& enc);
bool loadSegDict(const string& filePath);
bool dispose();
public:
bool cutDAG(const string& chStr, vector<string>& res);
double getUtf8WordWeight(const string& word);
double getUniWordWeight(const string& word);
double getWordWeight(const string& word);
private:
string _utf8ToUni(const string& chStr);
bool _calcDAG(const string& uniStr, vector<vector<uint> >& dag);
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);

View File

@ -275,8 +275,9 @@ namespace CppJieba
return NULL;
}
double Trie::getWeight(const string& uniStr)
double Trie::getWeight(const string& str)
{
string uniStr = decode(str);
const TrieNodeInfo * p = _findUniStr(uniStr);
if(NULL != p)
{

View File

@ -95,8 +95,7 @@ namespace CppJieba
const TrieNodeInfo* findPrefix(const string& str);
public:
//double getWeight(const ChUnicode* uniStr, size_t len);
double getWeight(const string& uniStr);
double getWeight(const string& str);
double getMinWeight();
int64_t getTotalCount();