mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
rewriting ...
This commit is contained in:
parent
196d2d563f
commit
44654205d4
@ -22,7 +22,22 @@ namespace CppJieba
|
||||
{
|
||||
bool retFlag;
|
||||
retFlag = _trie.init();
|
||||
return retFlag;
|
||||
if(!retFlag)
|
||||
{
|
||||
LogError("_trie.init failed.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Segment::setEncoding(const string& enc)
|
||||
{
|
||||
if(!isInVec<string>(_encVec, enc))
|
||||
{
|
||||
LogError(string_format("%s illegal: not in [\"%s\"]", enc.c_str(), joinStr(_encVec, ",").c_str()));
|
||||
return false;
|
||||
}
|
||||
return _trie.setEncoding(enc);
|
||||
}
|
||||
|
||||
bool Segment::loadSegDict(const string& filePath)
|
||||
@ -59,7 +74,6 @@ namespace CppJieba
|
||||
LogError("_calcDAG failed.");
|
||||
return false;
|
||||
}
|
||||
//LogDebug("_calcDAG finished.");
|
||||
|
||||
vector<pair<int, double> > dp;
|
||||
retFlag = _calcDP(uniStr, dag, dp);
|
||||
@ -68,7 +82,6 @@ namespace CppJieba
|
||||
LogError("_calcDP failed.");
|
||||
return false;
|
||||
}
|
||||
//LogDebug("_calcDP finished.");
|
||||
|
||||
retFlag = _cutDAG(uniStr, dp, res);
|
||||
if(!retFlag)
|
||||
@ -76,33 +89,15 @@ namespace CppJieba
|
||||
LogError("_cutDAG failed.");
|
||||
return false;
|
||||
}
|
||||
//LogDebug("_cutDAG finished.");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
double Segment::getUtf8WordWeight(const string& word)
|
||||
{
|
||||
return _trie.getWeight(utf8ToUnicode(word));
|
||||
}
|
||||
|
||||
double Segment::getUniWordWeight(const string& word)
|
||||
double Segment::getWordWeight(const string& word)
|
||||
{
|
||||
return _trie.getWeight(word);
|
||||
}
|
||||
|
||||
// Converts utfStr with utf8ToUnicode() and validates the result.
// A result that is empty or has an odd byte length is treated as a failed
// conversion: an error is logged and an empty string is returned.
// NOTE(review): the even-length check presumably reflects two bytes per
// character in the converted form — confirm against utf8ToUnicode().
string Segment::_utf8ToUni(const string& utfStr)
{
    string converted = utf8ToUnicode(utfStr);

    const bool wellFormed = !converted.empty() && (converted.size() % 2 == 0);
    if(wellFormed)
    {
        return converted;
    }

    LogError(string_format("utf8ToUnicode [%s] failed!", utfStr.c_str()));
    return "";
}
|
||||
|
||||
bool Segment::_calcDAG(const string& uniStr, vector<vector<uint> >& dag)
|
||||
{
|
||||
for(uint i = 0; i < uniStr.size(); i+=2)
|
||||
@ -218,7 +213,6 @@ int main()
|
||||
PRINT_VECTOR(res);
|
||||
getchar();
|
||||
}
|
||||
cout<<__FILE__<<__LINE__<<endl;
|
||||
|
||||
segment.dispose();
|
||||
return 0;
|
||||
|
@ -23,15 +23,14 @@ namespace CppJieba
|
||||
~Segment();
|
||||
public:
|
||||
bool init();
|
||||
bool setEncoding(const string& enc);
|
||||
bool loadSegDict(const string& filePath);
|
||||
bool dispose();
|
||||
public:
|
||||
bool cutDAG(const string& chStr, vector<string>& res);
|
||||
double getUtf8WordWeight(const string& word);
|
||||
double getUniWordWeight(const string& word);
|
||||
double getWordWeight(const string& word);
|
||||
|
||||
private:
|
||||
string _utf8ToUni(const string& chStr);
|
||||
bool _calcDAG(const string& uniStr, vector<vector<uint> >& dag);
|
||||
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
||||
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
||||
|
@ -275,8 +275,9 @@ namespace CppJieba
|
||||
return NULL;
|
||||
}
|
||||
|
||||
double Trie::getWeight(const string& uniStr)
|
||||
double Trie::getWeight(const string& str)
|
||||
{
|
||||
string uniStr = decode(str);
|
||||
const TrieNodeInfo * p = _findUniStr(uniStr);
|
||||
if(NULL != p)
|
||||
{
|
||||
|
@ -95,8 +95,7 @@ namespace CppJieba
|
||||
const TrieNodeInfo* findPrefix(const string& str);
|
||||
|
||||
public:
|
||||
//double getWeight(const ChUnicode* uniStr, size_t len);
|
||||
double getWeight(const string& uniStr);
|
||||
double getWeight(const string& str);
|
||||
double getMinWeight();
|
||||
|
||||
int64_t getTotalCount();
|
||||
|
Loading…
x
Reference in New Issue
Block a user