mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
rewriting ...
This commit is contained in:
parent
196d2d563f
commit
44654205d4
@ -22,7 +22,22 @@ namespace CppJieba
|
|||||||
{
|
{
|
||||||
bool retFlag;
|
bool retFlag;
|
||||||
retFlag = _trie.init();
|
retFlag = _trie.init();
|
||||||
return retFlag;
|
if(!retFlag)
|
||||||
|
{
|
||||||
|
LogError("_trie.init failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Segment::setEncoding(const string& enc)
|
||||||
|
{
|
||||||
|
if(!isInVec<string>(_encVec, enc))
|
||||||
|
{
|
||||||
|
LogError(string_format("%s illegal: not in [\"%s\"]", enc.c_str(), joinStr(_encVec, ",").c_str()));
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return _trie.setEncoding(enc);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Segment::loadSegDict(const string& filePath)
|
bool Segment::loadSegDict(const string& filePath)
|
||||||
@ -59,7 +74,6 @@ namespace CppJieba
|
|||||||
LogError("_calcDAG failed.");
|
LogError("_calcDAG failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug("_calcDAG finished.");
|
|
||||||
|
|
||||||
vector<pair<int, double> > dp;
|
vector<pair<int, double> > dp;
|
||||||
retFlag = _calcDP(uniStr, dag, dp);
|
retFlag = _calcDP(uniStr, dag, dp);
|
||||||
@ -68,7 +82,6 @@ namespace CppJieba
|
|||||||
LogError("_calcDP failed.");
|
LogError("_calcDP failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug("_calcDP finished.");
|
|
||||||
|
|
||||||
retFlag = _cutDAG(uniStr, dp, res);
|
retFlag = _cutDAG(uniStr, dp, res);
|
||||||
if(!retFlag)
|
if(!retFlag)
|
||||||
@ -76,33 +89,15 @@ namespace CppJieba
|
|||||||
LogError("_cutDAG failed.");
|
LogError("_cutDAG failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug("_cutDAG finished.");
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
double Segment::getUtf8WordWeight(const string& word)
|
double Segment::getWordWeight(const string& word)
|
||||||
{
|
|
||||||
return _trie.getWeight(utf8ToUnicode(word));
|
|
||||||
}
|
|
||||||
|
|
||||||
double Segment::getUniWordWeight(const string& word)
|
|
||||||
{
|
{
|
||||||
return _trie.getWeight(word);
|
return _trie.getWeight(word);
|
||||||
}
|
}
|
||||||
|
|
||||||
string Segment::_utf8ToUni(const string& utfStr)
|
|
||||||
{
|
|
||||||
string uniStr = utf8ToUnicode(utfStr);
|
|
||||||
|
|
||||||
if(uniStr.empty() || uniStr.size() % 2)
|
|
||||||
{
|
|
||||||
LogError(string_format("utf8ToUnicode [%s] failed!", utfStr.c_str()));
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
return uniStr;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Segment::_calcDAG(const string& uniStr, vector<vector<uint> >& dag)
|
bool Segment::_calcDAG(const string& uniStr, vector<vector<uint> >& dag)
|
||||||
{
|
{
|
||||||
for(uint i = 0; i < uniStr.size(); i+=2)
|
for(uint i = 0; i < uniStr.size(); i+=2)
|
||||||
@ -218,7 +213,6 @@ int main()
|
|||||||
PRINT_VECTOR(res);
|
PRINT_VECTOR(res);
|
||||||
getchar();
|
getchar();
|
||||||
}
|
}
|
||||||
cout<<__FILE__<<__LINE__<<endl;
|
|
||||||
|
|
||||||
segment.dispose();
|
segment.dispose();
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -23,15 +23,14 @@ namespace CppJieba
|
|||||||
~Segment();
|
~Segment();
|
||||||
public:
|
public:
|
||||||
bool init();
|
bool init();
|
||||||
|
bool setEncoding(const string& enc);
|
||||||
bool loadSegDict(const string& filePath);
|
bool loadSegDict(const string& filePath);
|
||||||
bool dispose();
|
bool dispose();
|
||||||
public:
|
public:
|
||||||
bool cutDAG(const string& chStr, vector<string>& res);
|
bool cutDAG(const string& chStr, vector<string>& res);
|
||||||
double getUtf8WordWeight(const string& word);
|
double getWordWeight(const string& word);
|
||||||
double getUniWordWeight(const string& word);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
string _utf8ToUni(const string& chStr);
|
|
||||||
bool _calcDAG(const string& uniStr, vector<vector<uint> >& dag);
|
bool _calcDAG(const string& uniStr, vector<vector<uint> >& dag);
|
||||||
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
||||||
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
||||||
|
@ -275,8 +275,9 @@ namespace CppJieba
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
double Trie::getWeight(const string& uniStr)
|
double Trie::getWeight(const string& str)
|
||||||
{
|
{
|
||||||
|
string uniStr = decode(str);
|
||||||
const TrieNodeInfo * p = _findUniStr(uniStr);
|
const TrieNodeInfo * p = _findUniStr(uniStr);
|
||||||
if(NULL != p)
|
if(NULL != p)
|
||||||
{
|
{
|
||||||
|
@ -95,8 +95,7 @@ namespace CppJieba
|
|||||||
const TrieNodeInfo* findPrefix(const string& str);
|
const TrieNodeInfo* findPrefix(const string& str);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
//double getWeight(const ChUnicode* uniStr, size_t len);
|
double getWeight(const string& str);
|
||||||
double getWeight(const string& uniStr);
|
|
||||||
double getMinWeight();
|
double getMinWeight();
|
||||||
|
|
||||||
int64_t getTotalCount();
|
int64_t getTotalCount();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user