diff --git a/src/Segment.cpp b/src/Segment.cpp index 9f7ca59..39e2cb6 100644 --- a/src/Segment.cpp +++ b/src/Segment.cpp @@ -22,7 +22,22 @@ namespace CppJieba { bool retFlag; retFlag = _trie.init(); - return retFlag; + if(!retFlag) + { + LogError("_trie.init failed."); + return false; + } + return true; + } + + bool Segment::setEncoding(const string& enc) + { + if(!isInVec(_encVec, enc)) + { + LogError(string_format("%s illegal: not in [\"%s\"]", enc.c_str(), joinStr(_encVec, ",").c_str())); + return false; + } + return _trie.setEncoding(enc); } bool Segment::loadSegDict(const string& filePath) @@ -59,7 +74,6 @@ namespace CppJieba LogError("_calcDAG failed."); return false; } - //LogDebug("_calcDAG finished."); vector > dp; retFlag = _calcDP(uniStr, dag, dp); @@ -68,7 +82,6 @@ namespace CppJieba LogError("_calcDP failed."); return false; } - //LogDebug("_calcDP finished."); retFlag = _cutDAG(uniStr, dp, res); if(!retFlag) @@ -76,33 +89,15 @@ namespace CppJieba LogError("_cutDAG failed."); return false; } - //LogDebug("_cutDAG finished."); return true; } - double Segment::getUtf8WordWeight(const string& word) - { - return _trie.getWeight(utf8ToUnicode(word)); - } - - double Segment::getUniWordWeight(const string& word) + double Segment::getWordWeight(const string& word) { return _trie.getWeight(word); } - string Segment::_utf8ToUni(const string& utfStr) - { - string uniStr = utf8ToUnicode(utfStr); - - if(uniStr.empty() || uniStr.size() % 2) - { - LogError(string_format("utf8ToUnicode [%s] failed!", utfStr.c_str())); - return ""; - } - return uniStr; - } - bool Segment::_calcDAG(const string& uniStr, vector >& dag) { for(uint i = 0; i < uniStr.size(); i+=2) @@ -218,7 +213,6 @@ int main() PRINT_VECTOR(res); getchar(); } - cout<<__FILE__<<__LINE__<& res); - double getUtf8WordWeight(const string& word); - double getUniWordWeight(const string& word); + double getWordWeight(const string& word); private: - string _utf8ToUni(const string& chStr); bool _calcDAG(const string& uniStr, vector >& dag); bool _calcDP(const string& uniStr, const vector >& dag, vector >& res); bool _cutDAG(const string& uniStr, const vector >& dp, vector& res); diff --git a/src/Trie.cpp b/src/Trie.cpp index e159131..456b4d2 100644 --- a/src/Trie.cpp +++ b/src/Trie.cpp @@ -275,8 +275,9 @@ namespace CppJieba return NULL; } - double Trie::getWeight(const string& uniStr) + double Trie::getWeight(const string& str) { + string uniStr = decode(str); const TrieNodeInfo * p = _findUniStr(uniStr); if(NULL != p) { diff --git a/src/Trie.h b/src/Trie.h index 95f4e86..03d9a5b 100644 --- a/src/Trie.h +++ b/src/Trie.h @@ -95,8 +95,7 @@ namespace CppJieba const TrieNodeInfo* findPrefix(const string& str); public: - //double getWeight(const ChUnicode* uniStr, size_t len); - double getWeight(const string& uniStr); + double getWeight(const string& str); double getMinWeight(); int64_t getTotalCount();