From dc41068d581144364cf597dc3576e57d8d508b5b Mon Sep 17 00:00:00 2001 From: wyy Date: Fri, 13 Sep 2013 13:12:32 +0800 Subject: [PATCH] inlining some funct int Trie.cpp/h and add find(const Unicode&, vector >&res) --- src/MixSegment.cpp | 6 ++- src/Trie.cpp | 115 +++++++++++++++++++++++++-------------------- src/Trie.h | 18 +++---- 3 files changed, 80 insertions(+), 59 deletions(-) diff --git a/src/MixSegment.cpp b/src/MixSegment.cpp index 8383105..19fddc8 100644 --- a/src/MixSegment.cpp +++ b/src/MixSegment.cpp @@ -34,10 +34,14 @@ namespace CppJieba bool MixSegment::cut(const string& str, vector& res) { + if(str.empty()) + { + return false; + } vector infos; if(!_mpSeg.cut(str, infos)) { - LogError("_mpSeg cutDAG failed."); + LogError("_mpSeg cutDAG [%s] failed.", str.c_str()); return false; } res.clear(); diff --git a/src/Trie.cpp b/src/Trie.cpp index 114872f..629628f 100644 --- a/src/Trie.cpp +++ b/src/Trie.cpp @@ -30,15 +30,6 @@ namespace CppJieba dispose(); } - bool Trie::_getInitFlag() - { - return _initFlag; - } - void Trie::_setInitFlag(bool on) - { - _initFlag = on; - } - bool Trie::init() { if(_getInitFlag()) @@ -256,54 +247,78 @@ namespace CppJieba return NULL; } - double Trie::getWeight(const string& str) - { - - Unicode uintVec; - TransCode::decode(str, uintVec); - return getWeight(uintVec); - } - - double Trie::getWeight(const Unicode& uintVec) - { - if(uintVec.empty()) + bool Trie::find(const Unicode& unico, vector >& res) + { + if(!_getInitFlag()) { - return getMinLogFreq(); + LogFatal("trie not initted!"); + return false; } - const TrieNodeInfo * p = find(uintVec); - if(NULL != p) + res.clear(); + TrieNode* p = _root; + //for(Unicode::const_iterator it = begin; it != end; it++) + for(uint i = 0; i < unico.size(); i++) { - return p->logFreq; + if(p->hmap.find(unico[i]) == p-> hmap.end()) + { + break; + } + p = p->hmap[unico[i]]; + if(p->isLeaf) + { + uint pos = p->nodeInfoVecPos; + if(pos < _nodeInfoVec.size()) + { + res.push_back(make_pair(i, &_nodeInfoVec[pos])); + } + else + { + LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range"); + return false; + } + } } - else - { - return getMinLogFreq(); - } - - } + return !res.empty(); + } - double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end) - { - const TrieNodeInfo * p = find(begin, end); - if(NULL != p) - { - return p->logFreq; - } - else - { - return getMinLogFreq(); - } - } + //double Trie::getWeight(const string& str) + //{ - double Trie::getMinLogFreq() - { - return _minLogFreq; - } + // Unicode uintVec; + // TransCode::decode(str, uintVec); + // return getWeight(uintVec); + //} - int64_t Trie::getTotalCount() - { - return _freqSum; - } + //double Trie::getWeight(const Unicode& uintVec) + //{ + // if(uintVec.empty()) + // { + // return getMinLogFreq(); + // } + // const TrieNodeInfo * p = find(uintVec); + // if(NULL != p) + // { + // return p->logFreq; + // } + // else + // { + // return getMinLogFreq(); + // } + // + //} + + //double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end) + //{ + // const TrieNodeInfo * p = find(begin, end); + // if(NULL != p) + // { + // return p->logFreq; + // } + // else + // { + // return getMinLogFreq(); + // } + //} bool Trie::_deleteNode(TrieNode* node) { diff --git a/src/Trie.h b/src/Trie.h index 4d4673c..a2b6623 100644 --- a/src/Trie.h +++ b/src/Trie.h @@ -43,9 +43,9 @@ namespace CppJieba TrieNode* _root; vector _nodeInfoVec; + bool _initFlag; int64_t _freqSum; double _minLogFreq; - bool _initFlag; public: typedef vector::iterator iterator; @@ -62,22 +62,24 @@ namespace CppJieba bool dispose(); private: - void _setInitFlag(bool on); - bool _getInitFlag(); + void _setInitFlag(bool on){_initFlag = on;}; + bool _getInitFlag(){return _initFlag;}; public: TrieNodeInfo* find(const string& str); TrieNodeInfo* find(const Unicode& uintVec); TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end); + bool find(const Unicode& unico, vector >& res); + const TrieNodeInfo* findPrefix(const string& str); public: - double getWeight(const string& str); - double getWeight(const Unicode& uintVec); - double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end); - double getMinLogFreq(); + //double getWeight(const string& str); + //double getWeight(const Unicode& uintVec); + //double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end); + double getMinLogFreq(){return _minLogFreq;}; - int64_t getTotalCount(); + int64_t getTotalCount(){return _freqSum;}; bool insert(const TrieNodeInfo& nodeInfo);