From 30072490990b4e9deaba64a15896260257fdaadd Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Tue, 16 Jul 2013 22:40:57 +0800 Subject: [PATCH] add findPrefix into trie.cpp --- src/KeyWordExt.cpp | 2 +- src/Trie.cpp | 45 ++++++++++++++++++++++++++++++++++++++++++--- src/Trie.h | 2 +- 3 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/KeyWordExt.cpp b/src/KeyWordExt.cpp index 1c93b47..613d13c 100644 --- a/src/KeyWordExt.cpp +++ b/src/KeyWordExt.cpp @@ -286,7 +286,7 @@ namespace CppJieba for(vector::iterator it = wordInfos.begin(); it != wordInfos.end(); ) { cout<<__FILE__<<__LINE__<word)) + if(NULL != _priorPrefixTrie.findPrefix(it->word)) { prior = *it; it = wordInfos.erase(it); diff --git a/src/Trie.cpp b/src/Trie.cpp index d3fe34e..575fe41 100644 --- a/src/Trie.cpp +++ b/src/Trie.cpp @@ -112,8 +112,13 @@ namespace CppJieba } } - const TrieNodeInfo* Trie::findUtf8(const string& utf8Str) + const TrieNodeInfo* Trie::findPrefix(const string& utf8Str) { + if(NULL == _root) + { + LogFatal("trie not initted!"); + return NULL; + } if(utf8Str.empty()) { LogError("utf8Str is empty"); @@ -122,10 +127,44 @@ namespace CppJieba string uniStr = utf8ToUnicode(utf8Str); if(uniStr.empty()) { - LogError("utf8ToUnicode return empty str"); + LogError("utf8ToUnicode return empty star"); return NULL; } - return find(uniStr); + if(uniStr.size() % 2) + { + LogError("utf8ToUnicode return uniStr illegal"); + return NULL; + } + //find + TrieNode* p = _root; + TrieNodeInfo * res = NULL; + for(uint i = 0; i < uniStr.size(); i+=2) + { + ChUnicode chUni = twocharToUint16(uniStr[i], uniStr[i+1]); + if(p->isLeaf) + { + uint pos = p->nodeInfoVecPos; + if(pos < _nodeInfoVec.size()) + { + res = &(_nodeInfoVec[pos]); + } + else + { + LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range"); + return NULL; + } + + } + if(p->hmap.find(chUni) == p->hmap.end()) + { + break; + } + else + { + p = p->hmap[chUni]; + } + } + return res; } const TrieNodeInfo* Trie::find(const string& uniStr) 
diff --git a/src/Trie.h b/src/Trie.h index f449e40..bead95b 100644 --- a/src/Trie.h +++ b/src/Trie.h @@ -70,9 +70,9 @@ namespace CppJieba void display(); public: - const TrieNodeInfo* findUtf8(const string& utf8Str); const TrieNodeInfo* find(const string& uniStr); const TrieNodeInfo* find(const ChUnicode* const chUniStr, size_t len); + const TrieNodeInfo* findPrefix(const string& utf8Str); public: double getWeight(const ChUnicode* uniStr, size_t len);