prettify Trie.hpp ing

This commit is contained in:
wyy 2014-03-16 21:00:51 +08:00
parent 762495f5f4
commit 89c955c1d6
2 changed files with 18 additions and 32 deletions

View File

@ -2,6 +2,7 @@
1. 修改了设计上的问题,删除了`TrieManager`这个类,以避免造成一些可能的隐患。 1. 修改了设计上的问题,删除了`TrieManager`这个类,以避免造成一些可能的隐患。
2. 增加`stop_words.utf8`词典,并修改`KeywordExtractor`的初始化函数用以使用此词典。 2. 增加`stop_words.utf8`词典,并修改`KeywordExtractor`的初始化函数用以使用此词典。
3. 优化了Trie树相关部分代码结构。
## v2.3.2 ## v2.3.2

View File

@ -104,18 +104,17 @@ namespace CppJieba
public: public:
const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const
{ {
TrieNodeMap::const_iterator citer;
TrieNode* p = _root; TrieNode* p = _root;
for(Unicode::const_iterator it = begin; it != end; it++) for(Unicode::const_iterator it = begin; it != end; it++)
{ {
uint16_t chUni = *it; uint16_t chUni = *it;
if(p->hmap.find(chUni) == p-> hmap.end()) citer = p->hmap.find(chUni);
if(p-> hmap.end() == citer)
{ {
return NULL; return NULL;
} }
else p = citer->second;
{
p = p->hmap[chUni];
}
} }
if(p->isLeaf) if(p->isLeaf)
{ {
@ -126,26 +125,19 @@ namespace CppJieba
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, vector<pair<size_t, const TrieNodeInfo*> >& res) const bool find(Unicode::const_iterator begin, Unicode::const_iterator end, vector<pair<size_t, const TrieNodeInfo*> >& res) const
{ {
TrieNodeMap::const_iterator citer;
TrieNode* p = _root; TrieNode* p = _root;
for (Unicode::const_iterator itr = begin; itr != end; itr++) for (Unicode::const_iterator itr = begin; itr != end; itr++)
{ {
if(p->hmap.find(*itr) == p-> hmap.end()) citer = p->hmap.find(*itr);
if(p->hmap.end() == citer)
{ {
break; break;
} }
p = p->hmap[*itr]; p = citer->second;
if(p->isLeaf) if(p->isLeaf)
{ {
size_t pos = p->nodeInfoPos; res.push_back(make_pair(itr-begin, &_nodeInfos[p->nodeInfoPos]));
if(pos < _nodeInfos.size())
{
res.push_back(make_pair(itr-begin, &_nodeInfos[pos]));
}
else
{
LogFatal("node's nodeInfoPos is out of _nodeInfos's range");
return false;
}
} }
} }
return !res.empty(); return !res.empty();
@ -154,25 +146,18 @@ namespace CppJieba
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, size_t offset, DagType & res) const bool find(Unicode::const_iterator begin, Unicode::const_iterator end, size_t offset, DagType & res) const
{ {
TrieNode* p = _root; TrieNode* p = _root;
TrieNodeMap::const_iterator citer;
for (Unicode::const_iterator itr = begin; itr != end; itr++) for (Unicode::const_iterator itr = begin; itr != end; itr++)
{ {
if(p->hmap.find(*itr) == p-> hmap.end()) citer = p->hmap.find(*itr);
if(p->hmap.end() == citer)
{ {
break; break;
} }
p = p->hmap[*itr]; p = citer->second;
if(p->isLeaf) if(p->isLeaf)
{ {
size_t pos = p->nodeInfoPos; res[itr - begin + offset] = &_nodeInfos[p->nodeInfoPos];
if(pos < _nodeInfos.size())
{
res[itr-begin + offset] = &_nodeInfos[pos];
}
else
{
LogFatal("node's nodeInfoPos is out of _nodeInfos's range");
return false;
}
} }
} }
return !res.empty(); return !res.empty();
@ -184,11 +169,11 @@ namespace CppJieba
private: private:
void _insert(const TrieNodeInfo& nodeInfo, size_t nodeInfoPos) void _insert(const TrieNodeInfo& nodeInfo, size_t nodeInfoPos)
{ {
const Unicode& uintVec = nodeInfo.word; const Unicode& unico = nodeInfo.word;
TrieNode* p = _root; TrieNode* p = _root;
for(size_t i = 0; i < uintVec.size(); i++) for(size_t i = 0; i < unico.size(); i++)
{ {
uint16_t cu = uintVec[i]; uint16_t cu = unico[i];
assert(p); assert(p);
if(!isIn(p->hmap, cu)) if(!isIn(p->hmap, cu))
{ {