mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
prettifying Trie.hpp (in progress)
This commit is contained in:
parent
762495f5f4
commit
89c955c1d6
@ -2,6 +2,7 @@
|
||||
|
||||
1. 修改了设计上的问题,删除了`TrieManager`这个类,以避免造成一些可能的隐患。
|
||||
2. 增加`stop_words.utf8`词典,并修改`KeywordExtractor`的初始化函数用以使用此词典。
|
||||
3. 优化了Trie树相关部分代码结构。
|
||||
|
||||
## v2.3.2
|
||||
|
||||
|
49
src/Trie.hpp
49
src/Trie.hpp
@ -104,18 +104,17 @@ namespace CppJieba
|
||||
public:
|
||||
const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const
|
||||
{
|
||||
TrieNodeMap::const_iterator citer;
|
||||
TrieNode* p = _root;
|
||||
for(Unicode::const_iterator it = begin; it != end; it++)
|
||||
{
|
||||
uint16_t chUni = *it;
|
||||
if(p->hmap.find(chUni) == p-> hmap.end())
|
||||
citer = p->hmap.find(chUni);
|
||||
if(p-> hmap.end() == citer)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
p = p->hmap[chUni];
|
||||
}
|
||||
p = citer->second;
|
||||
}
|
||||
if(p->isLeaf)
|
||||
{
|
||||
@ -126,26 +125,19 @@ namespace CppJieba
|
||||
|
||||
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, vector<pair<size_t, const TrieNodeInfo*> >& res) const
|
||||
{
|
||||
TrieNodeMap::const_iterator citer;
|
||||
TrieNode* p = _root;
|
||||
for (Unicode::const_iterator itr = begin; itr != end; itr++)
|
||||
{
|
||||
if(p->hmap.find(*itr) == p-> hmap.end())
|
||||
citer = p->hmap.find(*itr);
|
||||
if(p->hmap.end() == citer)
|
||||
{
|
||||
break;
|
||||
}
|
||||
p = p->hmap[*itr];
|
||||
p = citer->second;
|
||||
if(p->isLeaf)
|
||||
{
|
||||
size_t pos = p->nodeInfoPos;
|
||||
if(pos < _nodeInfos.size())
|
||||
{
|
||||
res.push_back(make_pair(itr-begin, &_nodeInfos[pos]));
|
||||
}
|
||||
else
|
||||
{
|
||||
LogFatal("node's nodeInfoPos is out of _nodeInfos's range");
|
||||
return false;
|
||||
}
|
||||
res.push_back(make_pair(itr-begin, &_nodeInfos[p->nodeInfoPos]));
|
||||
}
|
||||
}
|
||||
return !res.empty();
|
||||
@ -154,25 +146,18 @@ namespace CppJieba
|
||||
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, size_t offset, DagType & res) const
|
||||
{
|
||||
TrieNode* p = _root;
|
||||
TrieNodeMap::const_iterator citer;
|
||||
for (Unicode::const_iterator itr = begin; itr != end; itr++)
|
||||
{
|
||||
if(p->hmap.find(*itr) == p-> hmap.end())
|
||||
citer = p->hmap.find(*itr);
|
||||
if(p->hmap.end() == citer)
|
||||
{
|
||||
break;
|
||||
}
|
||||
p = p->hmap[*itr];
|
||||
p = citer->second;
|
||||
if(p->isLeaf)
|
||||
{
|
||||
size_t pos = p->nodeInfoPos;
|
||||
if(pos < _nodeInfos.size())
|
||||
{
|
||||
res[itr-begin + offset] = &_nodeInfos[pos];
|
||||
}
|
||||
else
|
||||
{
|
||||
LogFatal("node's nodeInfoPos is out of _nodeInfos's range");
|
||||
return false;
|
||||
}
|
||||
res[itr - begin + offset] = &_nodeInfos[p->nodeInfoPos];
|
||||
}
|
||||
}
|
||||
return !res.empty();
|
||||
@ -184,11 +169,11 @@ namespace CppJieba
|
||||
private:
|
||||
void _insert(const TrieNodeInfo& nodeInfo, size_t nodeInfoPos)
|
||||
{
|
||||
const Unicode& uintVec = nodeInfo.word;
|
||||
const Unicode& unico = nodeInfo.word;
|
||||
TrieNode* p = _root;
|
||||
for(size_t i = 0; i < uintVec.size(); i++)
|
||||
for(size_t i = 0; i < unico.size(); i++)
|
||||
{
|
||||
uint16_t cu = uintVec[i];
|
||||
uint16_t cu = unico[i];
|
||||
assert(p);
|
||||
if(!isIn(p->hmap, cu))
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user