mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
use localvector in dag
This commit is contained in:
parent
99c3405e13
commit
c119dc0a93
52
src/Trie.hpp
52
src/Trie.hpp
@ -23,7 +23,7 @@ namespace CppJieba
|
||||
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
||||
}
|
||||
|
||||
typedef std::vector<std::pair<size_t, const DictUnit*> > DagType;
|
||||
typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
|
||||
|
||||
struct SegmentChar
|
||||
{
|
||||
@ -34,6 +34,8 @@ namespace CppJieba
|
||||
size_t nextPos;
|
||||
SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0)
|
||||
{}
|
||||
~SegmentChar()
|
||||
{}
|
||||
};
|
||||
|
||||
template <class KeyType, class ValueType>
|
||||
@ -45,6 +47,20 @@ namespace CppJieba
|
||||
TrieNode * fail;
|
||||
NextMap * next;
|
||||
const ValueType * ptValue;
|
||||
public:
|
||||
// Looks up `key` in this node's `next` map.
// Returns NULL when `next` is NULL or when `key` is not present in the map;
// otherwise returns the node pointer stored for `key` (the map entry's
// `second`). The node itself is not modified (const member).
const TrieNode * findNext(KeyType key) const
|
||||
{
|
||||
// `next` may be NULL (the default constructor sets it so); nothing to search.
if(next == NULL)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
typename NextMap::const_iterator iter = next->find(key);
|
||||
// `key` has no entry in the map.
if(iter == next->end())
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
public:
|
||||
// Default constructor: initializes `fail`, `next` and `ptValue` to NULL.
TrieNode(): fail(NULL), next(NULL), ptValue(NULL) {
|
||||
}
|
||||
@ -96,24 +112,42 @@ namespace CppJieba
|
||||
{
|
||||
res.resize(end - begin);
|
||||
const TrieNodeType * now = _root;
|
||||
typename TrieNodeType::NextMap::const_iterator iter;
|
||||
//typename TrieNodeType::NextMap::const_iterator iter;
|
||||
const TrieNodeType* node;
|
||||
for (size_t i = 0; i < end - begin; i++)
|
||||
{
|
||||
res[i].uniCh = *(begin + i);
|
||||
Unicode::value_type ch = *(begin + i);
|
||||
res[i].uniCh = ch;
|
||||
assert(res[i].dag.empty());
|
||||
res[i].dag.reserve(2);
|
||||
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
|
||||
while( now != _root && (now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end()))
|
||||
bool flag = false;
|
||||
|
||||
// rollback
|
||||
while( now != _root )
|
||||
{
|
||||
now = now->fail;
|
||||
node = now->findNext(ch);
|
||||
if (node != NULL)
|
||||
{
|
||||
flag = true;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
now = now->fail;
|
||||
}
|
||||
}
|
||||
if(now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end())
|
||||
|
||||
if(!flag)
|
||||
{
|
||||
node = now->findNext(ch);
|
||||
}
|
||||
if(node == NULL)
|
||||
{
|
||||
now = _root;
|
||||
}
|
||||
else
|
||||
{
|
||||
now = iter->second;
|
||||
now = node;
|
||||
const TrieNodeType * temp = now;
|
||||
while(temp != _root)
|
||||
{
|
||||
@ -135,7 +169,7 @@ namespace CppJieba
|
||||
bool find(
|
||||
typename KeyContainerType::const_iterator begin,
|
||||
typename KeyContainerType::const_iterator end,
|
||||
std::vector<std::pair<typename KeyContainerType::size_type, const ValueType* > >& res,
|
||||
DagType & res,
|
||||
size_t offset = 0) const
|
||||
{
|
||||
const TrieNodeType * ptNode = _root;
|
||||
|
@ -34,9 +34,9 @@ TEST(DictTrieTest, Test1)
|
||||
|
||||
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
|
||||
word = "清华大学";
|
||||
vector<pair<size_t, const DictUnit*> > res;
|
||||
LocalVector<pair<size_t, const DictUnit*> > res;
|
||||
//vector<pair<size_t, const DictUnit* > resMap;
|
||||
vector<pair<size_t, const DictUnit*> > res2;
|
||||
LocalVector<pair<size_t, const DictUnit*> > res2;
|
||||
const char * words[] = {"清", "清华", "清华大学"};
|
||||
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++)
|
||||
{
|
||||
@ -50,7 +50,9 @@ TEST(DictTrieTest, Test1)
|
||||
vector<pair<size_t, const DictUnit*> > vec;
|
||||
ASSERT_TRUE(TransCode::decode(word, uni));
|
||||
ASSERT_TRUE(trie.find(uni.begin(), uni.end(), res2, 0));
|
||||
ASSERT_EQ(res, res2);
|
||||
s1 << res;
|
||||
s2 << res;
|
||||
ASSERT_EQ(s1, s2);
|
||||
}
|
||||
|
||||
TEST(DictTrieTest, UserDict)
|
||||
|
Loading…
x
Reference in New Issue
Block a user