use localvector in dag

This commit is contained in:
wyy 2014-11-12 21:18:30 +08:00
parent 99c3405e13
commit c119dc0a93
2 changed files with 48 additions and 12 deletions

View File

@ -23,7 +23,7 @@ namespace CppJieba
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
}
typedef std::vector<std::pair<size_t, const DictUnit*> > DagType;
typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
struct SegmentChar
{
@ -34,6 +34,8 @@ namespace CppJieba
size_t nextPos;
/// Default constructor: zeroes the code unit, weight and next position,
/// and leaves the info pointer null.
SegmentChar()
    : uniCh(0),
      pInfo(NULL),
      weight(0.0),
      nextPos(0)
{
}
/// Destructor with an empty body: no explicit cleanup is performed here.
~SegmentChar() {}
};
template <class KeyType, class ValueType>
@ -45,6 +47,20 @@ namespace CppJieba
TrieNode * fail;
NextMap * next;
const ValueType * ptValue;
public:
/**
 * Look up the child node stored under `key` in this node's child map.
 *
 * @param key  the key to search for in `next`.
 * @return the mapped child node, or NULL when this node has no child map
 *         (`next == NULL`) or the map holds no entry for `key`.
 */
const TrieNode * findNext(KeyType key) const
{
    if(next != NULL)
    {
        typename NextMap::const_iterator pos = next->find(key);
        if(pos != next->end())
        {
            return pos->second;
        }
    }
    // Either there is no child map at all, or the key is absent from it.
    return NULL;
}
public:
/// Default constructor: the fail pointer, child map pointer and value
/// pointer all start out null.
TrieNode()
    : fail(NULL),
      next(NULL),
      ptValue(NULL)
{
}
@ -96,24 +112,42 @@ namespace CppJieba
{
res.resize(end - begin);
const TrieNodeType * now = _root;
typename TrieNodeType::NextMap::const_iterator iter;
//typename TrieNodeType::NextMap::const_iterator iter;
const TrieNodeType* node;
for (size_t i = 0; i < end - begin; i++)
{
res[i].uniCh = *(begin + i);
Unicode::value_type ch = *(begin + i);
res[i].uniCh = ch;
assert(res[i].dag.empty());
res[i].dag.reserve(2);
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
while( now != _root && (now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end()))
bool flag = false;
// rollback
while( now != _root )
{
now = now->fail;
node = now->findNext(ch);
if (node != NULL)
{
flag = true;
break;
}
else
{
now = now->fail;
}
}
if(now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end())
if(!flag)
{
node = now->findNext(ch);
}
if(node == NULL)
{
now = _root;
}
else
{
now = iter->second;
now = node;
const TrieNodeType * temp = now;
while(temp != _root)
{
@ -135,7 +169,7 @@ namespace CppJieba
bool find(
typename KeyContainerType::const_iterator begin,
typename KeyContainerType::const_iterator end,
std::vector<std::pair<typename KeyContainerType::size_type, const ValueType* > >& res,
DagType & res,
size_t offset = 0) const
{
const TrieNodeType * ptNode = _root;

View File

@ -34,9 +34,9 @@ TEST(DictTrieTest, Test1)
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
word = "清华大学";
vector<pair<size_t, const DictUnit*> > res;
LocalVector<pair<size_t, const DictUnit*> > res;
//vector<pair<size_t, const DictUnit* > resMap;
vector<pair<size_t, const DictUnit*> > res2;
LocalVector<pair<size_t, const DictUnit*> > res2;
const char * words[] = {"", "清华", "清华大学"};
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++)
{
@ -50,7 +50,9 @@ TEST(DictTrieTest, Test1)
vector<pair<size_t, const DictUnit*> > vec;
ASSERT_TRUE(TransCode::decode(word, uni));
ASSERT_TRUE(trie.find(uni.begin(), uni.end(), res2, 0));
ASSERT_EQ(res, res2);
s1 << res;
s2 << res;
ASSERT_EQ(s1, s2);
}
TEST(DictTrieTest, UserDict)