use localvector in dag

This commit is contained in:
wyy 2014-11-12 21:18:30 +08:00
parent 99c3405e13
commit c119dc0a93
2 changed files with 48 additions and 12 deletions

View File

@ -23,7 +23,7 @@ namespace CppJieba
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
} }
typedef std::vector<std::pair<size_t, const DictUnit*> > DagType; typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
struct SegmentChar struct SegmentChar
{ {
@ -34,6 +34,8 @@ namespace CppJieba
size_t nextPos; size_t nextPos;
SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0)
{} {}
~SegmentChar()
{}
}; };
template <class KeyType, class ValueType> template <class KeyType, class ValueType>
@ -45,6 +47,20 @@ namespace CppJieba
TrieNode * fail; TrieNode * fail;
NextMap * next; NextMap * next;
const ValueType * ptValue; const ValueType * ptValue;
public:
/// Follows the outgoing edge labelled `key` from this node.
///
/// @param key  the edge label to look up in this node's child map.
/// @return the child node stored under `key`, or NULL when this node has
///         no child map at all or the map holds no entry for `key`.
const TrieNode * findNext(KeyType key) const
{
  if(next != NULL)
  {
    // Only consult the child map when it has actually been allocated.
    typename NextMap::const_iterator pos = next->find(key);
    if(pos != next->end())
    {
      return pos->second;
    }
  }
  // Either there is no child map or no edge for `key`.
  return NULL;
}
public: public:
TrieNode(): fail(NULL), next(NULL), ptValue(NULL) { TrieNode(): fail(NULL), next(NULL), ptValue(NULL) {
} }
@ -96,24 +112,42 @@ namespace CppJieba
{ {
res.resize(end - begin); res.resize(end - begin);
const TrieNodeType * now = _root; const TrieNodeType * now = _root;
typename TrieNodeType::NextMap::const_iterator iter; //typename TrieNodeType::NextMap::const_iterator iter;
const TrieNodeType* node;
for (size_t i = 0; i < end - begin; i++) for (size_t i = 0; i < end - begin; i++)
{ {
res[i].uniCh = *(begin + i); Unicode::value_type ch = *(begin + i);
res[i].uniCh = ch;
assert(res[i].dag.empty()); assert(res[i].dag.empty());
res[i].dag.reserve(2);
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL)); res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
while( now != _root && (now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end())) bool flag = false;
// rollback
while( now != _root )
{ {
now = now->fail; node = now->findNext(ch);
if (node != NULL)
{
flag = true;
break;
}
else
{
now = now->fail;
}
} }
if(now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end())
if(!flag)
{
node = now->findNext(ch);
}
if(node == NULL)
{ {
now = _root; now = _root;
} }
else else
{ {
now = iter->second; now = node;
const TrieNodeType * temp = now; const TrieNodeType * temp = now;
while(temp != _root) while(temp != _root)
{ {
@ -135,7 +169,7 @@ namespace CppJieba
bool find( bool find(
typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator begin,
typename KeyContainerType::const_iterator end, typename KeyContainerType::const_iterator end,
std::vector<std::pair<typename KeyContainerType::size_type, const ValueType* > >& res, DagType & res,
size_t offset = 0) const size_t offset = 0) const
{ {
const TrieNodeType * ptNode = _root; const TrieNodeType * ptNode = _root;

View File

@ -34,9 +34,9 @@ TEST(DictTrieTest, Test1)
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2); EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
word = "清华大学"; word = "清华大学";
vector<pair<size_t, const DictUnit*> > res; LocalVector<pair<size_t, const DictUnit*> > res;
//vector<pair<size_t, const DictUnit* > resMap; //vector<pair<size_t, const DictUnit* > resMap;
vector<pair<size_t, const DictUnit*> > res2; LocalVector<pair<size_t, const DictUnit*> > res2;
const char * words[] = {"", "清华", "清华大学"}; const char * words[] = {"", "清华", "清华大学"};
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++)
{ {
@ -50,7 +50,9 @@ TEST(DictTrieTest, Test1)
vector<pair<size_t, const DictUnit*> > vec; vector<pair<size_t, const DictUnit*> > vec;
ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(TransCode::decode(word, uni));
ASSERT_TRUE(trie.find(uni.begin(), uni.end(), res2, 0)); ASSERT_TRUE(trie.find(uni.begin(), uni.end(), res2, 0));
ASSERT_EQ(res, res2); s1 << res;
s2 << res;
ASSERT_EQ(s1, s2);
} }
TEST(DictTrieTest, UserDict) TEST(DictTrieTest, UserDict)