From c119dc0a932da33f804df8b425aff3024049b65f Mon Sep 17 00:00:00 2001 From: wyy Date: Wed, 12 Nov 2014 21:18:30 +0800 Subject: [PATCH] use localvector in dag --- src/Trie.hpp | 52 ++++++++++++++++++++++++++++++++++------- test/unittest/TTrie.cpp | 8 ++++--- 2 files changed, 48 insertions(+), 12 deletions(-) diff --git a/src/Trie.hpp b/src/Trie.hpp index c97d96d..0e71973 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -23,7 +23,7 @@ namespace CppJieba return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); } - typedef std::vector > DagType; + typedef LocalVector > DagType; struct SegmentChar { @@ -34,6 +34,8 @@ namespace CppJieba size_t nextPos; SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) {} + ~SegmentChar() + {} }; template @@ -45,6 +47,20 @@ namespace CppJieba TrieNode * fail; NextMap * next; const ValueType * ptValue; + public: + const TrieNode * findNext(KeyType key) const + { + if(next == NULL) + { + return NULL; + } + typename NextMap::const_iterator iter = next->find(key); + if(iter == next->end()) + { + return NULL; + } + return iter->second; + } public: TrieNode(): fail(NULL), next(NULL), ptValue(NULL) { } @@ -96,24 +112,42 @@ namespace CppJieba { res.resize(end - begin); const TrieNodeType * now = _root; - typename TrieNodeType::NextMap::const_iterator iter; + //typename TrieNodeType::NextMap::const_iterator iter; + const TrieNodeType* node; for (size_t i = 0; i < end - begin; i++) { - res[i].uniCh = *(begin + i); + Unicode::value_type ch = *(begin + i); + res[i].uniCh = ch; assert(res[i].dag.empty()); - res[i].dag.reserve(2); res[i].dag.push_back(pair(i, NULL)); - while( now != _root && (now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end())) + bool flag = false; + + // rollback + while( now != _root ) { - now = now->fail; + node = now->findNext(ch); + if (node != NULL) + { + flag = true; + break; + } + else + { + now = now->fail; + } } - if(now->next == NULL || (iter = now->next->find(res[i].uniCh)) == now->next->end()) + + if(!flag) + { + node = now->findNext(ch); + } + if(node == NULL) { now = _root; } else { - now = iter->second; + now = node; const TrieNodeType * temp = now; while(temp != _root) { @@ -135,7 +169,7 @@ namespace CppJieba bool find( typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator end, - std::vector >& res, + DagType & res, size_t offset = 0) const { const TrieNodeType * ptNode = _root; diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index 5ca12fa..5baea4a 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -34,9 +34,9 @@ TEST(DictTrieTest, Test1) EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2); word = "清华大学"; - vector > res; + LocalVector > res; //vector resMap; - vector > res2; + LocalVector > res2; const char * words[] = {"清", "清华", "清华大学"}; for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) { @@ -50,7 +50,9 @@ TEST(DictTrieTest, Test1) vector > vec; ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(trie.find(uni.begin(), uni.end(), res2, 0)); - ASSERT_EQ(res, res2); + s1 << res; + s2 << res; + ASSERT_EQ(s1, s2); } TEST(DictTrieTest, UserDict)