diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index 4fdd478..bed15cf 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -66,12 +66,12 @@ class DictTrie { const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { return trie_->find(begin, end); } - bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const { - return trie_->find(begin, end, dag, offset); + bool find(Unicode::const_iterator begin, Unicode::const_iterator end, LocalVector >& nexts, size_t offset = 0) const { + return trie_->find(begin, end, nexts, offset); } void find(Unicode::const_iterator begin, Unicode::const_iterator end, - vector& res) const { + vector& res) const { trie_->find(begin, end, res); } bool isUserDictSingleChineseWord(const Unicode::value_type& word) const { @@ -79,7 +79,7 @@ class DictTrie { } double getMinWeight() const { return minWeight_; - }; + } private: void createTrie_(const vector& dictUnits) { diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index 55594be..6c8153f 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -31,7 +31,7 @@ class FullSegment: public SegmentBase { bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { //resut of searching in trie tree - DagType tRes; + LocalVector > tRes; //max index of res's words int maxIdx = 0; @@ -45,9 +45,7 @@ class FullSegment: public SegmentBase { for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) { //find word start from uItr if (dictTrie_->find(uItr, end, tRes, 0)) { - for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) - //for (vector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) - { + for(LocalVector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) { wordLen = itr->second->word.size(); if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) { res.push_back(itr->second->word); diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 0e30baf..1d9cd16 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -48,7 +48,7 @@ class MPSegment: public SegmentBase { } bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const { - vector segmentChars; + vector segmentChars; dictTrie_->find(begin, end, segmentChars); @@ -63,16 +63,16 @@ class MPSegment: public SegmentBase { } private: - void calcDP_(vector& segmentChars) const { + void calcDP_(vector& segmentChars) const { size_t nextPos; const DictUnit* p; double val; - for(vector::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) { + for(vector::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) { rit->pInfo = NULL; rit->weight = MIN_DOUBLE; - assert(!rit->dag.empty()); - for(DagType::const_iterator it = rit->dag.begin(); it != rit->dag.end(); it++) { + assert(!rit->nexts.empty()); + for(LocalVector >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) { nextPos = it->first; p = it->second; val = 0.0; @@ -92,7 +92,7 @@ class MPSegment: public SegmentBase { } } } - void cut_(const vector& segmentChars, + void cut_(const vector& segmentChars, vector& res) const { size_t i = 0; while(i < segmentChars.size()) { diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index 617d4f2..bd8e676 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -56,7 +56,6 @@ class QuerySegment: public SegmentBase { return true; } - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector uRes; if (!cut(begin, end, uRes)) { diff --git a/src/Trie.hpp b/src/Trie.hpp index 3afda9b..18e1b82 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -21,16 +21,14 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) { return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); } -typedef LocalVector > DagType; - -struct SegmentChar { +struct Dag { uint16_t uniCh; - DagType dag; + LocalVector > nexts; const DictUnit * pInfo; double weight; size_t nextPos; - SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {} - ~SegmentChar() {} + Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) { + } }; typedef Unicode::value_type TrieKey; @@ -47,10 +45,23 @@ class TrieNode { class Trie { public: static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey)))); - public: Trie(const vector& keys, const vector& valuePointers) { _createTrie(keys, valuePointers); } + ~Trie() { + for (size_t i = 0; i < BASE_SIZE; i++) { + if (_base[i].next == NULL) { + continue; + } + for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) { + _deleteNode(it->second); + it->second = NULL; + } + delete _base[i].next; + _base[i].next = NULL; + } + } + const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { if (begin == end) { return NULL; @@ -71,11 +82,9 @@ class Trie { return ptNode->ptValue; } - void find( - Unicode::const_iterator begin, + void find(Unicode::const_iterator begin, Unicode::const_iterator end, - vector& res - ) const { + vector& res) const { res.resize(end - begin); const TrieNode *ptNode = NULL; @@ -106,7 +115,7 @@ class Trie { bool find( Unicode::const_iterator begin, Unicode::const_iterator end, - DagType & res, + LocalVector > & res, size_t offset = 0) const { if (begin == end) { return !res.empty(); @@ -135,20 +144,6 @@ class Trie { } return !res.empty(); } - ~Trie() { - for (size_t i = 0; i < BASE_SIZE; i++) { - if (_base[i].next == NULL) { - continue; - } - for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) { - _deleteNode(it->second); - it->second = NULL; - } - delete _base[i].next; - _base[i].next = NULL; - } - } - void insertNode(const Unicode& key, const DictUnit* ptValue) { if (key.begin() == key.end()) { return; diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index cf18fd3..628a931 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -32,9 +32,7 @@ TEST(DictTrieTest, Test1) { EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2); word = "清华大学"; - LocalVector > res; - //vector resMap; - LocalVector > res2; + LocalVector > res, res2; const char * words[] = {"清", "清华", "清华大学"}; for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) { ASSERT_TRUE(TransCode::decode(words[i], uni)); @@ -70,6 +68,6 @@ TEST(DictTrieTest, automation) { string word = "abcderf"; Unicode unicode; ASSERT_TRUE(TransCode::decode(word, unicode)); - vector res; + vector res; trie.find(unicode.begin(), unicode.end(), res); }