diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index 9a5a5ad..9d925f8 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -73,10 +73,10 @@ class DictTrie { } void findByLimit(Unicode::const_iterator begin, Unicode::const_iterator end, - vector&res, size_t min_word_len, - size_t max_word_len) const { - trie_->findByLimit(begin, end, res, min_word_len, max_word_len); + size_t max_word_len, + vector&res) const { + trie_->findByLimit(begin, end, min_word_len, max_word_len, res); } bool isUserDictSingleChineseWord(const Rune& word) const { return isIn(userDictSingleChineseWord_, word); diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index 37a88e0..351198b 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -28,7 +28,8 @@ class FullSegment: public SegmentBase { } } using SegmentBase::cut; - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, + virtual void cut(Unicode::const_iterator begin, + Unicode::const_iterator end, vector& res) const { //resut of searching in trie tree LocalVector > tRes; @@ -58,7 +59,6 @@ class FullSegment: public SegmentBase { } uIdx++; } - return true; } private: const DictTrie* dictTrie_; diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index 554a52f..8adbafb 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -24,13 +24,13 @@ class HMMSegment: public SegmentBase { } using SegmentBase::cut; - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { + void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { Unicode::const_iterator left = begin; Unicode::const_iterator right = begin; while(right != end) { if(*right < 0x80) { - if(left != right && !cut_(left, right, res)) { - return false; + if(left != right) { + cut_(left, right, res); } left = right; do { @@ -50,10 +50,9 @@ class HMMSegment: public SegmentBase { right++; } } - if(left != right && !cut_(left, right, res)) { - return false; + if(left != right) { + cut_(left, right, res); } - return true; } private: // sequential letters rule @@ -92,12 +91,9 @@ class HMMSegment: public SegmentBase { } return begin; } - bool cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { + void cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector status; - if(!viterbi_(begin, end, status)) { - LogError("viterbi_ failed."); - return false; - } + viterbi_(begin, end, status); Unicode::const_iterator left = begin; Unicode::const_iterator right; @@ -108,15 +104,11 @@ class HMMSegment: public SegmentBase { left = right; } } - return true; } - bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end, + void viterbi_(Unicode::const_iterator begin, + Unicode::const_iterator end, vector& status) const { - if(begin == end) { - return false; - } - size_t Y = HMMModel::STATUS_SUM; size_t X = end - begin; @@ -166,8 +158,6 @@ class HMMSegment: public SegmentBase { status[x] = stat; stat = path[x + stat*X]; } - - return true; } private: diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index da6f38b..1298d67 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -33,7 +33,7 @@ class MPSegment: public SegmentBase { } using SegmentBase::cut; - bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const { + void cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const { vector dags; dictTrie_->find(begin, end, dags); @@ -41,8 +41,19 @@ class MPSegment: public SegmentBase { calcDP_(dags); cut_(dags, res); - - return true; + } + void cut(Unicode::const_iterator begin, + Unicode::const_iterator end, + size_t min_word_len, + size_t max_word_len, + vector&res) const { + vector dags; + dictTrie_->findByLimit(begin, end, + min_word_len, + max_word_len, + dags); + calcDP_(dags); + cut_(dags, res); } const DictTrie* getDictTrie() const { return dictTrie_; diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index 36d00a2..c9d0880 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -21,13 +21,10 @@ class MixSegment: public SegmentBase { virtual ~MixSegment() { } using SegmentBase::cut; - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { + virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector words; words.reserve(end - begin); - if(!mpSeg_.cut(begin, end, words)) { - LogError("mpSeg cutDAG failed."); - return false; - } + mpSeg_.cut(begin, end, words); vector hmmRes; hmmRes.reserve(end - begin); @@ -48,10 +45,7 @@ class MixSegment: public SegmentBase { } // cut the sequence with hmm - if (!hmmSeg_.cut(piece.begin(), piece.end(), hmmRes)) { - LogError("hmmSeg_ cut failed."); - return false; - } + hmmSeg_.cut(piece.begin(), piece.end(), hmmRes); //put hmm result to result for (size_t k = 0; k < hmmRes.size(); k++) { @@ -65,7 +59,6 @@ class MixSegment: public SegmentBase { //let i jump over this piece i = j - 1; } - return true; } const DictTrie* getDictTrie() const { diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index 31e8847..fb95d1d 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -28,32 +28,26 @@ class QuerySegment: public SegmentBase { virtual ~QuerySegment() { } using SegmentBase::cut; - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { + void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { //use mix cut first vector mixRes; - if (!mixSeg_.cut(begin, end, mixRes)) { - LogError("mixSeg_ cut failed."); - return false; - } + mixSeg_.cut(begin, end, mixRes); vector fullRes; for (vector::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { // if it's too long, cut with fullSeg_, put fullRes in res if (mixResItr->size() > maxWordLen_) { - if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) { - for (vector::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { - res.push_back(*fullResItr); - } - - //clear tmp res - fullRes.clear(); + fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes); + for (vector::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { + res.push_back(*fullResItr); } + + //clear tmp res + fullRes.clear(); } else { // just use the mix result res.push_back(*mixResItr); } } - - return true; } private: MixSegment mixSeg_; diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 512be42..4989771 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -27,7 +27,7 @@ class SegmentBase: public ISegment, public NonCopyable { virtual ~SegmentBase() { }; public: - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const = 0; + virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const = 0; virtual bool cut(const string& str, vector& res) const { res.clear(); @@ -55,25 +55,18 @@ class SegmentBase: public ISegment, public NonCopyable { return true; } - virtual bool cut(Unicode::const_iterator begin, + void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - if(begin == end) { - return false; - } - vector uRes; uRes.reserve(end - begin); - if (!cut(begin, end, uRes)) { - return false; - } + cut(begin, end, uRes); size_t offset = res.size(); res.resize(res.size() + uRes.size()); for(size_t i = 0; i < uRes.size(); i ++, offset++) { TransCode::encode(uRes[i], res[offset]); } - return true; } private: void loadSpecialSymbols_() { diff --git a/src/Trie.hpp b/src/Trie.hpp index 7768dfc..ac2659c 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -88,9 +88,9 @@ class Trie { void findByLimit(Unicode::const_iterator begin, Unicode::const_iterator end, - vector&res, size_t min_word_len, - size_t max_word_len) const { + size_t max_word_len, + vector&res) const { res.resize(end - begin); // min_word_len start from 1; @@ -130,7 +130,7 @@ class Trie { void find(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - findByLimit(begin, end, res, MIN_WORD_LENGTH, MAX_WORD_LENGTH); + findByLimit(begin, end, MIN_WORD_LENGTH, MAX_WORD_LENGTH, res); } void insertNode(const Unicode& key, const DictUnit* ptValue) { if (key.begin() == key.end()) { diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index 8f030da..8bd7c4d 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -128,7 +128,7 @@ TEST(DictTrieTest, Dag) { Unicode unicode; ASSERT_TRUE(TransCode::decode(word, unicode)); vector res; - trie.findByLimit(unicode.begin(), unicode.end(), res, 2, 3); + trie.findByLimit(unicode.begin(), unicode.end(), 2, 3, res); size_t nexts_sizes[] = {1, 0, 1, 0}; ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0])); @@ -143,7 +143,7 @@ TEST(DictTrieTest, Dag) { Unicode unicode; ASSERT_TRUE(TransCode::decode(word, unicode)); vector res; - trie.findByLimit(unicode.begin(), unicode.end(), res, 0, 4); + trie.findByLimit(unicode.begin(), unicode.end(), 0, 4, res); size_t nexts_sizes[] = {3, 1, 2, 1}; ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));