diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index c1c351b..9ca7292 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -64,15 +64,15 @@ namespace CppJieba return false; } - vector segWordInfos; - if(!cut(begin, end, segWordInfos)) + vector words; + if(!cut(begin, end, words)) { return false; } string word; - for(size_t i = 0; i < segWordInfos.size(); i++) + for(size_t i = 0; i < words.size(); i++) { - if(TransCode::encode(segWordInfos[i].word, word)) + if(TransCode::encode(words[i], word)) { res.push_back(word); } @@ -84,7 +84,7 @@ namespace CppJieba return true; } - bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& segWordInfos)const + bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const { if(!_getInitFlag()) { @@ -92,7 +92,6 @@ namespace CppJieba return false; } SegmentContext segContext; - //calc DAG if(!_calcDAG(begin, end, segContext)) { @@ -106,7 +105,7 @@ namespace CppJieba return false; } - if(!_cut(segContext, segWordInfos)) + if(!_cut(segContext, res)) { LogError("_cut failed."); return false; @@ -172,7 +171,7 @@ namespace CppJieba return true; } - bool _cut(SegmentContext& segContext, vector& res)const + bool _cut(SegmentContext& segContext, vector& res)const { size_t i = 0; while(i < segContext.size()) @@ -180,16 +179,12 @@ namespace CppJieba const TrieNodeInfo* p = segContext[i].pInfo; if(p) { - res.push_back(*p); + res.push_back(p->word); i += p->word.size(); } else//single chinese word { - TrieNodeInfo nodeInfo; - nodeInfo.word.push_back(segContext[i].uniCh); - nodeInfo.freq = 0; - nodeInfo.logFreq = _trie.getMinLogFreq(); - res.push_back(nodeInfo); + res.push_back(Unicode(1, segContext[i].uniCh)); i++; } } diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index a55e69a..36fcb08 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -44,9 +44,8 @@ namespace CppJieba virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { assert(_getInitFlag()); - - vector infos; - if(!_mpSeg.cut(begin, end, infos)) + vector words; + if(!_mpSeg.cut(begin, end, words)) { LogError("mpSeg cutDAG failed."); return false; @@ -54,20 +53,20 @@ namespace CppJieba vector hmmRes; Unicode piece; - for (size_t i = 0, j = 0; i < infos.size(); i++) + for (size_t i = 0, j = 0; i < words.size(); i++) { //if mp get a word, it's ok, put it into result - if (1 != infos[i].word.size()) + if (1 != words[i].size()) { - res.push_back(infos[i].word); + res.push_back(words[i]); continue; } // if mp get a single one, collect it in sequence j = i; - while (j < infos.size() && infos[j].word.size() == 1) + while (j < words.size() && words[j].size() == 1) { - piece.push_back(infos[j].word[0]); + piece.push_back(words[j][0]); j++; }