Change MPSegment::cut's output parameter from vector<TrieNodeInfo>& to vector<Unicode>&

This commit is contained in:
wyy 2014-04-08 08:43:32 -07:00
parent 1536a9e2e3
commit 45a7cac784
2 changed files with 16 additions and 22 deletions

View File

@@ -64,15 +64,15 @@ namespace CppJieba
return false; return false;
} }
vector<TrieNodeInfo> segWordInfos; vector<Unicode> words;
if(!cut(begin, end, segWordInfos)) if(!cut(begin, end, words))
{ {
return false; return false;
} }
string word; string word;
for(size_t i = 0; i < segWordInfos.size(); i++) for(size_t i = 0; i < words.size(); i++)
{ {
if(TransCode::encode(segWordInfos[i].word, word)) if(TransCode::encode(words[i], word))
{ {
res.push_back(word); res.push_back(word);
} }
@@ -84,7 +84,7 @@ namespace CppJieba
return true; return true;
} }
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<TrieNodeInfo>& segWordInfos)const bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@@ -92,7 +92,6 @@ namespace CppJieba
return false; return false;
} }
SegmentContext segContext; SegmentContext segContext;
//calc DAG //calc DAG
if(!_calcDAG(begin, end, segContext)) if(!_calcDAG(begin, end, segContext))
{ {
@@ -106,7 +105,7 @@ namespace CppJieba
return false; return false;
} }
if(!_cut(segContext, segWordInfos)) if(!_cut(segContext, res))
{ {
LogError("_cut failed."); LogError("_cut failed.");
return false; return false;
@@ -172,7 +171,7 @@ namespace CppJieba
return true; return true;
} }
bool _cut(SegmentContext& segContext, vector<TrieNodeInfo>& res)const bool _cut(SegmentContext& segContext, vector<Unicode>& res)const
{ {
size_t i = 0; size_t i = 0;
while(i < segContext.size()) while(i < segContext.size())
@@ -180,16 +179,12 @@ namespace CppJieba
const TrieNodeInfo* p = segContext[i].pInfo; const TrieNodeInfo* p = segContext[i].pInfo;
if(p) if(p)
{ {
res.push_back(*p); res.push_back(p->word);
i += p->word.size(); i += p->word.size();
} }
else//single chinese word else//single chinese word
{ {
TrieNodeInfo nodeInfo; res.push_back(Unicode(1, segContext[i].uniCh));
nodeInfo.word.push_back(segContext[i].uniCh);
nodeInfo.freq = 0;
nodeInfo.logFreq = _trie.getMinLogFreq();
res.push_back(nodeInfo);
i++; i++;
} }
} }

View File

@@ -44,9 +44,8 @@ namespace CppJieba
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{ {
assert(_getInitFlag()); assert(_getInitFlag());
vector<Unicode> words;
vector<TrieNodeInfo> infos; if(!_mpSeg.cut(begin, end, words))
if(!_mpSeg.cut(begin, end, infos))
{ {
LogError("mpSeg cutDAG failed."); LogError("mpSeg cutDAG failed.");
return false; return false;
@@ -54,20 +53,20 @@ namespace CppJieba
vector<Unicode> hmmRes; vector<Unicode> hmmRes;
Unicode piece; Unicode piece;
for (size_t i = 0, j = 0; i < infos.size(); i++) for (size_t i = 0, j = 0; i < words.size(); i++)
{ {
//if mp get a word, it's ok, put it into result //if mp get a word, it's ok, put it into result
if (1 != infos[i].word.size()) if (1 != words[i].size())
{ {
res.push_back(infos[i].word); res.push_back(words[i]);
continue; continue;
} }
// if mp get a single one, collect it in sequence // if mp get a single one, collect it in sequence
j = i; j = i;
while (j < infos.size() && infos[j].word.size() == 1) while (j < words.size() && words[j].size() == 1)
{ {
piece.push_back(infos[j].word[0]); piece.push_back(words[j][0]);
j++; j++;
} }