Change MPSegment's cut(..., vector<TrieNodeInfo>&) to cut(..., vector<Unicode>&), so it outputs the segmented words directly instead of trie node records

This commit is contained in:
wyy 2014-04-08 08:43:32 -07:00
parent 1536a9e2e3
commit 45a7cac784
2 changed files with 16 additions and 22 deletions

View File

@ -64,15 +64,15 @@ namespace CppJieba
return false;
}
vector<TrieNodeInfo> segWordInfos;
if(!cut(begin, end, segWordInfos))
vector<Unicode> words;
if(!cut(begin, end, words))
{
return false;
}
string word;
for(size_t i = 0; i < segWordInfos.size(); i++)
for(size_t i = 0; i < words.size(); i++)
{
if(TransCode::encode(segWordInfos[i].word, word))
if(TransCode::encode(words[i], word))
{
res.push_back(word);
}
@ -84,7 +84,7 @@ namespace CppJieba
return true;
}
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<TrieNodeInfo>& segWordInfos)const
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const
{
if(!_getInitFlag())
{
@ -92,7 +92,6 @@ namespace CppJieba
return false;
}
SegmentContext segContext;
//calc DAG
if(!_calcDAG(begin, end, segContext))
{
@ -106,7 +105,7 @@ namespace CppJieba
return false;
}
if(!_cut(segContext, segWordInfos))
if(!_cut(segContext, res))
{
LogError("_cut failed.");
return false;
@ -172,7 +171,7 @@ namespace CppJieba
return true;
}
bool _cut(SegmentContext& segContext, vector<TrieNodeInfo>& res)const
bool _cut(SegmentContext& segContext, vector<Unicode>& res)const
{
size_t i = 0;
while(i < segContext.size())
@ -180,16 +179,12 @@ namespace CppJieba
const TrieNodeInfo* p = segContext[i].pInfo;
if(p)
{
res.push_back(*p);
res.push_back(p->word);
i += p->word.size();
}
else//single chinese word
{
TrieNodeInfo nodeInfo;
nodeInfo.word.push_back(segContext[i].uniCh);
nodeInfo.freq = 0;
nodeInfo.logFreq = _trie.getMinLogFreq();
res.push_back(nodeInfo);
res.push_back(Unicode(1, segContext[i].uniCh));
i++;
}
}

View File

@ -44,9 +44,8 @@ namespace CppJieba
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
assert(_getInitFlag());
vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(begin, end, infos))
vector<Unicode> words;
if(!_mpSeg.cut(begin, end, words))
{
LogError("mpSeg cutDAG failed.");
return false;
@ -54,20 +53,20 @@ namespace CppJieba
vector<Unicode> hmmRes;
Unicode piece;
for (size_t i = 0, j = 0; i < infos.size(); i++)
for (size_t i = 0, j = 0; i < words.size(); i++)
{
//if mp get a word, it's ok, put it into result
if (1 != infos[i].word.size())
if (1 != words[i].size())
{
res.push_back(infos[i].word);
res.push_back(words[i]);
continue;
}
// if mp get a single one, collect it in sequence
j = i;
while (j < infos.size() && infos[j].word.size() == 1)
while (j < words.size() && words[j].size() == 1)
{
piece.push_back(infos[j].word[0]);
piece.push_back(words[j][0]);
j++;
}