mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Change MPSegment's cut(..., vector<TrieNodeInfo>&) to cut(..., vector<Unicode>&), so it outputs the segmented words directly instead of trie node infos
This commit is contained in:
parent
1536a9e2e3
commit
45a7cac784
@ -64,15 +64,15 @@ namespace CppJieba
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<TrieNodeInfo> segWordInfos;
|
||||
if(!cut(begin, end, segWordInfos))
|
||||
vector<Unicode> words;
|
||||
if(!cut(begin, end, words))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
string word;
|
||||
for(size_t i = 0; i < segWordInfos.size(); i++)
|
||||
for(size_t i = 0; i < words.size(); i++)
|
||||
{
|
||||
if(TransCode::encode(segWordInfos[i].word, word))
|
||||
if(TransCode::encode(words[i], word))
|
||||
{
|
||||
res.push_back(word);
|
||||
}
|
||||
@ -84,7 +84,7 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<TrieNodeInfo>& segWordInfos)const
|
||||
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
@ -92,7 +92,6 @@ namespace CppJieba
|
||||
return false;
|
||||
}
|
||||
SegmentContext segContext;
|
||||
|
||||
//calc DAG
|
||||
if(!_calcDAG(begin, end, segContext))
|
||||
{
|
||||
@ -106,7 +105,7 @@ namespace CppJieba
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!_cut(segContext, segWordInfos))
|
||||
if(!_cut(segContext, res))
|
||||
{
|
||||
LogError("_cut failed.");
|
||||
return false;
|
||||
@ -172,7 +171,7 @@ namespace CppJieba
|
||||
return true;
|
||||
|
||||
}
|
||||
bool _cut(SegmentContext& segContext, vector<TrieNodeInfo>& res)const
|
||||
bool _cut(SegmentContext& segContext, vector<Unicode>& res)const
|
||||
{
|
||||
size_t i = 0;
|
||||
while(i < segContext.size())
|
||||
@ -180,16 +179,12 @@ namespace CppJieba
|
||||
const TrieNodeInfo* p = segContext[i].pInfo;
|
||||
if(p)
|
||||
{
|
||||
res.push_back(*p);
|
||||
res.push_back(p->word);
|
||||
i += p->word.size();
|
||||
}
|
||||
else//single chinese word
|
||||
{
|
||||
TrieNodeInfo nodeInfo;
|
||||
nodeInfo.word.push_back(segContext[i].uniCh);
|
||||
nodeInfo.freq = 0;
|
||||
nodeInfo.logFreq = _trie.getMinLogFreq();
|
||||
res.push_back(nodeInfo);
|
||||
res.push_back(Unicode(1, segContext[i].uniCh));
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
@ -44,9 +44,8 @@ namespace CppJieba
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
|
||||
vector<TrieNodeInfo> infos;
|
||||
if(!_mpSeg.cut(begin, end, infos))
|
||||
vector<Unicode> words;
|
||||
if(!_mpSeg.cut(begin, end, words))
|
||||
{
|
||||
LogError("mpSeg cutDAG failed.");
|
||||
return false;
|
||||
@ -54,20 +53,20 @@ namespace CppJieba
|
||||
|
||||
vector<Unicode> hmmRes;
|
||||
Unicode piece;
|
||||
for (size_t i = 0, j = 0; i < infos.size(); i++)
|
||||
for (size_t i = 0, j = 0; i < words.size(); i++)
|
||||
{
|
||||
// if the MP segmenter produced an entry that is not a single character, put it into the result as-is
|
||||
if (1 != infos[i].word.size())
|
||||
if (1 != words[i].size())
|
||||
{
|
||||
res.push_back(infos[i].word);
|
||||
res.push_back(words[i]);
|
||||
continue;
|
||||
}
|
||||
|
||||
// if the MP segmenter produced a single character, collect the run of consecutive single characters into piece
|
||||
j = i;
|
||||
while (j < infos.size() && infos[j].word.size() == 1)
|
||||
while (j < words.size() && words[j].size() == 1)
|
||||
{
|
||||
piece.push_back(infos[j].word[0]);
|
||||
piece.push_back(words[j][0]);
|
||||
j++;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user