Merge pull request #9 from aholic/master

remove NO_CODING_LOG | make MixSegment look better
This commit is contained in:
Wu Yanyi 2013-12-14 06:27:25 -08:00
commit d47900d65a
3 changed files with 73 additions and 80 deletions

View File

@ -24,13 +24,11 @@ namespace CppJieba
public:
bool init()
{
#ifndef NO_CODING_LOG
if(_getInitFlag())
{
LogError("already inited before now.");
return false;
}
#endif
if(!_trie.init())
{
LogError("_trie.init failed.");
@ -47,12 +45,10 @@ namespace CppJieba
}
bool dispose()
{
#ifndef NO_CODING_LOG
if(!_getInitFlag())
{
return true;
}
#endif
_trie.dispose();
_setInitFlag(false);
return true;
@ -65,18 +61,12 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
assert(_getInitFlag());
#ifndef NO_CODING_LOG
//if (!_getInitFlag())
//{
// LogError("not inited.");
// return false;
//}
if (begin >= end)
{
LogError("begin >= end");
return false;
}
#endif
//result of searching in trie tree
vector<pair<uint, const TrieNodeInfo*> > tRes;
@ -123,26 +113,21 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
#ifndef NO_CODING_LOG
if (!_getInitFlag())
assert(_getInitFlag());
if (begin >= end)
{
LogError("not inited.");
LogError("begin >= end");
return false;
}
if (begin > end)
{
LogError("begin > end");
return false;
}
#endif
vector<Unicode> uRes;
if (!cut(begin, end, uRes))
{
LogError("get unicode cut result error.");
return false;
}
string tmp;
string tmp;
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
{
if (TransCode::encode(*uItr, tmp))

View File

@ -55,72 +55,95 @@ namespace CppJieba
public:
using SegmentBase::cut;
public:
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
//if(!_getInitFlag())
//{
// LogError("not inited.");
// return false;
//}
assert(_getInitFlag());
if(begin == end)
if(begin >= end)
{
LogError("begin >= end");
return false;
}
vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(begin, end, infos))
{
LogError("mpSeg cutDAG failed.");
return false;
}
Unicode unico;
vector<Unicode> hmmRes;
string tmp;
for(uint i= 0; i < infos.size(); i++)
Unicode piece;
for (uint i = 0, j = 0; i < infos.size(); i++)
{
TransCode::encode(infos[i].word,tmp);
if(1 == infos[i].word.size())
//if mp get a word, it's ok, put it into result
if (1 != infos[i].word.size())
{
unico.push_back(infos[i].word[0]);
res.push_back(infos[i].word);
continue;
}
else
// if mp get a single one, collect it in sequence
j = i;
while (j < infos.size() && infos[j].word.size() == 1)
{
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
unico.clear();
TransCode::encode(infos[i].word, tmp);
res.push_back(tmp);
piece.push_back(infos[j].word[0]);
j++;
}
}
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
// cut the sequence with hmm
if (!_hmmSeg.cut(piece.begin(), piece.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
//put hmm result to return
for (uint k = 0; k < hmmRes.size(); k++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
res.push_back(hmmRes[k]);
}
//clear tmp vars
piece.clear();
hmmRes.clear();
//let i jump over this piece
i = j - 1;
}
return true;
}
// Cuts [begin, end) via the vector<Unicode> overload and appends each piece to
// res as an encoded string (res is appended to, not cleared).
// Returns false, after logging, when begin >= end or when the Unicode-level
// cut fails; returns true otherwise.
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{
// using the segmenter before it is initialized is treated as a programming error
assert(_getInitFlag());
if(begin >= end)
{
LogError("begin >= end");
return false;
}
// delegate the actual cutting to the vector<Unicode> overload
vector<Unicode> uRes;
if (!cut(begin, end, uRes))
{
LogError("get unicode cut result error.");
return false;
}
// convert every Unicode piece into a string and collect it
string tmp;
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
{
if (TransCode::encode(*uItr, tmp))
{
res.push_back(tmp);
}
else
{
// a piece that fails to encode is logged and skipped; the call still returns true
LogError("encode failed.");
}
}
return true;
}
};
}

View File

@ -27,12 +27,10 @@ namespace CppJieba
public:
bool init()
{
#ifndef NO_CODING_LOG
if (_getInitFlag())
{
LogError("inited.");
}
#endif
if (!_hmmSeg.init())
{
LogError("_hmmSeg init");
@ -47,12 +45,10 @@ namespace CppJieba
}
bool dispose()
{
#ifndef NO_CODING_LOG
if(!_getInitFlag())
{
return true;
}
#endif
_fullSeg.dispose();
_hmmSeg.dispose();
_setInitFlag(false);
@ -66,18 +62,12 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
assert(_getInitFlag());
#ifndef NO_CODING_LOG
//if (!_getInitFlag())
//{
// LogError("not inited.");
// return false;
//}
if (begin >= end)
{
LogError("begin >= end");
return false;
}
#endif
//use hmm cut first
vector<Unicode> hmmRes;
if (!_hmmSeg.cut(begin, end, hmmRes))
@ -113,18 +103,13 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
#ifndef NO_CODING_LOG
if (!_getInitFlag())
assert(_getInitFlag());
if (begin >= end)
{
LogError("not inited.");
LogError("begin >= end");
return false;
}
if (begin > end)
{
LogError("begin > end");
return false;
}
#endif
vector<Unicode> uRes;
if (!cut(begin, end, uRes))
{