mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Merge pull request #9 from aholic/master
remove NO_CODING_LOG | make MixSegment looks better
This commit is contained in:
commit
d47900d65a
@ -24,13 +24,11 @@ namespace CppJieba
|
||||
public:
|
||||
bool init()
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if(_getInitFlag())
|
||||
{
|
||||
LogError("already inited before now.");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
if(!_trie.init())
|
||||
{
|
||||
LogError("_trie.init failed.");
|
||||
@ -47,12 +45,10 @@ namespace CppJieba
|
||||
}
|
||||
bool dispose()
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
_trie.dispose();
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
@ -65,18 +61,12 @@ namespace CppJieba
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
#ifndef NO_CODING_LOG
|
||||
//if (!_getInitFlag())
|
||||
//{
|
||||
// LogError("not inited.");
|
||||
// return false;
|
||||
//}
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
//resut of searching in trie tree
|
||||
vector<pair<uint, const TrieNodeInfo*> > tRes;
|
||||
|
||||
@ -123,26 +113,21 @@ namespace CppJieba
|
||||
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if (!_getInitFlag())
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("not inited.");
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
if (begin > end)
|
||||
{
|
||||
LogError("begin > end");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes))
|
||||
{
|
||||
LogError("get unicode cut result error.");
|
||||
return false;
|
||||
}
|
||||
string tmp;
|
||||
|
||||
string tmp;
|
||||
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
|
||||
{
|
||||
if (TransCode::encode(*uItr, tmp))
|
||||
|
@ -55,72 +55,95 @@ namespace CppJieba
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
public:
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
//if(!_getInitFlag())
|
||||
//{
|
||||
// LogError("not inited.");
|
||||
// return false;
|
||||
//}
|
||||
assert(_getInitFlag());
|
||||
if(begin == end)
|
||||
if(begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<TrieNodeInfo> infos;
|
||||
if(!_mpSeg.cut(begin, end, infos))
|
||||
{
|
||||
LogError("mpSeg cutDAG failed.");
|
||||
return false;
|
||||
}
|
||||
Unicode unico;
|
||||
|
||||
vector<Unicode> hmmRes;
|
||||
string tmp;
|
||||
for(uint i= 0; i < infos.size(); i++)
|
||||
Unicode piece;
|
||||
for (uint i = 0, j = 0; i < infos.size(); i++)
|
||||
{
|
||||
TransCode::encode(infos[i].word,tmp);
|
||||
if(1 == infos[i].word.size())
|
||||
//if mp get a word, it's ok, put it into result
|
||||
if (1 != infos[i].word.size())
|
||||
{
|
||||
unico.push_back(infos[i].word[0]);
|
||||
res.push_back(infos[i].word);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
|
||||
// if mp get a single one, collect it in sequence
|
||||
j = i;
|
||||
while (j < infos.size() && infos[j].word.size() == 1)
|
||||
{
|
||||
if(!unico.empty())
|
||||
{
|
||||
hmmRes.clear();
|
||||
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
|
||||
{
|
||||
LogError("_hmmSeg cut failed.");
|
||||
return false;
|
||||
}
|
||||
for(uint j = 0; j < hmmRes.size(); j++)
|
||||
{
|
||||
TransCode::encode(hmmRes[j], tmp);
|
||||
res.push_back(tmp);
|
||||
}
|
||||
}
|
||||
unico.clear();
|
||||
TransCode::encode(infos[i].word, tmp);
|
||||
res.push_back(tmp);
|
||||
piece.push_back(infos[j].word[0]);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
if(!unico.empty())
|
||||
{
|
||||
hmmRes.clear();
|
||||
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
|
||||
|
||||
// cut the sequence with hmm
|
||||
if (!_hmmSeg.cut(piece.begin(), piece.end(), hmmRes))
|
||||
{
|
||||
LogError("_hmmSeg cut failed.");
|
||||
return false;
|
||||
}
|
||||
for(uint j = 0; j < hmmRes.size(); j++)
|
||||
|
||||
//put hmm result to return
|
||||
for (uint k = 0; k < hmmRes.size(); k++)
|
||||
{
|
||||
TransCode::encode(hmmRes[j], tmp);
|
||||
res.push_back(tmp);
|
||||
res.push_back(hmmRes[k]);
|
||||
}
|
||||
|
||||
//clear tmp vars
|
||||
piece.clear();
|
||||
hmmRes.clear();
|
||||
|
||||
//let i jump over this piece
|
||||
i = j - 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
if(begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes))
|
||||
{
|
||||
LogError("get unicode cut result error.");
|
||||
return false;
|
||||
}
|
||||
|
||||
string tmp;
|
||||
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
|
||||
{
|
||||
if (TransCode::encode(*uItr, tmp))
|
||||
{
|
||||
res.push_back(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
LogError("encode failed.");
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -27,12 +27,10 @@ namespace CppJieba
|
||||
public:
|
||||
bool init()
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if (_getInitFlag())
|
||||
{
|
||||
LogError("inited.");
|
||||
}
|
||||
#endif
|
||||
if (!_hmmSeg.init())
|
||||
{
|
||||
LogError("_hmmSeg init");
|
||||
@ -47,12 +45,10 @@ namespace CppJieba
|
||||
}
|
||||
bool dispose()
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
_fullSeg.dispose();
|
||||
_hmmSeg.dispose();
|
||||
_setInitFlag(false);
|
||||
@ -66,18 +62,12 @@ namespace CppJieba
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
#ifndef NO_CODING_LOG
|
||||
//if (!_getInitFlag())
|
||||
//{
|
||||
// LogError("not inited.");
|
||||
// return false;
|
||||
//}
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
//use hmm cut first
|
||||
vector<Unicode> hmmRes;
|
||||
if (!_hmmSeg.cut(begin, end, hmmRes))
|
||||
@ -113,18 +103,13 @@ namespace CppJieba
|
||||
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
||||
{
|
||||
#ifndef NO_CODING_LOG
|
||||
if (!_getInitFlag())
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("not inited.");
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
if (begin > end)
|
||||
{
|
||||
LogError("begin > end");
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes))
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user