finished cutDAG

This commit is contained in:
gwdwyy 2013-07-09 16:43:43 +08:00
parent 9122aeeba6
commit 83a8474a1d
2 changed files with 54 additions and 19 deletions

View File

@ -22,6 +22,7 @@ namespace CppJieba
bool Segment::cutDAG(const string& chStr, vector<string>& res)
{
bool retFlag;
res.clear();
string uniStr = _utf8ToUni(chStr);
if(uniStr.empty())
@ -32,30 +33,31 @@ namespace CppJieba
//calc DAG
vector<vector<uint> > dag;
_calcDAG(uniStr, dag);
cout<<__FILE__<<__LINE__<<endl;
PRINT_MATRIX(dag);
getchar();
vector<pair<int, double> > dp;
_calcDP(uniStr, dag, dp);
cout<<__FILE__<<__LINE__<<endl;
for(int i = 0 ;i< dp.size(); i++)
retFlag = _calcDAG(uniStr, dag);
if(!retFlag)
{
cout<<dp[i].first<<","
<<dp[i].second<<endl;
LogError("_calcDAG failed.");
return false;
}
//calc dp
//cout<<__FILE__<<__LINE__<<endl;
//PRINT_MATRIX(dag);
//getchar();
vector<pair<int, double> > dp;
retFlag = _calcDP(uniStr, dag, dp);
if(!retFlag)
{
LogError("_calcDP failed.");
return false;
}
retFlag = _cutDAG(uniStr, dp, res);
if(!retFlag)
{
LogError("_cutDAG failed.");
return false;
}
return true;
}
@ -128,6 +130,38 @@ namespace CppJieba
}
}
}
res.pop_back();
return true;
}
// Cuts uniStr into words by following the best-path endpoints stored in dp,
// converting each word back to UTF-8 and appending it to res.
//
// uniStr : text in the internal 2-bytes-per-character encoding.
// dp     : one entry per character of uniStr. dp[i].first is used as the
//          index (inclusive) of the last character of the word that starts
//          at character i. NOTE(review): this meaning is inferred from the
//          original `end = dp[i].first * 2 + 2` arithmetic -- confirm
//          against _calcDP.
// res    : cleared, then filled with the resulting words in order.
//
// Returns false when dp and uniStr disagree in length or when a word cannot
// be converted by unicodeToUtf8; returns true otherwise.
bool Segment::_cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res)
{
    if(dp.size() != uniStr.size()/2)
    {
        LogError("dp or uniStr illegal!");
        return false;
    }
    res.clear();
    uint pos = 0;
    while(pos < dp.size())
    {
        // Only entries that start a word are consulted. The previous loop
        // visited every index, so an entry lying inside an already emitted
        // word could cut the following word at the wrong place.
        uint last = pos;
        if(dp[pos].first > static_cast<int>(pos))
        {
            last = static_cast<uint>(dp[pos].first);
        }
        // An endpoint past the end of the text is clamped to the last
        // character; an endpoint before pos degrades to a one-character
        // word. Either way the loop always advances and no text is dropped.
        if(last >= dp.size())
        {
            last = dp.size() - 1;
        }
        string tmp = unicodeToUtf8(uniStr.substr(pos * 2, (last - pos + 1) * 2));
        if(tmp.empty())
        {
            LogError("unicodeToUtf8 failed.");
            return false;
        }
        res.push_back(tmp);
        pos = last + 1;
    }
    return true;
}

View File

@ -24,6 +24,7 @@ namespace CppJieba
string _utf8ToUni(const string& chStr);
bool _calcDAG(const string& uniStr, vector<vector<uint> >& dag);
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
// Splits uniStr (2 bytes per character) into words using the per-character
// endpoints in dp, converts each word with unicodeToUtf8 and stores the
// words in res (res is cleared first). Returns false when
// dp.size() != uniStr.size()/2 or when a converted word comes back empty.
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
private:
enum {bufSize = 1024};