remove vector.erase bug

This commit is contained in:
gwdwyy 2013-07-11 14:47:08 +08:00
parent 09db567fb5
commit 4c7cd1deb2
2 changed files with 36 additions and 26 deletions

View File

@ -41,8 +41,7 @@ namespace CppJieba
LogError("_calcDAG failed."); LogError("_calcDAG failed.");
return false; return false;
} }
LogDebug("_calcDAG finished.");
//cout<<__FILE__<<__LINE__<<endl;
vector<pair<int, double> > dp; vector<pair<int, double> > dp;
retFlag = _calcDP(uniStr, dag, dp); retFlag = _calcDP(uniStr, dag, dp);
@ -51,6 +50,7 @@ namespace CppJieba
LogError("_calcDP failed."); LogError("_calcDP failed.");
return false; return false;
} }
LogDebug("_calcDP finished.");
retFlag = _cutDAG(uniStr, dp, res); retFlag = _cutDAG(uniStr, dp, res);
if(!retFlag) if(!retFlag)
@ -58,18 +58,13 @@ namespace CppJieba
LogError("_cutDAG failed."); LogError("_cutDAG failed.");
return false; return false;
} }
LogDebug("_cutDAG finished.");
retFlag = _filter(res);
if(!retFlag)
{
LogError("_cutDAG failed.");
return false;
}
return true; return true;
} }
bool Segment::extract(const string& utf8Str, vector<string>& keywords) bool Segment::extract(const string& utf8Str, vector<string>& keywords)
{ {
LogInfo(utf8Str);
bool retFlag; bool retFlag;
vector<string> tmp; vector<string> tmp;
retFlag = cutDAG(utf8Str, tmp); retFlag = cutDAG(utf8Str, tmp);
@ -78,17 +73,26 @@ namespace CppJieba
LogError(string_format("cutDAG(%s) failed.", utf8Str.c_str())); LogError(string_format("cutDAG(%s) failed.", utf8Str.c_str()));
return false; return false;
} }
// like str.join([]) in python // like str.join([]) in python
LogDebug(string_format("cutDAG result:[%s]", joinStr(tmp, ",").c_str())); LogDebug(string_format("cutDAG result:[%s]", joinStr(tmp, ",").c_str()));
retFlag = _extract(tmp, keywords, 5); retFlag = _filter(tmp);
if(!retFlag) if(!retFlag)
{ {
LogError("_extract failed."); LogError("_filter failed.");
return false; return false;
} }
LogDebug(string_format("_filter res:[%s]", joinStr(tmp, ",").c_str()));
retFlag = _extractTopN(tmp, keywords, 5);
if(!retFlag)
{
LogError("_extractTopN failed.");
return false;
}
LogDebug("_extractTopN finished.");
LogInfo(string_format("ext res:[%s]", joinStr(keywords, ",").c_str()));
return true; return true;
} }
@ -107,7 +111,7 @@ namespace CppJieba
return a.second < b.second; return a.second < b.second;
} }
bool Segment::_extract(const vector<string>& words, vector<string>& keywords, uint topN) bool Segment::_extractTopN(const vector<string>& words, vector<string>& keywords, uint topN)
{ {
keywords.clear(); keywords.clear();
vector<pair<string, double> > tmp; vector<pair<string, double> > tmp;
@ -118,7 +122,6 @@ namespace CppJieba
tmp.push_back(pair<string, double>(words[i], w)); tmp.push_back(pair<string, double>(words[i], w));
} }
sort(tmp.begin(), tmp.end(), _pair_compare); sort(tmp.begin(), tmp.end(), _pair_compare);
//logging result //logging result
@ -247,6 +250,7 @@ namespace CppJieba
LogError("_filterSingleWord failed."); LogError("_filterSingleWord failed.");
return false; return false;
} }
LogDebug(string_format("_filterSingleWord res:[%s]", joinStr(utf8Strs, ",").c_str()));
retFlag = _filterSubstr(utf8Strs); retFlag = _filterSubstr(utf8Strs);
if(!retFlag) if(!retFlag)
@ -254,6 +258,7 @@ namespace CppJieba
LogError("_filterSubstr failed."); LogError("_filterSubstr failed.");
return false; return false;
} }
LogDebug(string_format("_filterSubstr res:[%s]", joinStr(utf8Strs, ",").c_str()));
return true; return true;
} }
@ -298,7 +303,7 @@ namespace CppJieba
} }
//erase subs from utf8Strs //erase subs from utf8Strs
for(VSI it = utf8Strs.begin(); it != utf8Strs.end(); it++) for(VSI it = utf8Strs.begin(); it != utf8Strs.end(); )
{ {
if(subs.end() != subs.find(*it)) if(subs.end() != subs.find(*it))
{ {
@ -326,17 +331,22 @@ int main()
//segment.init("dicts/jieba.dict.utf8"); //segment.init("dicts/jieba.dict.utf8");
vector<string> res; vector<string> res;
//string title = "我来到北京清华大学"; string title;
//string title = "特价camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏"; title = "我来到北京清华大学";
string title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女"; res.clear();
//string title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女"; segment.extract(title, res);
cout<<title<<endl;
//segment.cutDAG(title, res); title = "特价camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
res.clear();
segment.extract(title, res);
title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
res.clear();
segment.extract(title, res);
title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
res.clear();
segment.extract(title, res); segment.extract(title, res);
for(int i = 0; i < res.size(); i++)
{
cout<<res[i]<<endl;
}
segment.destroy(); segment.destroy();
return 0; return 0;

View File

@ -33,7 +33,7 @@ namespace CppJieba
bool _filterSingleWord(vector<string>& utf8Strs); bool _filterSingleWord(vector<string>& utf8Strs);
bool _filterSubstr(vector<string>& utf8Strs); bool _filterSubstr(vector<string>& utf8Strs);
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b); static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
bool _extract(const vector<string>& words, vector<string>& keywords, uint topN); bool _extractTopN(const vector<string>& words, vector<string>& keywords, uint topN);
private: private:
enum {bufSize = 1024}; enum {bufSize = 1024};