mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
remove vector.erase bug
This commit is contained in:
parent
09db567fb5
commit
4c7cd1deb2
60
Segment.cpp
60
Segment.cpp
@ -41,8 +41,7 @@ namespace CppJieba
|
|||||||
LogError("_calcDAG failed.");
|
LogError("_calcDAG failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug("_calcDAG finished.");
|
||||||
//cout<<__FILE__<<__LINE__<<endl;
|
|
||||||
|
|
||||||
vector<pair<int, double> > dp;
|
vector<pair<int, double> > dp;
|
||||||
retFlag = _calcDP(uniStr, dag, dp);
|
retFlag = _calcDP(uniStr, dag, dp);
|
||||||
@ -51,6 +50,7 @@ namespace CppJieba
|
|||||||
LogError("_calcDP failed.");
|
LogError("_calcDP failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug("_calcDP finished.");
|
||||||
|
|
||||||
retFlag = _cutDAG(uniStr, dp, res);
|
retFlag = _cutDAG(uniStr, dp, res);
|
||||||
if(!retFlag)
|
if(!retFlag)
|
||||||
@ -58,18 +58,13 @@ namespace CppJieba
|
|||||||
LogError("_cutDAG failed.");
|
LogError("_cutDAG failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug("_cutDAG finished.");
|
||||||
retFlag = _filter(res);
|
|
||||||
if(!retFlag)
|
|
||||||
{
|
|
||||||
LogError("_cutDAG failed.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool Segment::extract(const string& utf8Str, vector<string>& keywords)
|
bool Segment::extract(const string& utf8Str, vector<string>& keywords)
|
||||||
{
|
{
|
||||||
|
LogInfo(utf8Str);
|
||||||
bool retFlag;
|
bool retFlag;
|
||||||
vector<string> tmp;
|
vector<string> tmp;
|
||||||
retFlag = cutDAG(utf8Str, tmp);
|
retFlag = cutDAG(utf8Str, tmp);
|
||||||
@ -78,17 +73,26 @@ namespace CppJieba
|
|||||||
LogError(string_format("cutDAG(%s) failed.", utf8Str.c_str()));
|
LogError(string_format("cutDAG(%s) failed.", utf8Str.c_str()));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// like str.join([]) in python
|
// like str.join([]) in python
|
||||||
LogDebug(string_format("cutDAG result:[%s]", joinStr(tmp, ",").c_str()));
|
LogDebug(string_format("cutDAG result:[%s]", joinStr(tmp, ",").c_str()));
|
||||||
|
|
||||||
retFlag = _extract(tmp, keywords, 5);
|
retFlag = _filter(tmp);
|
||||||
if(!retFlag)
|
if(!retFlag)
|
||||||
{
|
{
|
||||||
LogError("_extract failed.");
|
LogError("_filter failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug(string_format("_filter res:[%s]", joinStr(tmp, ",").c_str()));
|
||||||
|
|
||||||
|
retFlag = _extractTopN(tmp, keywords, 5);
|
||||||
|
if(!retFlag)
|
||||||
|
{
|
||||||
|
LogError("_extractTopN failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
LogDebug("_extractTopN finished.");
|
||||||
|
|
||||||
|
LogInfo(string_format("ext res:[%s]", joinStr(keywords, ",").c_str()));
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,7 +111,7 @@ namespace CppJieba
|
|||||||
return a.second < b.second;
|
return a.second < b.second;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Segment::_extract(const vector<string>& words, vector<string>& keywords, uint topN)
|
bool Segment::_extractTopN(const vector<string>& words, vector<string>& keywords, uint topN)
|
||||||
{
|
{
|
||||||
keywords.clear();
|
keywords.clear();
|
||||||
vector<pair<string, double> > tmp;
|
vector<pair<string, double> > tmp;
|
||||||
@ -117,7 +121,6 @@ namespace CppJieba
|
|||||||
double w = getUtf8WordWeight(words[i]);
|
double w = getUtf8WordWeight(words[i]);
|
||||||
tmp.push_back(pair<string, double>(words[i], w));
|
tmp.push_back(pair<string, double>(words[i], w));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
sort(tmp.begin(), tmp.end(), _pair_compare);
|
sort(tmp.begin(), tmp.end(), _pair_compare);
|
||||||
|
|
||||||
@ -247,6 +250,7 @@ namespace CppJieba
|
|||||||
LogError("_filterSingleWord failed.");
|
LogError("_filterSingleWord failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug(string_format("_filterSingleWord res:[%s]", joinStr(utf8Strs, ",").c_str()));
|
||||||
|
|
||||||
retFlag = _filterSubstr(utf8Strs);
|
retFlag = _filterSubstr(utf8Strs);
|
||||||
if(!retFlag)
|
if(!retFlag)
|
||||||
@ -254,6 +258,7 @@ namespace CppJieba
|
|||||||
LogError("_filterSubstr failed.");
|
LogError("_filterSubstr failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
LogDebug(string_format("_filterSubstr res:[%s]", joinStr(utf8Strs, ",").c_str()));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -298,7 +303,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
|
|
||||||
//erase subs from utf8Strs
|
//erase subs from utf8Strs
|
||||||
for(VSI it = utf8Strs.begin(); it != utf8Strs.end(); it++)
|
for(VSI it = utf8Strs.begin(); it != utf8Strs.end(); )
|
||||||
{
|
{
|
||||||
if(subs.end() != subs.find(*it))
|
if(subs.end() != subs.find(*it))
|
||||||
{
|
{
|
||||||
@ -326,17 +331,22 @@ int main()
|
|||||||
//segment.init("dicts/jieba.dict.utf8");
|
//segment.init("dicts/jieba.dict.utf8");
|
||||||
|
|
||||||
vector<string> res;
|
vector<string> res;
|
||||||
//string title = "我来到北京清华大学";
|
string title;
|
||||||
//string title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
title = "我来到北京清华大学";
|
||||||
string title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
|
res.clear();
|
||||||
//string title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
|
segment.extract(title, res);
|
||||||
cout<<title<<endl;
|
|
||||||
//segment.cutDAG(title, res);
|
title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
||||||
|
res.clear();
|
||||||
|
segment.extract(title, res);
|
||||||
|
|
||||||
|
title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
|
||||||
|
res.clear();
|
||||||
|
segment.extract(title, res);
|
||||||
|
|
||||||
|
title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
|
||||||
|
res.clear();
|
||||||
segment.extract(title, res);
|
segment.extract(title, res);
|
||||||
for(int i = 0; i < res.size(); i++)
|
|
||||||
{
|
|
||||||
cout<<res[i]<<endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
segment.destroy();
|
segment.destroy();
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -33,7 +33,7 @@ namespace CppJieba
|
|||||||
bool _filterSingleWord(vector<string>& utf8Strs);
|
bool _filterSingleWord(vector<string>& utf8Strs);
|
||||||
bool _filterSubstr(vector<string>& utf8Strs);
|
bool _filterSubstr(vector<string>& utf8Strs);
|
||||||
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
|
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
|
||||||
bool _extract(const vector<string>& words, vector<string>& keywords, uint topN);
|
bool _extractTopN(const vector<string>& words, vector<string>& keywords, uint topN);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum {bufSize = 1024};
|
enum {bufSize = 1024};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user