mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add _filterDup
This commit is contained in:
parent
41a4dbdea4
commit
de776afa3f
2
Makefile
2
Makefile
@ -24,7 +24,7 @@ all: demo
|
|||||||
$(CC) $(CCOPT) $<
|
$(CC) $(CCOPT) $<
|
||||||
|
|
||||||
demo: $(OBJS) $(SRCLIB)
|
demo: $(OBJS) $(SRCLIB)
|
||||||
$(DOLINK)
|
$(DOLINK) -liconv
|
||||||
|
|
||||||
$(SRCLIB):
|
$(SRCLIB):
|
||||||
cd $(SRCDIR) && $(MAKE)
|
cd $(SRCDIR) && $(MAKE)
|
||||||
|
8
demo.cpp
8
demo.cpp
@ -20,19 +20,19 @@ int main()
|
|||||||
string title;
|
string title;
|
||||||
title = "我来到北京清华大学";
|
title = "我来到北京清华大学";
|
||||||
res.clear();
|
res.clear();
|
||||||
segment.extract(title, res);
|
segment.extract(title, res, 10);
|
||||||
|
|
||||||
title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
||||||
res.clear();
|
res.clear();
|
||||||
segment.extract(title, res);
|
segment.extract(title, res, 10);
|
||||||
|
|
||||||
title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
|
title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
|
||||||
res.clear();
|
res.clear();
|
||||||
segment.extract(title, res);
|
segment.extract(title, res, 10);
|
||||||
|
|
||||||
title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
|
title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
|
||||||
res.clear();
|
res.clear();
|
||||||
segment.extract(title, res);
|
segment.extract(title, res, 10);
|
||||||
|
|
||||||
segment.destroy();
|
segment.destroy();
|
||||||
|
|
||||||
|
@ -246,6 +246,14 @@ namespace CppJieba
|
|||||||
bool Segment::_filter(vector<string>& utf8Strs)
|
bool Segment::_filter(vector<string>& utf8Strs)
|
||||||
{
|
{
|
||||||
bool retFlag;
|
bool retFlag;
|
||||||
|
retFlag = _filterDuplicate(utf8Strs);
|
||||||
|
if(!retFlag)
|
||||||
|
{
|
||||||
|
LogError("_filterDuplicate failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
LogDebug(string_format("_filterDuplicate res:[%s]", joinStr(utf8Strs, ",").c_str()));
|
||||||
|
|
||||||
retFlag = _filterSingleWord(utf8Strs);
|
retFlag = _filterSingleWord(utf8Strs);
|
||||||
if(!retFlag)
|
if(!retFlag)
|
||||||
{
|
{
|
||||||
@ -265,6 +273,24 @@ namespace CppJieba
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Remove repeated strings from utf8Strs in place.
 *
 * The first occurrence of every distinct string is kept and the relative
 * order of the surviving elements is unchanged.
 *
 * @param utf8Strs  list to deduplicate; modified in place.
 * @return always true (the bool return matches the other _filter* steps).
 */
bool Segment::_filterDuplicate(vector<string>& utf8Strs)
{
    set<string> seen;
    // Slot that receives the next element to keep. Everything in front of
    // it is the deduplicated prefix built so far.
    vector<string>::iterator keep = utf8Strs.begin();
    for(vector<string>::iterator it = utf8Strs.begin(); it != utf8Strs.end(); ++it)
    {
        // insert() reports whether the value was new, so one set lookup
        // per element is enough (no separate find()).
        if(seen.insert(*it).second)
        {
            if(keep != it)
            {
                // The string sitting at *keep has already been discarded,
                // so swapping is a cheap way to move *it into place.
                keep->swap(*it);
            }
            ++keep;
        }
    }
    // Drop the leftover tail with a single erase instead of erasing one
    // element at a time inside the loop, which shifted the tail on every
    // duplicate and made the pass quadratic.
    utf8Strs.erase(keep, utf8Strs.end());
    return true;
}
|
||||||
|
|
||||||
bool Segment::_filterSingleWord(vector<string>& utf8Strs)
|
bool Segment::_filterSingleWord(vector<string>& utf8Strs)
|
||||||
{
|
{
|
||||||
for(vector<string>::iterator it = utf8Strs.begin(); it != utf8Strs.end();)
|
for(vector<string>::iterator it = utf8Strs.begin(); it != utf8Strs.end();)
|
||||||
|
@ -30,6 +30,7 @@ namespace CppJieba
|
|||||||
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
||||||
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
||||||
bool _filter(vector<string>& utf8Strs);
|
bool _filter(vector<string>& utf8Strs);
|
||||||
|
bool _filterDuplicate(vector<string>& utf8Strs);
|
||||||
bool _filterSingleWord(vector<string>& utf8Strs);
|
bool _filterSingleWord(vector<string>& utf8Strs);
|
||||||
bool _filterSubstr(vector<string>& utf8Strs);
|
bool _filterSubstr(vector<string>& utf8Strs);
|
||||||
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
|
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user