mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add _filterDup
This commit is contained in:
parent
41a4dbdea4
commit
de776afa3f
2
Makefile
2
Makefile
@ -24,7 +24,7 @@ all: demo
|
||||
$(CC) $(CCOPT) $<
|
||||
|
||||
demo: $(OBJS) $(SRCLIB)
|
||||
$(DOLINK)
|
||||
$(DOLINK) -liconv
|
||||
|
||||
$(SRCLIB):
|
||||
cd $(SRCDIR) && $(MAKE)
|
||||
|
8
demo.cpp
8
demo.cpp
@ -20,19 +20,19 @@ int main()
|
||||
string title;
|
||||
title = "我来到北京清华大学";
|
||||
res.clear();
|
||||
segment.extract(title, res);
|
||||
segment.extract(title, res, 10);
|
||||
|
||||
title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
||||
res.clear();
|
||||
segment.extract(title, res);
|
||||
segment.extract(title, res, 10);
|
||||
|
||||
title = "包邮拉菲草18cm大檐进口草帽子超强遮阳防晒欧美日韩新款夏天 女";
|
||||
res.clear();
|
||||
segment.extract(title, res);
|
||||
segment.extract(title, res, 10);
|
||||
|
||||
title = "2013新款19CM超大檐帽 遮阳草帽子 沙滩帽防晒大檐欧美新款夏天女";
|
||||
res.clear();
|
||||
segment.extract(title, res);
|
||||
segment.extract(title, res, 10);
|
||||
|
||||
segment.destroy();
|
||||
|
||||
|
@ -246,6 +246,14 @@ namespace CppJieba
|
||||
bool Segment::_filter(vector<string>& utf8Strs)
|
||||
{
|
||||
bool retFlag;
|
||||
retFlag = _filterDuplicate(utf8Strs);
|
||||
if(!retFlag)
|
||||
{
|
||||
LogError("_filterDuplicate failed.");
|
||||
return false;
|
||||
}
|
||||
LogDebug(string_format("_filterDuplicate res:[%s]", joinStr(utf8Strs, ",").c_str()));
|
||||
|
||||
retFlag = _filterSingleWord(utf8Strs);
|
||||
if(!retFlag)
|
||||
{
|
||||
@ -265,6 +273,24 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
// Removes repeated strings from utf8Strs in place.
//
// The first occurrence of every distinct string is kept and the relative
// order of the kept strings is unchanged; every later occurrence is dropped.
//
// utf8Strs: in/out list of strings to de-duplicate.
// Returns:  always true (the bool return matches the other _filterXxx steps
//           called from _filter, which check it).
bool Segment::_filterDuplicate(vector<string>& utf8Strs)
{
    set<string> seen;

    // 'keep' marks the slot where the next unique string belongs. Compacting
    // in a single pass and trimming the tail once avoids the quadratic cost
    // of erasing elements one by one from the middle of the vector.
    vector<string>::iterator keep = utf8Strs.begin();
    for(vector<string>::iterator it = utf8Strs.begin(); it != utf8Strs.end(); ++it)
    {
        // insert() reports through .second whether the value was new, so a
        // separate find() is not needed.
        if(seen.insert(*it).second)
        {
            if(keep != it)
            {
                // The slot at 'keep' holds a duplicate or an already
                // relocated value, so swapping loses nothing and avoids a copy.
                keep->swap(*it);
            }
            ++keep;
        }
    }

    // Everything from 'keep' onwards is leftover duplicates / swapped-out slots.
    utf8Strs.erase(keep, utf8Strs.end());
    return true;
}
|
||||
|
||||
bool Segment::_filterSingleWord(vector<string>& utf8Strs)
|
||||
{
|
||||
for(vector<string>::iterator it = utf8Strs.begin(); it != utf8Strs.end();)
|
||||
|
@ -30,6 +30,7 @@ namespace CppJieba
|
||||
bool _calcDP(const string& uniStr, const vector<vector<uint> >& dag, vector<pair<int, double> >& res);
|
||||
bool _cutDAG(const string& uniStr, const vector<pair<int, double> >& dp, vector<string>& res);
|
||||
bool _filter(vector<string>& utf8Strs);
|
||||
// Removes repeated strings from utf8Strs in place, keeping the first occurrence of each in its original order; always returns true.
bool _filterDuplicate(vector<string>& utf8Strs);
|
||||
bool _filterSingleWord(vector<string>& utf8Strs);
|
||||
bool _filterSubstr(vector<string>& utf8Strs);
|
||||
static bool _pair_compare(const pair<string, double>& a, const pair<string, double>& b);
|
||||
|
Loading…
x
Reference in New Issue
Block a user