add scripts/filter_dict.py

This commit is contained in:
gwdwyy 2013-07-10 18:57:12 +08:00
parent 35b4676dd1
commit 7554740ae2
2 changed files with 26 additions and 2 deletions

View File

@@ -174,10 +174,11 @@ using namespace CppJieba;
int main()
{
Segment segment;
segment.init("dicts/segdict.utf8.v2.0");
segment.init("dicts/segdict.utf8.v2.1");
vector<string> res;
string title = "我来到北京清华大学";
//string title = "我来到北京清华大学";
string title = "特价camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
segment.cutDAG(title, res);
for(int i = 0; i < res.size(); i++)
{

23
scripts/filter_dict.py Executable file
View File

@@ -0,0 +1,23 @@
#!/usr/bin/python
"""Filter a dictionary file, dropping duplicate words and non-positive counts.

Usage: filter_dict.py dict_file_path

Every non-blank input line must consist of exactly three fields separated by
single spaces: ``word count tag``.  Surviving lines are written to standard
output with surrounding whitespace stripped, in their original order.
"""
import sys


def filter_entries(lines):
    """Yield the stripped lines of *lines* that pass the dictionary filter.

    A line is dropped when its word has already appeared on an earlier line,
    or when its count is less than or equal to zero.  Blank lines are skipped.

    Args:
        lines: iterable of text lines in ``word count tag`` form.

    Yields:
        Each accepted line with leading/trailing whitespace removed.

    Raises:
        ValueError: if a non-blank line does not have exactly three
            space-separated fields, or its count is not an integer.
    """
    seen = set()
    for lineno, raw in enumerate(lines, 1):
        entry = raw.strip()
        if not entry:
            # A blank (often trailing) line carries no entry; skip it rather
            # than failing on the field unpacking below.
            continue
        fields = entry.split(" ")
        if len(fields) != 3:
            raise ValueError(
                "line %d: expected 'word count tag', got %r" % (lineno, entry)
            )
        word, cnt, _tag = fields
        if word in seen:
            continue
        # Record the word before the count check: the first occurrence wins
        # even when it is itself dropped for a non-positive count, so a later
        # duplicate is never emitted in its place.
        seen.add(word)
        if int(cnt) <= 0:
            continue
        yield entry


def main(argv):
    """Run the filter on the file named by ``argv[1]``.

    Args:
        argv: command-line argument list, program name first.

    Returns:
        Process exit status: 0 on success, 1 when no file path was given.
    """
    if len(argv) < 2:
        prog = argv[0] if argv else "filter_dict.py"
        sys.stderr.write("usage : %s dict_file_path\n" % prog)
        return 1
    with open(argv[1], "r") as fin:
        for entry in filter_entries(fin):
            # Single-argument print() behaves the same on Python 2 and 3.
            print(entry)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))