mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add scripts/filter_dict.py
This commit is contained in:
parent
35b4676dd1
commit
7554740ae2
@ -174,10 +174,11 @@ using namespace CppJieba;
|
|||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
Segment segment;
|
Segment segment;
|
||||||
segment.init("dicts/segdict.utf8.v2.0");
|
segment.init("dicts/segdict.utf8.v2.1");
|
||||||
|
|
||||||
vector<string> res;
|
vector<string> res;
|
||||||
string title = "我来到北京清华大学";
|
//string title = "我来到北京清华大学";
|
||||||
|
string title = "特价!camel骆驼 柔软舒适头层牛皮平底凉鞋女 休闲平跟妈妈鞋夏";
|
||||||
segment.cutDAG(title, res);
|
segment.cutDAG(title, res);
|
||||||
for(int i = 0; i < res.size(); i++)
|
for(int i = 0; i < res.size(); i++)
|
||||||
{
|
{
|
||||||
|
23
scripts/filter_dict.py
Executable file
23
scripts/filter_dict.py
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/usr/bin/python
"""Filter a segmentation dictionary file and print the surviving entries.

Each input line is expected to hold three space-separated fields:
``word count tag``.  An entry is written to standard output (stripped of
surrounding whitespace) unless its word has already appeared on an earlier
line or its count is not a positive integer.

Usage: filter_dict.py dict_file_path
"""

import sys


def filter_dict(lines):
    """Yield the stripped entries of *lines* that pass the filter.

    Args:
        lines: iterable of raw text lines in ``word count tag`` form.

    Yields:
        The stripped line for every first occurrence of a word whose count
        is greater than zero.

    Raises:
        ValueError: if a non-blank line does not have exactly three
            space-separated fields, or its count is not an integer.
    """
    seen = set()
    for line in lines:
        entry = line.strip()
        if not entry:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # entry; skip them instead of failing the three-way unpack.
            continue
        word, cnt, _tag = entry.split(" ")
        if word in seen:
            continue
        # A word is remembered even when its own count is rejected below, so
        # a later duplicate with a positive count is still dropped.
        seen.add(word)
        if int(cnt) <= 0:
            continue
        yield entry


def main(argv):
    """Run the filter on the file named by ``argv[1]``; return an exit status."""
    if len(argv) < 2:
        prog = argv[0] if argv else "filter_dict.py"
        # The program name must actually be substituted into the message.
        sys.stdout.write("usage : %s dict_file_path\n" % prog)
        return 1

    with open(argv[1], "r") as fin:
        for entry in filter_dict(fin):
            sys.stdout.write(entry + "\n")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user