# Patch summary (descriptive preamble placed before the first "diff --git" header).
#
# Files touched: test/test_file.py and test/test_pos_file.py.
#
# test/test_file.py
#   * hunk -3,6 +3,7: adds a `jieba.initialize()` call right after `import jieba`.
#   * hunk -14,7 +15,7: the per-word `print >> log_f` line now encodes each word
#     with "utf-8" instead of "gbk".
#
# test/test_pos_file.py
#   * hunk -2,6 +2,8: adds `import jieba` and `jieba.initialize()` ahead of the
#     existing `import jieba.posseg as pseg`.
#   * hunk -14,7 +16,7: same "gbk" -> "utf-8" change on the `print >> log_f` line.
#
# NOTE(review): presumably the explicit initialize() call keeps jieba's start-up
#   work out of the timed section (the hunk headers show `tm_cost = t2-t1`, but
#   the timing code itself is outside the visible hunks) - confirm.
# NOTE(review): the reason for switching the log encoding is not shown here;
#   presumably some words are not representable in GBK - confirm.
# NOTE(review): the patch text below appears with all of its lines joined onto
#   one line; the original line breaks and indentation of the hunk lines cannot
#   be recovered from this view and would need to be restored before applying.
diff --git a/test/test_file.py b/test/test_file.py index fe2d93a..adfb0bb 100644 --- a/test/test_file.py +++ b/test/test_file.py @@ -3,6 +3,7 @@ import sys,time import sys sys.path.append("../") import jieba +jieba.initialize() url = sys.argv[1] content = open(url,"rb").read() @@ -14,7 +15,7 @@ tm_cost = t2-t1 log_f = open("1.log","wb") for w in words: - print >> log_f, w.encode("gbk"), "/" , + print >> log_f, w.encode("utf-8"), "/" , print 'cost',tm_cost print 'speed' , len(content)/tm_cost, " bytes/second" diff --git a/test/test_pos_file.py b/test/test_pos_file.py index fd14a2d..ab70156 100644 --- a/test/test_pos_file.py +++ b/test/test_pos_file.py @@ -2,6 +2,8 @@ import urllib2 import sys,time import sys sys.path.append("../") +import jieba +jieba.initialize() import jieba.posseg as pseg url = sys.argv[1] @@ -14,7 +16,7 @@ tm_cost = t2-t1 log_f = open("1.log","wb") for w in words: - print >> log_f, w.encode("gbk"), "/" , + print >> log_f, w.encode("utf-8"), "/" , print 'speed' , len(content)/tm_cost, " bytes/second"