jieba/test/test_file.py
2012-11-06 07:17:26 +08:00

21 lines
353 B
Python

import urllib2
import sys,time
import sys
sys.path.append("../")
import jieba
# Benchmark driver: segment the file named on the command line with
# jieba.cut, dump the resulting tokens to "1.log" and print the throughput.
#
# Usage: <this script> <input-file>
if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError traceback.
    sys.exit("usage: %s <input-file>" % sys.argv[0])

# NOTE: despite its name, `url` is a local file path handed to open().
url = sys.argv[1]

# Read the raw bytes; the context manager closes the handle right after the
# read instead of leaving it open for the rest of the run.
with open(url, "rb") as in_f:
    content = in_f.read()

# Only the segmentation is timed. list() materialises whatever jieba.cut
# returns inside the timed region.
t1 = time.time()
words = list(jieba.cut(content))
t2 = time.time()
tm_cost = t2 - t1

# Dump every token followed by "/" to the log. The `with` block guarantees
# the log is flushed and closed even if encoding one of the tokens fails.
# NOTE(review): assumes each token can be encoded as GBK; a token that
# cannot will raise from .encode() -- confirm this is acceptable for the
# inputs used with this test.
with open("1.log", "wb") as log_f:
    for w in words:
        # The trailing comma suppresses the newline, so all tokens end up
        # on one line separated by the print statement's own spacing.
        print >> log_f, w.encode("gbk"), "/" ,

# Two consecutive time.time() samples can be equal (very small input or a
# coarse clock); report an infinite rate rather than dividing by zero.
if tm_cost > 0:
    speed = len(content) / tm_cost
else:
    speed = float("inf")

# One pre-formatted string: yields exactly what printing the three items
# 'speed', <rate>, " bytes/second" yields (hence the two spaces before
# "bytes").
print("speed %s  bytes/second" % speed)