mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
modify test_file.py; use less memory
This commit is contained in:
parent
ed1fa64e27
commit
b77645b3aa
@ -297,7 +297,7 @@ def __lcut_for_search(sentence):
|
|||||||
|
|
||||||
|
|
||||||
@require_initialized
|
@require_initialized
|
||||||
def enable_parallel(processnum):
|
def enable_parallel(processnum=None):
|
||||||
global pool,cut,cut_for_search
|
global pool,cut,cut_for_search
|
||||||
if os.name=='nt':
|
if os.name=='nt':
|
||||||
raise Exception("jieba: parallel mode only supports posix system")
|
raise Exception("jieba: parallel mode only supports posix system")
|
||||||
|
@ -9,14 +9,13 @@ jieba.enable_parallel()
|
|||||||
url = sys.argv[1]
|
url = sys.argv[1]
|
||||||
content = open(url,"rb").read()
|
content = open(url,"rb").read()
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
words = list(jieba.cut(content))
|
words = "/ ".join(jieba.cut(content))
|
||||||
|
|
||||||
t2 = time.time()
|
t2 = time.time()
|
||||||
tm_cost = t2-t1
|
tm_cost = t2-t1
|
||||||
|
|
||||||
log_f = open("1.log","wb")
|
log_f = open("1.log","wb")
|
||||||
for w in words:
|
log_f.write(words.encode('utf-8'))
|
||||||
print >> log_f, w.encode("utf-8"), "/" ,
|
|
||||||
|
|
||||||
print 'speed' , len(content)/tm_cost, " bytes/second"
|
print 'speed' , len(content)/tm_cost, " bytes/second"
|
||||||
|
|
||||||
|
@ -8,14 +8,14 @@ jieba.initialize()
|
|||||||
url = sys.argv[1]
|
url = sys.argv[1]
|
||||||
content = open(url,"rb").read()
|
content = open(url,"rb").read()
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
words = list(jieba.cut(content))
|
words = "/ ".join(jieba.cut(content))
|
||||||
|
|
||||||
t2 = time.time()
|
t2 = time.time()
|
||||||
tm_cost = t2-t1
|
tm_cost = t2-t1
|
||||||
|
|
||||||
log_f = open("1.log","wb")
|
log_f = open("1.log","wb")
|
||||||
for w in words:
|
log_f.write(words.encode('utf-8'))
|
||||||
print >> log_f, w.encode("utf-8"), "/" ,
|
|
||||||
print 'cost',tm_cost
|
print 'cost',tm_cost
|
||||||
print 'speed' , len(content)/tm_cost, " bytes/second"
|
print 'speed' , len(content)/tm_cost, " bytes/second"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user