mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
read() and then split -- faster; from __future__ import with_statement
This commit is contained in:
parent
966532b462
commit
65b78b2b4d
@@ -1,3 +1,4 @@
|
|||||||
|
from __future__ import with_statement
|
||||||
import re
|
import re
|
||||||
import math
|
import math
|
||||||
import os,sys
|
import os,sys
|
||||||
@@ -19,10 +20,10 @@ def gen_trie(f_name):
|
|||||||
ltotal = 0.0
|
ltotal = 0.0
|
||||||
with open(f_name, 'rb') as f:
|
with open(f_name, 'rb') as f:
|
||||||
lineno = 1
|
lineno = 1
|
||||||
for line in f.readlines():
|
for line in f.read().rstrip().decode('utf-8').split('\n'):
|
||||||
lineno += 1
|
lineno += 1
|
||||||
try:
|
try:
|
||||||
word,freq,_ = line.decode('utf-8').split(' ')
|
word,freq,_ = line.split(' ')
|
||||||
freq = float(freq)
|
freq = float(freq)
|
||||||
lfreq[word] = freq
|
lfreq[word] = freq
|
||||||
ltotal+=freq
|
ltotal+=freq
|
||||||
|
Loading…
x
Reference in New Issue
Block a user