make model loading faster

This commit is contained in:
fxsjy 2012-10-06 18:28:52 +08:00
parent 64b3c0d0e0
commit 9180b90ae3

View File

@ -3,17 +3,18 @@ import math
import os,sys import os,sys
import pprint import pprint
import finalseg import finalseg
import time
FREQ = {} FREQ = {}
total =0 total =0.0
def gen_trie(f_name): def gen_trie(f_name):
global total global total
trie = {} trie = {}
for line in open(f_name): content = open(f_name,'rb').read().decode('utf-8')
word,freq = line.strip().split(" ") for line in content.split("\n"):
word = word.decode('utf-8') word,freq = line.split(" ")
freq = int(freq) freq = float(freq)
FREQ[word] = freq FREQ[word] = freq
total+=freq total+=freq
p = trie p = trie
@ -24,14 +25,16 @@ def gen_trie(f_name):
p['']='' #ending flag p['']='' #ending flag
return trie return trie
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) ) _curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
print >> sys.stderr, "Building Trie..." print >> sys.stderr, "Building Trie..."
t1 = time.time()
trie = gen_trie(os.path.join(_curpath,"dict.txt")) trie = gen_trie(os.path.join(_curpath,"dict.txt"))
FREQ = dict([(k,float(v)/total) for k,v in FREQ.iteritems()]) #normalize FREQ = dict([(k,float(v)/total) for k,v in FREQ.iteritems()]) #normalize
min_freq = min(FREQ.itervalues()) min_freq = min(FREQ.itervalues())
#print min_freq print >> sys.stderr, "loading model cost ", time.time() - t1, "seconds."
print >> sys.stderr,"Trie has been built succesfully." print >> sys.stderr, "Trie has been built succesfully."
def __cut_all(sentence): def __cut_all(sentence):