use logging instead of print in init file

This commit is contained in:
wind 2014-03-20 13:48:33 +13:00
parent 5e6a2c4661
commit 7488b114e7

View File

@ -13,6 +13,7 @@ from math import log
import random import random
import threading import threading
from functools import wraps from functools import wraps
import logging
DICTIONARY = "dict.txt" DICTIONARY = "dict.txt"
DICT_LOCK = threading.RLock() DICT_LOCK = threading.RLock()
@ -23,6 +24,16 @@ total =0.0
user_word_tag_tab={} user_word_tag_tab={}
initialized = False initialized = False
# Package-level logger. Diagnostics go to stderr through a dedicated handler
# and default to DEBUG verbosity; callers can adjust this with setLogLevel().
log_console = logging.StreamHandler(sys.stderr)
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
logger.addHandler(log_console)


def setLogLevel(log_level):
    """Set the threshold of the module-level ``logger``.

    ``log_level`` is passed unchanged to ``logging.Logger.setLevel``.
    """
    # No ``global`` declaration is needed: the name is only read, not rebound.
    logger.setLevel(log_level)
def gen_trie(f_name): def gen_trie(f_name):
lfreq = {} lfreq = {}
trie = {} trie = {}
@ -43,7 +54,7 @@ def gen_trie(f_name):
p = p[c] p = p[c]
p['']='' #ending flag p['']='' #ending flag
except ValueError as e: except ValueError as e:
print(f_name,' at line',lineno,line, file=sys.stderr) logger.debug('%s at line %s %s' % (f_name, lineno, line))
raise e raise e
return trie, lfreq,ltotal return trie, lfreq,ltotal
@ -62,7 +73,7 @@ def initialize(*args):
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) ) _curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
abs_path = os.path.join(_curpath,dictionary) abs_path = os.path.join(_curpath,dictionary)
print("Building Trie..., from " + abs_path, file=sys.stderr) logger.debug("Building Trie..., from %s" % abs_path)
t1 = time.time() t1 = time.time()
if abs_path == os.path.join(_curpath,"dict.txt"): #defautl dictionary if abs_path == os.path.join(_curpath,"dict.txt"): #defautl dictionary
cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache") cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache")
@ -71,7 +82,7 @@ def initialize(*args):
load_from_cache_fail = True load_from_cache_fail = True
if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(abs_path): if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(abs_path):
print("loading model from cache " + cache_file, file=sys.stderr) logger.debug("loading model from cache %s" % cache_file)
try: try:
trie,FREQ,total,min_freq = marshal.load(open(cache_file,'rb')) trie,FREQ,total,min_freq = marshal.load(open(cache_file,'rb'))
load_from_cache_fail = False load_from_cache_fail = False
@ -82,7 +93,7 @@ def initialize(*args):
trie,FREQ,total = gen_trie(abs_path) trie,FREQ,total = gen_trie(abs_path)
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
min_freq = min(FREQ.values()) min_freq = min(FREQ.values())
print("dumping model to file cache " + cache_file, file=sys.stderr) logger.debug("dumping model to file cache %s" % cache_file)
try: try:
tmp_suffix = "."+str(random.random()) tmp_suffix = "."+str(random.random())
with open(cache_file+tmp_suffix,'wb') as temp_cache_file: with open(cache_file+tmp_suffix,'wb') as temp_cache_file:
@ -95,12 +106,13 @@ def initialize(*args):
replace_file(cache_file+tmp_suffix,cache_file) replace_file(cache_file+tmp_suffix,cache_file)
except: except:
import traceback import traceback
print("dump cache file failed.",file=sys.stderr) logger.error("dump cache file failed.")
print(traceback.format_exc(),file=sys.stderr) logger.exception("")
#print(traceback.format_exc(),file=sys.stderr)
initialized = True initialized = True
print("loading model cost ", time.time() - t1, "seconds.",file=sys.stderr) logger.debug("loading model cost %s seconds." % (time.time() - t1))
print("Trie has been built succesfully.", file=sys.stderr) logger.debug("Trie has been built succesfully.")
def require_initialized(fn): def require_initialized(fn):