mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
use logging instead of print in init file
This commit is contained in:
parent
5e6a2c4661
commit
7488b114e7
@ -13,6 +13,7 @@ from math import log
|
|||||||
import random
|
import random
|
||||||
import threading
|
import threading
|
||||||
from functools import wraps
|
from functools import wraps
|
||||||
|
import logging
|
||||||
|
|
||||||
DICTIONARY = "dict.txt"
|
DICTIONARY = "dict.txt"
|
||||||
DICT_LOCK = threading.RLock()
|
DICT_LOCK = threading.RLock()
|
||||||
@ -23,6 +24,16 @@ total =0.0
|
|||||||
user_word_tag_tab={}
|
user_word_tag_tab={}
|
||||||
initialized = False
|
initialized = False
|
||||||
|
|
||||||
|
log_console = logging.StreamHandler(sys.stderr)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.setLevel(logging.DEBUG)
|
||||||
|
logger.addHandler(log_console)
|
||||||
|
|
||||||
|
|
||||||
|
def setLogLevel(log_level):
|
||||||
|
global logger
|
||||||
|
logger.setLevel(log_level)
|
||||||
|
|
||||||
def gen_trie(f_name):
|
def gen_trie(f_name):
|
||||||
lfreq = {}
|
lfreq = {}
|
||||||
trie = {}
|
trie = {}
|
||||||
@ -43,7 +54,7 @@ def gen_trie(f_name):
|
|||||||
p = p[c]
|
p = p[c]
|
||||||
p['']='' #ending flag
|
p['']='' #ending flag
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
print(f_name,' at line',lineno,line, file=sys.stderr)
|
logger.debug('%s at line %s %s' % (f_name, lineno, line))
|
||||||
raise e
|
raise e
|
||||||
return trie, lfreq,ltotal
|
return trie, lfreq,ltotal
|
||||||
|
|
||||||
@ -62,7 +73,7 @@ def initialize(*args):
|
|||||||
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
|
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
|
||||||
|
|
||||||
abs_path = os.path.join(_curpath,dictionary)
|
abs_path = os.path.join(_curpath,dictionary)
|
||||||
print("Building Trie..., from " + abs_path, file=sys.stderr)
|
logger.debug("Building Trie..., from %s" % abs_path)
|
||||||
t1 = time.time()
|
t1 = time.time()
|
||||||
if abs_path == os.path.join(_curpath,"dict.txt"): #default dictionary
|
if abs_path == os.path.join(_curpath,"dict.txt"): #default dictionary
|
||||||
cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache")
|
cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache")
|
||||||
@ -71,7 +82,7 @@ def initialize(*args):
|
|||||||
|
|
||||||
load_from_cache_fail = True
|
load_from_cache_fail = True
|
||||||
if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(abs_path):
|
if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(abs_path):
|
||||||
print("loading model from cache " + cache_file, file=sys.stderr)
|
logger.debug("loading model from cache %s" % cache_file)
|
||||||
try:
|
try:
|
||||||
trie,FREQ,total,min_freq = marshal.load(open(cache_file,'rb'))
|
trie,FREQ,total,min_freq = marshal.load(open(cache_file,'rb'))
|
||||||
load_from_cache_fail = False
|
load_from_cache_fail = False
|
||||||
@ -82,7 +93,7 @@ def initialize(*args):
|
|||||||
trie,FREQ,total = gen_trie(abs_path)
|
trie,FREQ,total = gen_trie(abs_path)
|
||||||
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
|
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
|
||||||
min_freq = min(FREQ.values())
|
min_freq = min(FREQ.values())
|
||||||
print("dumping model to file cache " + cache_file, file=sys.stderr)
|
logger.debug("dumping model to file cache %s" % cache_file)
|
||||||
try:
|
try:
|
||||||
tmp_suffix = "."+str(random.random())
|
tmp_suffix = "."+str(random.random())
|
||||||
with open(cache_file+tmp_suffix,'wb') as temp_cache_file:
|
with open(cache_file+tmp_suffix,'wb') as temp_cache_file:
|
||||||
@ -95,12 +106,13 @@ def initialize(*args):
|
|||||||
replace_file(cache_file+tmp_suffix,cache_file)
|
replace_file(cache_file+tmp_suffix,cache_file)
|
||||||
except:
|
except:
|
||||||
import traceback
|
import traceback
|
||||||
print("dump cache file failed.",file=sys.stderr)
|
logger.error("dump cache file failed.")
|
||||||
print(traceback.format_exc(),file=sys.stderr)
|
logger.exception("")
|
||||||
|
#print(traceback.format_exc(),file=sys.stderr)
|
||||||
initialized = True
|
initialized = True
|
||||||
|
|
||||||
print("loading model cost ", time.time() - t1, "seconds.",file=sys.stderr)
|
logger.debug("loading model cost %s seconds." % (time.time() - t1))
|
||||||
print("Trie has been built succesfully.", file=sys.stderr)
|
logger.debug("Trie has been built succesfully.")
|
||||||
|
|
||||||
|
|
||||||
def require_initialized(fn):
|
def require_initialized(fn):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user