mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-24 00:00:05 +08:00
Fix a bug in cx_Freeze support
This commit is contained in:
parent
a9f53e9c85
commit
e1afafe353
@ -324,3 +324,8 @@ def set_dictionary(dictionary_path):
|
|||||||
raise Exception("path does not exists:" + abs_path)
|
raise Exception("path does not exists:" + abs_path)
|
||||||
DICTIONARY = abs_path
|
DICTIONARY = abs_path
|
||||||
initialized = False
|
initialized = False
|
||||||
|
|
||||||
|
def get_abs_path_dict():
|
||||||
|
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
|
||||||
|
abs_path = os.path.join(_curpath,DICTIONARY)
|
||||||
|
return abs_path
|
||||||
|
@ -17,14 +17,14 @@ def load_model(f_name):
|
|||||||
return eval(open(prob_p_path,"rb").read())
|
return eval(open(prob_p_path,"rb").read())
|
||||||
else:
|
else:
|
||||||
result = {}
|
result = {}
|
||||||
for line in open(prob_p_path,"rb"):
|
for line in open(f_name,"rb"):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line=="":continue
|
if line=="":continue
|
||||||
word, _, tag = line.split(' ')
|
word, _, tag = line.split(' ')
|
||||||
result[word.decode('utf-8')]=tag
|
result[word.decode('utf-8')]=tag
|
||||||
return result
|
return result
|
||||||
|
|
||||||
word_tag_tab = load_model("../dict.txt")
|
word_tag_tab = load_model(jieba.get_abs_path_dict())
|
||||||
|
|
||||||
if jieba.user_word_tag_tab:
|
if jieba.user_word_tag_tab:
|
||||||
word_tag_tab.update(jieba.user_word_tag_tab)
|
word_tag_tab.update(jieba.user_word_tag_tab)
|
||||||
@ -142,7 +142,7 @@ def __cut_internal(sentence):
|
|||||||
tmp = re_skip.split(blk)
|
tmp = re_skip.split(blk)
|
||||||
for x in tmp:
|
for x in tmp:
|
||||||
if re_skip.match(x):
|
if re_skip.match(x):
|
||||||
yield pair(x,'')
|
yield pair(x,'x')
|
||||||
else:
|
else:
|
||||||
for xx in x:
|
for xx in x:
|
||||||
if re_num.match(xx):
|
if re_num.match(xx):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user