mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
fix bug issue #132
This commit is contained in:
parent
136676381a
commit
18678d50c6
@@ -52545,7 +52545,7 @@ P={u'\u4e00': (('B', 'm'),
|
||||
u'\u8dd6': (('S', 'g'), ('M', 'n'), ('B', 'n'), ('E', 'nr')),
|
||||
u'\u8dd7': (('B', 'n'),),
|
||||
u'\u8dda': (('E', 'v'), ('S', 'x'), ('E', 'l'), ('E', 'nr'), ('E', 'vn')),
|
||||
-u'\u8ddb': (('B', 'n'), ('B', 'v')),
|
||||
+u'\u8ddb': (('B', 'n'), ('B', 'v'), ('S','a') ),
|
||||
u'\u8ddd': (('B', 'n'),
|
||||
('S', 'p'),
|
||||
('E', 'n'),
|
||||
|
@@ -23,7 +23,9 @@ def viterbi(obs, states, start_p, trans_p, emit_p):
|
||||
obs_states = states.get(obs[t],all_states)
|
||||
obs_states = set(obs_states) & set(prev_states_expect_next)
|
||||
|
||||
if len(obs_states)==0: obs_states = prev_states_expect_next
|
||||
if len(obs_states)==0: obs_states = all_states
|
||||
|
||||
for y in obs_states:
|
||||
(prob,state ) = max([(V[t-1][y0] + trans_p[y0].get(y,MIN_FLOAT) + emit_p[y].get(obs[t],MIN_FLOAT) ,y0) for y0 in prev_states])
|
||||
V[t][y] =prob
|
||||
|
9
test/test_bug.py
Normal file
9
test/test_bug.py
Normal file
@@ -0,0 +1,9 @@
|
||||
#encoding=utf-8
|
||||
import sys
|
||||
sys.path.append("../")
|
||||
import jieba
|
||||
import jieba.posseg as pseg
|
||||
words=pseg.cut("又跛又啞")
|
||||
for w in words:
|
||||
print w.word,w.flag
|
||||
|
@@ -5,7 +5,7 @@ import jieba
|
||||
jieba.load_userdict("userdict.txt")
|
||||
import jieba.posseg as pseg
|
||||
|
||||
-test_sent = "李小福是创新办主任也是云计算方面的专家;"
|
||||
+test_sent = "李小福是创新办主任也是云计算方面的专家; 什么是八一双鹿"
|
||||
test_sent += "例如我输入一个带“韩玉赏鉴”的标题,在自定义词库中也增加了此词为N类型"
|
||||
words = jieba.cut(test_sent)
|
||||
for w in words:
|
||||
|
@@ -3,4 +3,5 @@
|
||||
创新办 3 i
|
||||
easy_install 3 eng
|
||||
好用 300
|
||||
韩玉赏鉴 3 nz
|
||||
韩玉赏鉴 3 nz
|
||||
八一双鹿 3 nz
|
Loading…
x
Reference in New Issue
Block a user