Mirror of https://github.com/fxsjy/jieba.git (synced 2025-07-10 00:01:33 +08:00)
fix issue #810
This commit is contained in:
Parent: d703bce302
Commit: 2eb11c8028
@@ -205,14 +205,15 @@ class Tokenizer(object):
|
|||||||
eng_scan = 0
|
eng_scan = 0
|
||||||
yield eng_buf
|
yield eng_buf
|
||||||
if len(L) == 1 and k > old_j:
|
if len(L) == 1 and k > old_j:
|
||||||
if re_eng.match(sentence[k]):
|
word = sentence[k:L[0] + 1]
|
||||||
|
if re_eng.match(word):
|
||||||
if eng_scan == 0:
|
if eng_scan == 0:
|
||||||
eng_scan = 1
|
eng_scan = 1
|
||||||
eng_buf = sentence[k]
|
eng_buf = word
|
||||||
else:
|
else:
|
||||||
eng_buf += sentence[k]
|
eng_buf += word
|
||||||
if eng_scan == 0:
|
if eng_scan == 0:
|
||||||
yield sentence[k:L[0] + 1]
|
yield word
|
||||||
old_j = L[0]
|
old_j = L[0]
|
||||||
else:
|
else:
|
||||||
for j in L:
|
for j in L:
|
||||||
|
@@ -98,3 +98,4 @@ if __name__ == "__main__":
|
|||||||
cuttest('你认识那个和主席握手的的哥吗?他开一辆黑色的士。')
|
cuttest('你认识那个和主席握手的的哥吗?他开一辆黑色的士。')
|
||||||
jieba.add_word('超敏C反应蛋白')
|
jieba.add_word('超敏C反应蛋白')
|
||||||
cuttest('超敏C反应蛋白是什么, java好学吗?,小潘老板都学Python')
|
cuttest('超敏C反应蛋白是什么, java好学吗?,小潘老板都学Python')
|
||||||
|
cuttest('steel健身爆发力运动兴奋补充剂')
|
||||||
|
Loading…
x
Reference in New Issue
Block a user