mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
don't separate CRLF
This commit is contained in:
parent
c015f4e297
commit
a9f53e9c85
@ -221,7 +221,9 @@ def cut(sentence,cut_all=False):
|
||||
else:
|
||||
tmp = re_skip.split(blk)
|
||||
for x in tmp:
|
||||
if not cut_all:
|
||||
if re_skip.match(x):
|
||||
yield x
|
||||
elif not cut_all:
|
||||
for xx in x:
|
||||
yield xx
|
||||
else:
|
||||
|
@ -141,13 +141,16 @@ def __cut_internal(sentence):
|
||||
else:
|
||||
tmp = re_skip.split(blk)
|
||||
for x in tmp:
|
||||
for xx in x:
|
||||
if re_num.match(xx):
|
||||
yield pair(xx,'m')
|
||||
elif re_eng.match(x):
|
||||
yield pair(xx,'eng')
|
||||
else:
|
||||
yield pair(xx,'x')
|
||||
if re_skip.match(x):
|
||||
yield pair(x,'')
|
||||
else:
|
||||
for xx in x:
|
||||
if re_num.match(xx):
|
||||
yield pair(xx,'m')
|
||||
elif re_eng.match(x):
|
||||
yield pair(xx,'eng')
|
||||
else:
|
||||
yield pair(xx,'x')
|
||||
|
||||
def __lcut_internal(sentence):
|
||||
return list(__cut_internal(sentence))
|
||||
|
Loading…
x
Reference in New Issue
Block a user