mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
don't separate CRLF
This commit is contained in:
parent
c015f4e297
commit
a9f53e9c85
@ -221,7 +221,9 @@ def cut(sentence,cut_all=False):
|
|||||||
else:
|
else:
|
||||||
tmp = re_skip.split(blk)
|
tmp = re_skip.split(blk)
|
||||||
for x in tmp:
|
for x in tmp:
|
||||||
if not cut_all:
|
if re_skip.match(x):
|
||||||
|
yield x
|
||||||
|
elif not cut_all:
|
||||||
for xx in x:
|
for xx in x:
|
||||||
yield xx
|
yield xx
|
||||||
else:
|
else:
|
||||||
|
@ -141,13 +141,16 @@ def __cut_internal(sentence):
|
|||||||
else:
|
else:
|
||||||
tmp = re_skip.split(blk)
|
tmp = re_skip.split(blk)
|
||||||
for x in tmp:
|
for x in tmp:
|
||||||
for xx in x:
|
if re_skip.match(x):
|
||||||
if re_num.match(xx):
|
yield pair(x,'')
|
||||||
yield pair(xx,'m')
|
else:
|
||||||
elif re_eng.match(x):
|
for xx in x:
|
||||||
yield pair(xx,'eng')
|
if re_num.match(xx):
|
||||||
else:
|
yield pair(xx,'m')
|
||||||
yield pair(xx,'x')
|
elif re_eng.match(x):
|
||||||
|
yield pair(xx,'eng')
|
||||||
|
else:
|
||||||
|
yield pair(xx,'x')
|
||||||
|
|
||||||
def __lcut_internal(sentence):
|
def __lcut_internal(sentence):
|
||||||
return list(__cut_internal(sentence))
|
return list(__cut_internal(sentence))
|
||||||
|
Loading…
x
Reference in New Issue
Block a user