mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
enhance cut_all=True mode
This commit is contained in:
parent
ddc48d792f
commit
400889b25c
@ -55,25 +55,17 @@ print >> sys.stderr, "Trie has been built succesfully."
|
||||
|
||||
|
||||
def __cut_all(sentence):
|
||||
N = len(sentence)
|
||||
i,j=0,0
|
||||
p = trie
|
||||
while i<N:
|
||||
c = sentence[j]
|
||||
if c in p:
|
||||
p = p[c]
|
||||
if '' in p:
|
||||
yield sentence[i:j+1]
|
||||
j+=1
|
||||
if j>=N:
|
||||
i+=1
|
||||
j=i
|
||||
p=trie
|
||||
dag = get_DAG(sentence)
|
||||
old_j = -1
|
||||
for k,L in dag.iteritems():
|
||||
if len(L)==1 and k>old_j:
|
||||
yield sentence[k:L[0]+1]
|
||||
old_j = L[0]
|
||||
else:
|
||||
p = trie
|
||||
i+=1
|
||||
j=i
|
||||
|
||||
for j in L:
|
||||
if j>k:
|
||||
yield sentence[k:j+1]
|
||||
old_j = j
|
||||
|
||||
def calc(sentence,DAG,idx,route):
|
||||
N = len(sentence)
|
||||
|
Loading…
x
Reference in New Issue
Block a user