mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
Merge pull request #99 from aszxqw/branch1
sed -i 's/not \(.*\) in/\1 not in/g' ...
This commit is contained in:
commit
66e334229b
@ -38,7 +38,7 @@ def gen_trie(f_name):
|
|||||||
ltotal+=freq
|
ltotal+=freq
|
||||||
p = trie
|
p = trie
|
||||||
for c in word:
|
for c in word:
|
||||||
if not c in p:
|
if c not in p:
|
||||||
p[c] ={}
|
p[c] ={}
|
||||||
p = p[c]
|
p = p[c]
|
||||||
p['']='' #ending flag
|
p['']='' #ending flag
|
||||||
@ -150,7 +150,7 @@ def get_DAG(sentence):
|
|||||||
if c in p:
|
if c in p:
|
||||||
p = p[c]
|
p = p[c]
|
||||||
if '' in p:
|
if '' in p:
|
||||||
if not i in DAG:
|
if i not in DAG:
|
||||||
DAG[i]=[]
|
DAG[i]=[]
|
||||||
DAG[i].append(j)
|
DAG[i].append(j)
|
||||||
j+=1
|
j+=1
|
||||||
@ -163,7 +163,7 @@ def get_DAG(sentence):
|
|||||||
i+=1
|
i+=1
|
||||||
j=i
|
j=i
|
||||||
for i in xrange(len(sentence)):
|
for i in xrange(len(sentence)):
|
||||||
if not i in DAG:
|
if i not in DAG:
|
||||||
DAG[i] =[i]
|
DAG[i] =[i]
|
||||||
return DAG
|
return DAG
|
||||||
|
|
||||||
@ -186,7 +186,7 @@ def __cut_DAG(sentence):
|
|||||||
yield buf
|
yield buf
|
||||||
buf=u''
|
buf=u''
|
||||||
else:
|
else:
|
||||||
if not (buf in FREQ):
|
if (buf not in FREQ):
|
||||||
regognized = finalseg.cut(buf)
|
regognized = finalseg.cut(buf)
|
||||||
for t in regognized:
|
for t in regognized:
|
||||||
yield t
|
yield t
|
||||||
@ -201,7 +201,7 @@ def __cut_DAG(sentence):
|
|||||||
if len(buf)==1:
|
if len(buf)==1:
|
||||||
yield buf
|
yield buf
|
||||||
else:
|
else:
|
||||||
if not (buf in FREQ):
|
if (buf not in FREQ):
|
||||||
regognized = finalseg.cut(buf)
|
regognized = finalseg.cut(buf)
|
||||||
for t in regognized:
|
for t in regognized:
|
||||||
yield t
|
yield t
|
||||||
@ -281,7 +281,7 @@ def add_word(word, freq, tag=None):
|
|||||||
user_word_tag_tab[word] = tag.strip()
|
user_word_tag_tab[word] = tag.strip()
|
||||||
p = trie
|
p = trie
|
||||||
for c in word:
|
for c in word:
|
||||||
if not c in p:
|
if c not in p:
|
||||||
p[c] = {}
|
p[c] = {}
|
||||||
p = p[c]
|
p = p[c]
|
||||||
p[''] = '' # ending flag
|
p[''] = '' # ending flag
|
||||||
|
@ -136,7 +136,7 @@ def __cut_DAG(sentence):
|
|||||||
yield pair(buf,word_tag_tab.get(buf,'x'))
|
yield pair(buf,word_tag_tab.get(buf,'x'))
|
||||||
buf=u''
|
buf=u''
|
||||||
else:
|
else:
|
||||||
if not (buf in jieba.FREQ):
|
if (buf not in jieba.FREQ):
|
||||||
regognized = __cut_detail(buf)
|
regognized = __cut_detail(buf)
|
||||||
for t in regognized:
|
for t in regognized:
|
||||||
yield t
|
yield t
|
||||||
@ -151,7 +151,7 @@ def __cut_DAG(sentence):
|
|||||||
if len(buf)==1:
|
if len(buf)==1:
|
||||||
yield pair(buf,word_tag_tab.get(buf,'x'))
|
yield pair(buf,word_tag_tab.get(buf,'x'))
|
||||||
else:
|
else:
|
||||||
if not (buf in jieba.FREQ):
|
if (buf not in jieba.FREQ):
|
||||||
regognized = __cut_detail(buf)
|
regognized = __cut_detail(buf)
|
||||||
for t in regognized:
|
for t in regognized:
|
||||||
yield t
|
yield t
|
||||||
|
Loading…
x
Reference in New Issue
Block a user