Merge pull request #99 from aszxqw/branch1

sed -i 's/not \(.*\) in/\1 not in/g' ...
This commit is contained in:
Sun Junyi 2013-08-20 18:33:39 -07:00
commit 66e334229b
2 changed files with 8 additions and 8 deletions

View File

@@ -38,7 +38,7 @@ def gen_trie(f_name):
ltotal+=freq ltotal+=freq
p = trie p = trie
for c in word: for c in word:
if not c in p: if c not in p:
p[c] ={} p[c] ={}
p = p[c] p = p[c]
p['']='' #ending flag p['']='' #ending flag
@@ -150,7 +150,7 @@ def get_DAG(sentence):
if c in p: if c in p:
p = p[c] p = p[c]
if '' in p: if '' in p:
if not i in DAG: if i not in DAG:
DAG[i]=[] DAG[i]=[]
DAG[i].append(j) DAG[i].append(j)
j+=1 j+=1
@@ -163,7 +163,7 @@ def get_DAG(sentence):
i+=1 i+=1
j=i j=i
for i in xrange(len(sentence)): for i in xrange(len(sentence)):
if not i in DAG: if i not in DAG:
DAG[i] =[i] DAG[i] =[i]
return DAG return DAG
@@ -186,7 +186,7 @@ def __cut_DAG(sentence):
yield buf yield buf
buf=u'' buf=u''
else: else:
if not (buf in FREQ): if (buf not in FREQ):
regognized = finalseg.cut(buf) regognized = finalseg.cut(buf)
for t in regognized: for t in regognized:
yield t yield t
@@ -201,7 +201,7 @@ def __cut_DAG(sentence):
if len(buf)==1: if len(buf)==1:
yield buf yield buf
else: else:
if not (buf in FREQ): if (buf not in FREQ):
regognized = finalseg.cut(buf) regognized = finalseg.cut(buf)
for t in regognized: for t in regognized:
yield t yield t
@@ -281,7 +281,7 @@ def add_word(word, freq, tag=None):
user_word_tag_tab[word] = tag.strip() user_word_tag_tab[word] = tag.strip()
p = trie p = trie
for c in word: for c in word:
if not c in p: if c not in p:
p[c] = {} p[c] = {}
p = p[c] p = p[c]
p[''] = '' # ending flag p[''] = '' # ending flag

View File

@@ -136,7 +136,7 @@ def __cut_DAG(sentence):
yield pair(buf,word_tag_tab.get(buf,'x')) yield pair(buf,word_tag_tab.get(buf,'x'))
buf=u'' buf=u''
else: else:
if not (buf in jieba.FREQ): if (buf not in jieba.FREQ):
regognized = __cut_detail(buf) regognized = __cut_detail(buf)
for t in regognized: for t in regognized:
yield t yield t
@@ -151,7 +151,7 @@ def __cut_DAG(sentence):
if len(buf)==1: if len(buf)==1:
yield pair(buf,word_tag_tab.get(buf,'x')) yield pair(buf,word_tag_tab.get(buf,'x'))
else: else:
if not (buf in jieba.FREQ): if (buf not in jieba.FREQ):
regognized = __cut_detail(buf) regognized = __cut_detail(buf)
for t in regognized: for t in regognized:
yield t yield t