Merge pull request #248 from wangbin/master

exclude word fragments from FREQ in posseg.cut
This commit is contained in:
Sun Junyi 2015-04-02 15:32:41 +08:00
commit 753c1be49c

View File

@ -189,7 +189,7 @@ def __cut_DAG(sentence):
if buf: if buf:
if len(buf) == 1: if len(buf) == 1:
yield pair(buf, word_tag_tab.get(buf, 'x')) yield pair(buf, word_tag_tab.get(buf, 'x'))
elif buf not in jieba.FREQ: elif not jieba.FREQ.get(buf):
recognized = __cut_detail(buf) recognized = __cut_detail(buf)
for t in recognized: for t in recognized:
yield t yield t
@ -203,7 +203,7 @@ def __cut_DAG(sentence):
if buf: if buf:
if len(buf) == 1: if len(buf) == 1:
yield pair(buf, word_tag_tab.get(buf, 'x')) yield pair(buf, word_tag_tab.get(buf, 'x'))
elif (buf not in jieba.FREQ): elif not jieba.FREQ.get(buf):
recognized = __cut_detail(buf) recognized = __cut_detail(buf)
for t in recognized: for t in recognized:
yield t yield t