mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
Add a new method, cut_for_search(sentence), which gives a better recall rate when building a search engine's inverted index.
This commit is contained in:
parent
d421fe5e7e
commit
5ce72e76b1
@ -157,6 +157,21 @@ def cut(sentence,cut_all=False):
|
||||
if x!="":
|
||||
yield x
|
||||
|
||||
def cut_for_search(sentence):
    """Segment `sentence` and also emit known sub-words of long words.

    Each word produced by `cut(sentence)` is yielded, preceded by any of
    its shorter substrings that are present in `FREQ`:

    * for words longer than 2 characters, every 2-character window
      found in `FREQ`, in left-to-right order;
    * for words longer than 3 characters, every 3-character window
      found in `FREQ`, in left-to-right order.

    The full word itself is always yielded last. This is a generator.
    """
    for word in cut(sentence):
        length = len(word)
        # Window sizes are tried in ascending order so that all 2-grams
        # are emitted before any 3-gram; a window size only applies to
        # words strictly longer than the window.
        for size in (2, 3):
            if length <= size:
                continue
            for start in xrange(length - size + 1):
                piece = word[start:start + size]
                if piece in FREQ:
                    yield piece
        yield word
|
||||
|
||||
def load_userdict(f):
|
||||
global trie,total,FREQ
|
||||
if isinstance(f, (str, unicode)):
|
||||
|
Loading…
x
Reference in New Issue
Block a user