Mirror of https://github.com/fxsjy/jieba.git (synced 2025-07-10 00:01:33 +08:00)
fix self.FREQ in cut_for_search; make pair object iterable
This commit is contained in:
parent 3b76328f2a
commit ceb5c26be4
@ -200,8 +200,8 @@ https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
||||
```pycon
|
||||
>>> import jieba.posseg as pseg
|
||||
>>> words = pseg.cut("我爱北京天安门")
|
||||
->>> for w in words:
-...    print('%s %s' % (w.word, w.flag))
+>>> for word, flag in words:
+...    print('%s %s' % (word, flag))
|
||||
...
|
||||
我 r
|
||||
爱 v
|
||||
|
@ -310,12 +310,12 @@ class Tokenizer(object):
|
||||
if len(w) > 2:
|
||||
for i in xrange(len(w) - 1):
|
||||
gram2 = w[i:i + 2]
|
||||
-if FREQ.get(gram2):
+if self.FREQ.get(gram2):
|
||||
yield gram2
|
||||
if len(w) > 3:
|
||||
for i in xrange(len(w) - 2):
|
||||
gram3 = w[i:i + 3]
|
||||
-if FREQ.get(gram3):
+if self.FREQ.get(gram3):
|
||||
yield gram3
|
||||
yield w
|
||||
|
||||
|
@ -70,7 +70,7 @@ class pair(object):
|
||||
return '%s/%s' % (self.word, self.flag)
|
||||
|
||||
def __repr__(self):
|
||||
-return self.__str__()
+return 'pair(%r, %r)' % (self.word, self.flag)
|
||||
|
||||
def __str__(self):
|
||||
if PY2:
|
||||
@ -78,6 +78,9 @@ class pair(object):
|
||||
else:
|
||||
return self.__unicode__()
|
||||
|
||||
+def __iter__(self):
+    return iter((self.word, self.flag))
|
||||
|
||||
def encode(self, arg):
|
||||
return self.__unicode__().encode(arg)
|
||||
|
||||
|
@ -62,8 +62,8 @@ print('4. 词性标注')
|
||||
print('-'*40)
|
||||
|
||||
words = jieba.posseg.cut("我爱北京天安门")
|
||||
-for w in words:
-    print('%s %s' % (w.word, w.flag))
+for word, flag in words:
+    print('%s %s' % (word, flag))
|
||||
|
||||
print('='*40)
|
||||
print('6. Tokenize: 返回词语在原文的起止位置')
|
||||
|
@ -6,8 +6,8 @@ import jieba.posseg as pseg
|
||||
|
||||
def cuttest(test_sent):
|
||||
result = pseg.cut(test_sent)
|
||||
-for w in result:
-    print(w.word, "/", w.flag, ", ", end=' ')
+for word, flag in result:
+    print(word, "/", flag, ", ", end=' ')
|
||||
print("")
|
||||
|
||||
|
||||
|
@ -5,9 +5,9 @@ sys.path.append("../")
|
||||
import jieba.posseg as pseg
|
||||
|
||||
def cuttest(test_sent):
|
||||
-result = pseg.cut(test_sent,HMM=False)
-for w in result:
-    print(w.word, "/", w.flag, ", ", end=' ')
+result = pseg.cut(test_sent, HMM=False)
+for word, flag in result:
+    print(word, "/", flag, ", ", end=' ')
|
||||
print("")
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user