mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
unittest for jieba.tokenize
This commit is contained in:
parent
11a3b10755
commit
cb1b0499f7
@ -149,5 +149,15 @@ class JiebaTestCase(unittest.TestCase):
|
||||
print >> sys.stderr, " , ".join([w.word + " / " + w.flag for w in result])
|
||||
print >> sys.stderr, "testPosseg"
|
||||
|
||||
def testTokenize(self):
    """Run jieba.tokenize over every sample in test_contents.

    For each sample the test checks that tokenize returns a generator,
    fully consumes it, checks that every token has the three items
    (word, start, end) that the report line below indexes, and writes
    one line per token to stderr.
    """
    for content in test_contents:
        # The samples are UTF-8 byte strings; tokenize is given the decoded text.
        result = jieba.tokenize(content.decode('utf-8'))
        # Use the TestCase assertion instead of a bare assert so the check
        # is not stripped when the interpreter runs with -O.
        self.assertTrue(isinstance(result, types.GeneratorType), "Test Tokenize Generator error")
        # Materialize first so tokenize runs to completion before any output.
        tokens = list(result)
        for tk in tokens:
            # Checking the type of list(...) can never fail; check the token
            # shape that the formatting below relies on instead.
            self.assertTrue(len(tk) == 3, "Test Tokenize error on content: %s" % content)
            print >>sys.stderr, "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
    print >> sys.stderr, "testTokenize"
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
@ -8,7 +8,7 @@ def cuttest(test_sent):
|
||||
test_sent = test_sent.decode('utf-8')
|
||||
result = jieba.tokenize(test_sent)
|
||||
for tk in result:
|
||||
print "word %s, start: %d, end:%d" % (tk[0],tk[1],tk[2])
|
||||
print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
x
Reference in New Issue
Block a user