unittest for jieba.tokenize

Sun Junyi 2013-06-24 16:20:04 +08:00
parent 11a3b10755
commit cb1b0499f7
2 changed files with 11 additions and 1 deletion

@@ -149,5 +149,15 @@ class JiebaTestCase(unittest.TestCase):
             print >> sys.stderr, " , ".join([w.word + " / " + w.flag for w in result])
         print >> sys.stderr, "testPosseg"
 
+    def testTokenize(self):
+        for content in test_contents:
+            result = jieba.tokenize(content.decode('utf-8'))
+            assert isinstance(result, types.GeneratorType), "Test Tokenize Generator error"
+            result = list(result)
+            assert isinstance(result, list), "Test Tokenize error on content: %s" % content
+            for tk in result:
+                print >> sys.stderr, "word %s\t\t start: %d \t\t end:%d" % (tk[0], tk[1], tk[2])
+        print >> sys.stderr, "testTokenize"
+
 if __name__ == "__main__":
     unittest.main()
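
For reference, a minimal usage sketch of the API this new test exercises: jieba.tokenize() takes a unicode string and lazily yields (word, start, end) tuples, which is why the test first asserts it receives a generator and then materializes it with list(). The sentence below is an illustrative assumption, not taken from test_contents; the snippet keeps the Python 2 print style used in the test suite at the time.

# -*- coding: utf-8 -*-
# Sketch only: jieba.tokenize() expects a unicode string and yields
# (word, start, end) tuples as a generator.
import sys
import jieba

sentence = u"我来到北京清华大学"  # illustrative input, not from test_contents

result = jieba.tokenize(sentence)  # generator of (word, start, end)
for tk in result:
    print >> sys.stderr, "word %s\t\t start: %d \t\t end:%d" % (tk[0], tk[1], tk[2])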


@@ -8,7 +8,7 @@ def cuttest(test_sent):
     test_sent = test_sent.decode('utf-8')
     result = jieba.tokenize(test_sent)
     for tk in result:
-        print "word %s, start: %d, end:%d" % (tk[0],tk[1],tk[2])
+        print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
 
 if __name__ == "__main__":