# Patch summary (these lines precede the first "diff --git" header and are
# not part of any hunk):
#   * test/jieba_test.py: adds JiebaTestCase.testTokenize. For every entry of
#     test_contents it decodes the entry from UTF-8, calls jieba.tokenize,
#     asserts that the call returned a generator, materialises it into a list
#     and prints each token's word, start and end to stderr.
#   * test/test_tokenize.py: changes the print format in cuttest from
#     comma-separated fields to tab-separated fields, matching the format
#     used by the new unit test.
# NOTE(review): in testTokenize the second assert checks
#   isinstance(result, list) right after result = list(result), so it can
#   never fail; it verifies nothing about the tokens themselves.
# NOTE(review): this patch had been collapsed onto a single line. Line breaks,
#   indentation and blank context lines below were reconstructed from the
#   hunk line counts and Python block structure -- confirm against the
#   repository before applying.
diff --git a/test/jieba_test.py b/test/jieba_test.py
index 432aa9f..4e6d35d 100644
--- a/test/jieba_test.py
+++ b/test/jieba_test.py
@@ -149,5 +149,15 @@ class JiebaTestCase(unittest.TestCase):
             print >> sys.stderr, " , ".join([w.word + " / " + w.flag for w in result])
         print >> sys.stderr, "testPosseg"
 
+    def testTokenize(self):
+        for content in test_contents:
+            result = jieba.tokenize(content.decode('utf-8'))
+            assert isinstance(result, types.GeneratorType), "Test Tokenize Generator error"
+            result = list(result)
+            assert isinstance(result, list), "Test Tokenize error on content: %s" % content
+            for tk in result:
+                print >>sys.stderr, "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
+        print >> sys.stderr, "testTokenize"
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index ab46256..9e26d07 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -8,7 +8,7 @@ def cuttest(test_sent):
     test_sent = test_sent.decode('utf-8')
     result = jieba.tokenize(test_sent)
     for tk in result:
-        print "word %s, start: %d, end:%d" % (tk[0],tk[1],tk[2])
+        print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
 
 
 if __name__ == "__main__":