Mirror of https://github.com/fxsjy/jieba.git (synced 2025-07-10 00:01:33 +08:00)
Commit 7334bedf5c
--- a/jieba/analyse/analyzer.py
+++ b/jieba/analyse/analyzer.py
@@ -9,9 +9,10 @@ STOP_WORDS = frozenset(('a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'can',
                         'for', 'from', 'have', 'if', 'in', 'is', 'it', 'may',
                         'not', 'of', 'on', 'or', 'tbd', 'that', 'the', 'this',
                         'to', 'us', 'we', 'when', 'will', 'with', 'yet',
-                        'you', 'your',u'的',u'了',u'和'))
+                        'you', 'your', '的', '了', '和'))
 
-accepted_chars = re.compile(ur"[\u4E00-\u9FA5]+")
+accepted_chars = re.compile(r"[\u4E00-\u9FA5]+")
+
 
 class ChineseTokenizer(Tokenizer):
     def __call__(self,text,**kargs):
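Both changes in this hunk are Python 3 fixes: the u'' prefixes are dropped from the stop-word literals, and ur"..." (a syntax error on Python 3) becomes a plain raw string. A minimal standalone sketch of what the accepted_chars pattern matches, using made-up tokens for illustration:

# -*- coding: utf-8 -*-
from __future__ import print_function, unicode_literals
import re

# Same pattern as the hunk above: runs of characters from the main
# CJK Unified Ideographs block (U+4E00..U+9FA5).
accepted_chars = re.compile(r"[\u4E00-\u9FA5]+")

for token in ('交换机', 'IBM', '天安门', 'dream'):
    # Pure-CJK tokens match the pattern; Latin-only tokens do not.
    print(token, bool(accepted_chars.match(token)))
# prints: 交换机 True / IBM False / 天安门 True / dream False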
--- a/test/test_whoosh.py
+++ b/test/test_whoosh.py
@@ -5,13 +5,13 @@ from whoosh.index import create_in,open_dir
 from whoosh.fields import *
 from whoosh.qparser import QueryParser
 
-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse.analyzer import ChineseAnalyzer
 
 analyzer = ChineseAnalyzer()
 
 schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT(stored=True, analyzer=analyzer))
 ix = create_in("tmp", schema) # for create new index
-#ix = open_dir("tmp") # for read only
+#ix = open_dir("tmp", schema=schema) # for read only
 writer = ix.writer()
 
 writer.add_document(
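The open_dir change threads the analyzer-bearing schema into the read-only path: Whoosh's open_dir accepts an optional schema= argument that supplies a schema instead of relying solely on the one stored with the index. A sketch of the create-or-reopen flow under that assumption; the exists_in check and explicit os.mkdir are illustrative additions, not part of the test:

import os
from whoosh.index import create_in, open_dir, exists_in
from whoosh.fields import Schema, TEXT, ID
from jieba.analyse.analyzer import ChineseAnalyzer

analyzer = ChineseAnalyzer()
schema = Schema(title=TEXT(stored=True), path=ID(stored=True),
                content=TEXT(stored=True, analyzer=analyzer))

# create_in expects the target directory to exist already.
if not os.path.exists("tmp"):
    os.mkdir("tmp")

if exists_in("tmp"):
    # Reopen an existing index; schema= mirrors the commented-out
    # line in the diff.
    ix = open_dir("tmp", schema=schema)
else:
    # First run: build a fresh index.
    ix = create_in("tmp", schema)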
@@ -49,12 +49,12 @@ searcher = ix.searcher()
 parser = QueryParser("content", schema=ix.schema)
 
 for keyword in ("水果世博园","你","first","中文","交换机","交换"):
-    print "result of ",keyword
+    print("result of ",keyword)
     q = parser.parse(keyword)
     results = searcher.search(q)
     for hit in results:
-        print hit.highlights("content")
-    print "="*10
+        print(hit.highlights("content"))
+    print("="*10)
 
 for t in analyzer("我的好朋友是李明;我爱北京天安门;IBM和Microsoft; I have a dream."):
-    print t.text
+    print(t.text)
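The remaining edits convert Python 2 print statements to the function form, after which the test parses on Python 2 and 3 alike. A condensed sketch of the converted search loop, assuming ix was built as above and documents were committed with writer.commit() before searching:

from whoosh.qparser import QueryParser

searcher = ix.searcher()
parser = QueryParser("content", schema=ix.schema)

for keyword in ("水果世博园", "交换机"):  # subset of the test's keywords
    print("result of", keyword)
    q = parser.parse(keyword)
    results = searcher.search(q)
    for hit in results:
        # highlights() returns the stored field text with matched
        # terms wrapped in the default HTML formatter's <b> tags.
        print(hit.highlights("content"))
    print("=" * 10)

searcher.close()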