Mirror of https://github.com/fxsjy/jieba.git, synced 2025-07-10 00:01:33 +08:00
fix version; fix spaces at end of line
commit bb1e6000c6 (parent 51df77831b)
Changelog (14 changed lines)

@@ -13,10 +13,10 @@
 2013-07-01: version 0.31
 1. Reformatted code indentation to follow the PEP8 standard
-2. Added support for the Jython interpreter; thanks @piaolingxue
+2. Added support for the Jython interpreter; thanks @piaolingxue
 3. Fixed a bug where mixed Chinese-English words with a leading number were not recognized
-4. Partial code refactoring; thanks @chao78787
-5. Parallel segmentation mode now detects the number of CPUs automatically to pick a suitable process count; thanks @linkerlin
+4. Partial code refactoring; thanks @chao78787
+5. Parallel segmentation mode now detects the number of CPUs automatically to pick a suitable process count; thanks @linkerlin
 6. Fixed the incorrect dependency of jieba.extra_tags on the whoosh module in version 0.3

@@ -55,8 +55,8 @@
 2013-04-27: version 0.28
 ========================
 1) Added lazy loading for the dictionary; the dictionary path can now be changed after 'import jieba'. Thanks hermanschaaf
-2) Show the offending entry when the dictionary fails to load. Thanks neuront
-3) Fixed a bug where a dictionary edited with vim would fail to load. Thanks neuront
+2) Show the offending entry when the dictionary fails to load. Thanks neuront
+3) Fixed a bug where a dictionary edited with vim would fail to load. Thanks neuront

 2013-04-22: version 0.27
 ========================

@@ -93,7 +93,7 @@
 2012-11-28: version 0.22
 ========================
 1) Added the jieba.cut_for_search method, which re-splits "long words" on top of accurate-mode segmentation; intended for search-engine indexing, it gives higher recall than accurate mode.
-2) Started supporting Python 3.x; previously only the Python 2.x series was supported, and from this version on there is a separate jieba3k
+2) Started supporting Python 3.x; previously only the Python 2.x series was supported, and from this version on there is a separate jieba3k


 2012-11-23: version 0.21

@@ -104,7 +104,7 @@

 2012-11-06: version 0.20
 ========================
-1) Added part-of-speech tagging
+1) Added part-of-speech tagging


 2012-10-25: version 0.19
jieba/__main__.py

@@ -3,7 +3,7 @@ import sys
 import jieba
 from argparse import ArgumentParser

-parser = ArgumentParser(usage="%s -m jieba [options] filename" % sys.executable, description="Jieba command line interface.", version="Jieba " + jieba.__version__, epilog="If no filename specified, use STDIN instead.")
+parser = ArgumentParser(usage="%s -m jieba [options] filename" % sys.executable, description="Jieba command line interface.", epilog="If no filename specified, use STDIN instead.")
 parser.add_argument("-d", "--delimiter", metavar="DELIM", default=' / ',
                     nargs='?', const=' ',
                     help="use DELIM instead of ' / ' for word delimiter; use a space if it is without DELIM")

@@ -14,6 +14,8 @@ parser.add_argument("-n", "--no-hmm", dest="hmm", action="store_false",
                     default=True, help="don't use the Hidden Markov Model")
 parser.add_argument("-q", "--quiet", action="store_true", default=False,
                     help="don't print loading messages to stderr")
+parser.add_argument("-V", '--version', action='version',
+                    version="Jieba " + jieba.__version__)
 parser.add_argument("filename", nargs='?', help="input file")

 args = parser.parse_args()
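The argparse change above is the "fix version" half of this commit: Python 2.7 deprecated the version= argument to ArgumentParser and Python 3 removed it, which is presumably why the version string moves to an explicit -V/--version flag with action='version'. A minimal standalone sketch of the portable spelling, with a hypothetical program name "demo":

    # Sketch: portable --version handling in argparse (hypothetical prog "demo").
    from argparse import ArgumentParser

    parser = ArgumentParser(prog="demo")
    # ArgumentParser(version=...) raises TypeError on Python 3; use a flag instead.
    parser.add_argument("-V", "--version", action="version", version="demo 1.0")
    args = parser.parse_args()  # "demo -V" prints "demo 1.0" and exits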
jieba/analyse/analyzer.py

@@ -1,6 +1,6 @@
 ##encoding=utf-8
 from whoosh.analysis import RegexAnalyzer,LowercaseFilter,StopFilter,StemFilter
-from whoosh.analysis import Tokenizer,Token
+from whoosh.analysis import Tokenizer,Token
 from whoosh.lang.porter import stem

 import jieba
jieba/finalseg/__init__.py

@@ -23,26 +23,26 @@ def load_model():

     start_p = {}
     abs_path = os.path.join(_curpath, PROB_START_P)
-    with open(abs_path, mode='rb') as f:
+    with open(abs_path, mode='r') as f:
         start_p = marshal.load(f)
     f.closed

     trans_p = {}
     abs_path = os.path.join(_curpath, PROB_TRANS_P)
-    with open(abs_path, 'rb') as f:
+    with open(abs_path, 'r') as f:
         trans_p = marshal.load(f)
     f.closed

     emit_p = {}
     abs_path = os.path.join(_curpath, PROB_EMIT_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'r') as f:
         emit_p = marshal.load(f)
     f.closed

     return start_p, trans_p, emit_p

 if sys.platform.startswith("java"):
-    start_P, trans_P, emit_P = load_model()
+    start_P, trans_P, emit_P = load_model()
 else:
     import prob_start,prob_trans,prob_emit
     start_P, trans_P, emit_P = prob_start.P, prob_trans.P, prob_emit.P
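For context on the mode changes above: these .p tables are read back with marshal.load. On CPython 3 marshal requires a binary-mode file object, while on Python 2 and Jython (the branch this load_model serves) text mode chiefly differs on platforms that translate line endings. A small round-trip sketch, assuming a hypothetical file name "model.p":

    # Sketch: marshal round trip (hypothetical file "model.p").
    # CPython 3 requires binary mode ('wb'/'rb') for marshal.dump/load.
    import marshal

    table = {u"word": 0.5}
    with open("model.p", "wb") as f:
        marshal.dump(table, f)
    with open("model.p", "rb") as f:
        assert marshal.load(f) == table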
jieba/posseg/__init__.py

@@ -15,40 +15,41 @@ PROB_EMIT_P = "prob_emit.p"
 CHAR_STATE_TAB_P = "char_state_tab.p"

 def load_model(f_name, isJython=True):
-    _curpath=os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+    _curpath = os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

     result = {}
-    with file(f_name, "rb") as f:
-        for line in open(f_name,"rb"):
+    with open(f_name, "r") as f:
+        for line in f:
             line = line.strip()
-            if line=="":continue
+            if not line:
+                continue
             word, _, tag = line.split(' ')
             result[word.decode('utf-8')] = tag
     f.closed
     if not isJython:
         return result

     start_p = {}
     abs_path = os.path.join(_curpath, PROB_START_P)
-    with open(abs_path, mode='rb') as f:
+    with open(abs_path, mode='r') as f:
         start_p = marshal.load(f)
     f.closed

     trans_p = {}
     abs_path = os.path.join(_curpath, PROB_TRANS_P)
-    with open(abs_path, 'rb') as f:
+    with open(abs_path, 'r') as f:
         trans_p = marshal.load(f)
     f.closed

     emit_p = {}
     abs_path = os.path.join(_curpath, PROB_EMIT_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'r') as f:
         emit_p = marshal.load(f)
     f.closed

     state = {}
     abs_path = os.path.join(_curpath, CHAR_STATE_TAB_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'r') as f:
         state = marshal.load(f)
     f.closed
@@ -62,14 +63,14 @@ else:
     word_tag_tab = load_model(jieba.get_abs_path_dict(), isJython=False)

 def makesure_userdict_loaded(fn):

     @wraps(fn)
     def wrapped(*args,**kwargs):
-        if len(jieba.user_word_tag_tab)>0:
+        if jieba.user_word_tag_tab:
             word_tag_tab.update(jieba.user_word_tag_tab)
             jieba.user_word_tag_tab = {}
         return fn(*args,**kwargs)

     return wrapped

 class pair(object):
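makesure_userdict_loaded is a small lazy-merge decorator: before each decorated call it folds any pending user-dictionary tags into the main table, and functools.wraps keeps the wrapped function's name and docstring. A standalone sketch of the same pattern, using hypothetical stand-in names for jieba's module state:

    # Sketch: lazy-merge decorator; "pending" and "table" are stand-ins
    # for jieba.user_word_tag_tab and word_tag_tab.
    from functools import wraps

    pending = {"hello": "n"}
    table = {}

    def ensure_loaded(fn):
        @wraps(fn)  # preserve fn.__name__ / fn.__doc__ on the wrapper
        def wrapped(*args, **kwargs):
            if pending:  # truthiness check, as in the change above
                table.update(pending)
                pending.clear()
            return fn(*args, **kwargs)
        return wrapped

    @ensure_loaded
    def tag(word):
        return table.get(word, "x")

    print(tag("hello"))  # -> n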
@@ -152,7 +153,7 @@ def __cut_DAG_NO_HMM(sentence):
 def __cut_DAG(sentence):
     DAG = jieba.get_DAG(sentence)
     route = {}

     jieba.calc(sentence,DAG,0,route=route)

     x = 0
setup.py (18 changed lines)

@@ -1,11 +1,11 @@
-from distutils.core import setup
-setup(name='jieba',
-      version='0.33',
-      description='Chinese Words Segementation Utilities',
-      author='Sun, Junyi',
-      author_email='ccnusjy@gmail.com',
-      url='http://github.com/fxsjy',
-      packages=['jieba'],
+from distutils.core import setup
+setup(name='jieba',
+      version='0.33',
+      description='Chinese Words Segementation Utilities',
+      author='Sun, Junyi',
+      author_email='ccnusjy@gmail.com',
+      url='http://github.com/fxsjy',
+      packages=['jieba'],
       package_dir={'jieba':'jieba'},
       package_data={'jieba':['*.*','finalseg/*','analyse/*','posseg/*']}
-)
+)
test/test_pos.py

@@ -6,7 +6,7 @@ import jieba.posseg as pseg
 def cuttest(test_sent):
     result = pseg.cut(test_sent)
     for w in result:
-        print w.word, "/", w.flag, ", ",
+        print w.word, "/", w.flag, ", ",
     print ""


@@ -95,4 +95,4 @@ if __name__ == "__main__":
     cuttest('AT&T是一件不错的公司,给你发offer了吗?')
     cuttest('C++和c#是什么关系?11+122=133,是吗?PI=3.14159')
     cuttest('你认识那个和主席握手的的哥吗?他开一辆黑色的士。')
-    cuttest('枪杆子中出政权')
+    cuttest('枪杆子中出政权')
test/test_userdict.py

@@ -14,7 +14,7 @@ for w in words:
 result = pseg.cut(test_sent)

 for w in result:
-    print w.word, "/", w.flag, ", ",
+    print w.word, "/", w.flag, ", ",

 print "\n========"
test/test_whoosh.py

@@ -5,7 +5,7 @@ from whoosh.index import create_in,open_dir
 from whoosh.fields import *
 from whoosh.qparser import QueryParser

-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse import ChineseAnalyzer

 analyzer = ChineseAnalyzer()

@@ -18,31 +18,31 @@ ix = create_in("tmp", schema) # for create new index
 writer = ix.writer()

 writer.add_document(
-    title=u"document1",
+    title=u"document1",
     path=u"/a",
     content=u"This is the first document we’ve added!"
 )

 writer.add_document(
-    title=u"document2",
+    title=u"document2",
     path=u"/b",
     content=u"The second one 你 中文测试中文 is even more interesting! 吃水果"
 )

 writer.add_document(
-    title=u"document3",
+    title=u"document3",
     path=u"/c",
     content=u"买水果然后来世博园。"
 )

 writer.add_document(
-    title=u"document4",
+    title=u"document4",
     path=u"/c",
     content=u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"
 )

 writer.add_document(
-    title=u"document4",
+    title=u"document4",
     path=u"/c",
     content=u"咱俩交换一下吧。"
 )

@@ -55,7 +55,7 @@ for keyword in (u"水果世博园",u"你",u"first",u"中文",u"交换机",u"交
     print "result of ",keyword
     q = parser.parse(keyword)
     results = searcher.search(q)
-    for hit in results:
+    for hit in results:
         print hit.highlights("content")
     print "="*10
test/test_whoosh_file.py

@@ -6,7 +6,7 @@ from whoosh.index import create_in
 from whoosh.fields import *
 from whoosh.qparser import QueryParser

-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse import ChineseAnalyzer

 analyzer = ChineseAnalyzer()

@@ -23,7 +23,7 @@ with open(file_name,"rb") as inf:
     for line in inf:
         i+=1
         writer.add_document(
-            title=u"line"+str(i),
+            title=u"line"+str(i),
             path=u"/a",
             content=line.decode('gbk','ignore')
         )

@@ -36,6 +36,6 @@ for keyword in (u"水果小姐",u"你",u"first",u"中文",u"交换机",u"交换"
     print "result of ",keyword
     q = parser.parse(keyword)
     results = searcher.search(q)
-    for hit in results:
+    for hit in results:
         print hit.highlights("content")
     print "="*10