From bb1e6000c61247c6134f8edebfcedc4589ba9e46 Mon Sep 17 00:00:00 2001
From: Dingyuan Wang
Date: Sun, 19 Oct 2014 10:57:46 +0800
Subject: [PATCH] fix version; fix spaces at end of line

---
 Changelog                  | 14 +++++++-------
 jieba/__main__.py          |  4 +++-
 jieba/analyse/analyzer.py  |  2 +-
 jieba/finalseg/__init__.py |  8 ++++----
 jieba/posseg/__init__.py   | 31 ++++++++++++++++---------------
 setup.py                   | 18 +++++++++---------
 test/test_pos.py           |  4 ++--
 test/test_userdict.py      |  2 +-
 test/test_whoosh.py        | 14 +++++++-------
 test/test_whoosh_flie.py   |  6 +++---
 10 files changed, 53 insertions(+), 50 deletions(-)

diff --git a/Changelog b/Changelog
index 258dda2..99be4ee 100644
--- a/Changelog
+++ b/Changelog
@@ -13,10 +13,10 @@
 2013-07-01: version 0.31
 ========================
 1. 修改了代码缩进格式,遵循PEP8标准
-2. 支持Jython解析器,感谢 @piaolingxue
+2. 支持Jython解析器,感谢 @piaolingxue
 3. 修复中英混合词汇不能识别数字在前词语的Bug
-4. 部分代码重构,感谢 @chao78787
-5. 多进程并行分词模式下自动检测CPU个数设置合适的进程数,感谢@linkerlin
+4. 部分代码重构,感谢 @chao78787
+5. 多进程并行分词模式下自动检测CPU个数设置合适的进程数,感谢@linkerlin
 6. 修复了0.3版中jieba.extra_tags方法对whoosh模块的错误依赖
 
@@ -55,8 +55,8 @@
 2013-04-27: version 0.28
 ========================
 1) 新增词典lazy load功能,用户可以在'import jieba'后再改变词典的路径. 感谢hermanschaaf
-2) 显示词典加载异常时错误的词条信息. 感谢neuront
-3) 修正了词典被vim编辑后会加载失败的bug. 感谢neuront
+2) 显示词典加载异常时错误的词条信息. 感谢neuront
+3) 修正了词典被vim编辑后会加载失败的bug. 感谢neuront
 
 2013-04-22: version 0.27
 ========================
@@ -93,7 +93,7 @@
 2012-11-28: version 0.22
 ========================
 1) 新增jieba.cut_for_search方法, 该方法在精确分词的基础上对“长词”进行再次切分,适用于搜索引擎领域的分词,比精确分词模式有更高的召回率。
-2) 开始支持Python3.x版。 之前一直是只支持Python2.x系列,从这个版本起有一个单独的jieba3k
+2) 开始支持Python3.x版。 之前一直是只支持Python2.x系列,从这个版本起有一个单独的jieba3k
 
 
 2012-11-23: version 0.21
@@ -104,7 +104,7 @@
 2012-11-06: version 0.20
 ========================
-1) 新增词性标注功能
+1) 新增词性标注功能
 
 
 2012-10-25: version 0.19
diff --git a/jieba/__main__.py b/jieba/__main__.py
index b2bd203..d90096d 100644
--- a/jieba/__main__.py
+++ b/jieba/__main__.py
@@ -3,7 +3,7 @@
 import sys
 import jieba
 from argparse import ArgumentParser
-parser = ArgumentParser(usage="%s -m jieba [options] filename" % sys.executable, description="Jieba command line interface.", version="Jieba " + jieba.__version__, epilog="If no filename specified, use STDIN instead.")
+parser = ArgumentParser(usage="%s -m jieba [options] filename" % sys.executable, description="Jieba command line interface.", epilog="If no filename specified, use STDIN instead.")
 parser.add_argument("-d", "--delimiter", metavar="DELIM", default=' / ',
                     nargs='?', const=' ',
                     help="use DELIM instead of ' / ' for word delimiter; use a space if it is without DELIM")
@@ -14,6 +14,8 @@
 parser.add_argument("-n", "--no-hmm", dest="hmm", action="store_false",
                     default=True, help="don't use the Hidden Markov Model")
 parser.add_argument("-q", "--quiet", action="store_true", default=False,
                     help="don't print loading messages to stderr")
+parser.add_argument("-V", '--version', action='version',
+                    version="Jieba " + jieba.__version__)
 parser.add_argument("filename", nargs='?', help="input file")
 args = parser.parse_args()
diff --git a/jieba/analyse/analyzer.py b/jieba/analyse/analyzer.py
index cc73589..d1b16b5 100644
--- a/jieba/analyse/analyzer.py
+++ b/jieba/analyse/analyzer.py
@@ -1,6 +1,6 @@
 ##encoding=utf-8
 from whoosh.analysis import RegexAnalyzer,LowercaseFilter,StopFilter,StemFilter
-from whoosh.analysis import Tokenizer,Token
+from whoosh.analysis import Tokenizer,Token
 from whoosh.lang.porter import stem
 
 import jieba
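Note on the jieba/__main__.py hunks above: argparse deprecated the
ArgumentParser(version=...) keyword in Python 2.7 and removed it in
Python 3, so the patch moves the version string to a "-V/--version"
argument with action='version'. A minimal standalone sketch of that
pattern (the "Jieba 0.33" string is illustrative, standing in for
"Jieba " + jieba.__version__ above):

    # Declare the version flag as an argument; action='version' prints
    # the given string and exits, replacing ArgumentParser(version=...).
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Jieba command line interface.")
    parser.add_argument("-V", "--version", action="version",
                        version="Jieba 0.33")  # illustrative version string
    args = parser.parse_args()
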
diff --git a/jieba/finalseg/__init__.py b/jieba/finalseg/__init__.py
index 9426a72..fa47268 100644
--- a/jieba/finalseg/__init__.py
+++ b/jieba/finalseg/__init__.py
@@ -23,26 +23,26 @@ def load_model():
     start_p = {}
     abs_path = os.path.join(_curpath, PROB_START_P)
-    with open(abs_path, mode='rb') as f:
+    with open(abs_path, mode='rb') as f:
         start_p = marshal.load(f)
     f.closed
 
     trans_p = {}
     abs_path = os.path.join(_curpath, PROB_TRANS_P)
-    with open(abs_path, 'rb') as f:
+    with open(abs_path, 'rb') as f:
         trans_p = marshal.load(f)
     f.closed
 
     emit_p = {}
     abs_path = os.path.join(_curpath, PROB_EMIT_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'rb') as f:
         emit_p = marshal.load(f)
     f.closed
 
     return start_p, trans_p, emit_p
 
 if sys.platform.startswith("java"):
-    start_P, trans_P, emit_P = load_model()
+    start_P, trans_P, emit_P = load_model()
 else:
     import prob_start,prob_trans,prob_emit
     start_P, trans_P, emit_P = prob_start.P, prob_trans.P, prob_emit.P
diff --git a/jieba/posseg/__init__.py b/jieba/posseg/__init__.py
index b45136e..a048d22 100644
--- a/jieba/posseg/__init__.py
+++ b/jieba/posseg/__init__.py
@@ -15,40 +15,41 @@ PROB_EMIT_P = "prob_emit.p"
 CHAR_STATE_TAB_P = "char_state_tab.p"
 
 def load_model(f_name, isJython=True):
-    _curpath=os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+    _curpath = os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
     result = {}
-    with file(f_name, "rb") as f:
-        for line in open(f_name,"rb"):
+    with open(f_name, "rb") as f:
+        for line in f:
             line = line.strip()
-            if line=="":continue
+            if not line:
+                continue
             word, _, tag = line.split(' ')
             result[word.decode('utf-8')] = tag
     f.closed
     if not isJython:
         return result
-    
+
     start_p = {}
     abs_path = os.path.join(_curpath, PROB_START_P)
-    with open(abs_path, mode='rb') as f:
+    with open(abs_path, mode='rb') as f:
         start_p = marshal.load(f)
     f.closed
-    
+
     trans_p = {}
     abs_path = os.path.join(_curpath, PROB_TRANS_P)
-    with open(abs_path, 'rb') as f:
+    with open(abs_path, 'rb') as f:
         trans_p = marshal.load(f)
     f.closed
-    
+
     emit_p = {}
     abs_path = os.path.join(_curpath, PROB_EMIT_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'rb') as f:
         emit_p = marshal.load(f)
     f.closed
 
     state = {}
     abs_path = os.path.join(_curpath, CHAR_STATE_TAB_P)
-    with file(abs_path, 'rb') as f:
+    with open(abs_path, 'rb') as f:
         state = marshal.load(f)
     f.closed
@@ -62,14 +63,14 @@ else:
     word_tag_tab = load_model(jieba.get_abs_path_dict(), isJython=False)
 
 def makesure_userdict_loaded(fn):
-    
+
     @wraps(fn)
     def wrapped(*args,**kwargs):
-        if len(jieba.user_word_tag_tab)>0:
+        if jieba.user_word_tag_tab:
             word_tag_tab.update(jieba.user_word_tag_tab)
             jieba.user_word_tag_tab = {}
         return fn(*args,**kwargs)
-    
+
     return wrapped
 
 class pair(object):
@@ -152,7 +153,7 @@ def __cut_DAG_NO_HMM(sentence):
 
 def __cut_DAG(sentence):
     DAG = jieba.get_DAG(sentence)
     route = {}
-    
+
     jieba.calc(sentence,DAG,0,route=route)
 
     x = 0
diff --git a/setup.py b/setup.py
index a65e500..db06809 100644
--- a/setup.py
+++ b/setup.py
@@ -1,11 +1,11 @@
-from distutils.core import setup
-setup(name='jieba',
-      version='0.33',
-      description='Chinese Words Segementation Utilities',
-      author='Sun, Junyi',
-      author_email='ccnusjy@gmail.com',
-      url='http://github.com/fxsjy',
-      packages=['jieba'],
+from distutils.core import setup
+setup(name='jieba',
+      version='0.33',
+      description='Chinese Words Segementation Utilities',
+      author='Sun, Junyi',
+      author_email='ccnusjy@gmail.com',
+      url='http://github.com/fxsjy',
+      packages=['jieba'],
       package_dir={'jieba':'jieba'},
       package_data={'jieba':['*.*','finalseg/*','analyse/*','posseg/*']}
-)
+)
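Note on the load_model() hunks above: the model files are serialized
with marshal, whose streams are raw bytes, so they are opened in binary
mode ('rb'); text mode would corrupt the data on Windows under Python 2
and fail outright on Python 3. file() is also a Python-2-only builtin,
hence the switch to open(). A minimal round-trip sketch (the file name
mirrors PROB_START_P above; the dict entry is illustrative):

    # marshal round-trip: binary mode on both ends.
    import marshal

    with open("prob_start.p", "wb") as f:
        marshal.dump({"B": -0.26}, f)  # illustrative start-probability entry

    with open("prob_start.p", "rb") as f:
        start_p = marshal.load(f)
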
diff --git a/test/test_pos.py b/test/test_pos.py
index 5354a43..3815e72 100644
--- a/test/test_pos.py
+++ b/test/test_pos.py
@@ -6,7 +6,7 @@ import jieba.posseg as pseg
 
 def cuttest(test_sent):
     result = pseg.cut(test_sent)
     for w in result:
-        print w.word, "/", w.flag, ", ",
+        print w.word, "/", w.flag, ", ",
     print ""
@@ -95,4 +95,4 @@ if __name__ == "__main__":
     cuttest('AT&T是一件不错的公司,给你发offer了吗?')
     cuttest('C++和c#是什么关系?11+122=133,是吗?PI=3.14159')
     cuttest('你认识那个和主席握手的的哥吗?他开一辆黑色的士。')
-    cuttest('枪杆子中出政权')
\ No newline at end of file
+    cuttest('枪杆子中出政权')
diff --git a/test/test_userdict.py b/test/test_userdict.py
index e5a4727..0a3d1ee 100644
--- a/test/test_userdict.py
+++ b/test/test_userdict.py
@@ -14,7 +14,7 @@ for w in words:
 
 result = pseg.cut(test_sent)
 
 for w in result:
-    print w.word, "/", w.flag, ", ",
+    print w.word, "/", w.flag, ", ",
 
 print "\n========"
diff --git a/test/test_whoosh.py b/test/test_whoosh.py
index 9a7c033..4995139 100644
--- a/test/test_whoosh.py
+++ b/test/test_whoosh.py
@@ -5,7 +5,7 @@
 from whoosh.index import create_in,open_dir
 from whoosh.fields import *
 from whoosh.qparser import QueryParser
 
-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse import ChineseAnalyzer
 
 analyzer = ChineseAnalyzer()
@@ -18,31 +18,31 @@ ix = create_in("tmp", schema) # for create new index
 writer = ix.writer()
 
 writer.add_document(
-    title=u"document1",
+    title=u"document1",
     path=u"/a",
     content=u"This is the first document we’ve added!"
 )
 
 writer.add_document(
-    title=u"document2",
+    title=u"document2",
     path=u"/b",
     content=u"The second one 你 中文测试中文 is even more interesting! 吃水果"
 )
 
 writer.add_document(
-    title=u"document3",
+    title=u"document3",
     path=u"/c",
     content=u"买水果然后来世博园。"
 )
 
 writer.add_document(
-    title=u"document4",
+    title=u"document4",
     path=u"/c",
     content=u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"
 )
 
 writer.add_document(
-    title=u"document4",
+    title=u"document4",
     path=u"/c",
     content=u"咱俩交换一下吧。"
 )
@@ -55,7 +55,7 @@ for keyword in (u"水果世博园",u"你",u"first",u"中文",u"交换机",u"交换"):
     print "result of ",keyword
     q = parser.parse(keyword)
     results = searcher.search(q)
-    for hit in results:
+    for hit in results:
         print hit.highlights("content")
     print "="*10
diff --git a/test/test_whoosh_flie.py b/test/test_whoosh_flie.py
index 3610b49..d403213 100644
--- a/test/test_whoosh_flie.py
+++ b/test/test_whoosh_flie.py
@@ -6,7 +6,7 @@
 from whoosh.index import create_in
 from whoosh.fields import *
 from whoosh.qparser import QueryParser
 
-from jieba.analyse import ChineseAnalyzer
+from jieba.analyse import ChineseAnalyzer
 
 analyzer = ChineseAnalyzer()
@@ -23,7 +23,7 @@ with open(file_name,"rb") as inf:
     for line in inf:
         i+=1
         writer.add_document(
-            title=u"line"+str(i),
+            title=u"line"+str(i),
             path=u"/a",
             content=line.decode('gbk','ignore')
         )
@@ -36,6 +36,6 @@ for keyword in (u"水果小姐",u"你",u"first",u"中文",u"交换机",u"交换"):
     print "result of ",keyword
     q = parser.parse(keyword)
     results = searcher.search(q)
-    for hit in results:
+    for hit in results:
         print hit.highlights("content")
     print "="*10
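For context, the two whoosh tests above exercise the same flow: jieba's
ChineseAnalyzer tokenizes Chinese text so whoosh can index, search and
highlight it. A condensed sketch of that flow (Python 2 to match the
tests; the Schema fields are assumptions inferred from the context
lines above, not copied from the test files):

    import os
    from whoosh.index import create_in
    from whoosh.fields import Schema, TEXT, ID
    from whoosh.qparser import QueryParser
    from jieba.analyse import ChineseAnalyzer

    analyzer = ChineseAnalyzer()
    schema = Schema(title=TEXT(stored=True), path=ID(stored=True),
                    content=TEXT(stored=True, analyzer=analyzer))
    if not os.path.exists("tmp"):
        os.mkdir("tmp")  # create_in() requires an existing directory
    ix = create_in("tmp", schema)
    writer = ix.writer()
    writer.add_document(title=u"document3", path=u"/c",
                        content=u"买水果然后来世博园。")
    writer.commit()

    with ix.searcher() as searcher:
        parser = QueryParser("content", schema=ix.schema)
        for hit in searcher.search(parser.parse(u"水果")):
            print hit.highlights("content")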