From 7bfd432fc59c130b98bc893fb53cf3c07079eaf2 Mon Sep 17 00:00:00 2001 From: Richard Wong Date: Mon, 8 Jul 2013 13:51:39 +0800 Subject: [PATCH 1/4] Remove the unused imports. --- jieba/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 5731a0f..7b710b4 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -1,8 +1,7 @@ from __future__ import with_statement import re -import math -import os,sys -import pprint +import os +import sys import finalseg import time import tempfile From fbfaac2eaab5b3fd8e3a90cc44e3d7a6e395df40 Mon Sep 17 00:00:00 2001 From: Richard Wong Date: Mon, 8 Jul 2013 13:54:36 +0800 Subject: [PATCH 2/4] Reindent function * jieba/__init__.py (require_initialized): --- jieba/__init__.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 7b710b4..d206542 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -102,16 +102,18 @@ def initialize(*args): def require_initialized(fn): - global initialized,DICTIONARY - - @wraps(fn) - def wrapped(*args, **kwargs): - if initialized: - return fn(*args, **kwargs) - else: - initialize(DICTIONARY) - return fn(*args, **kwargs) - return wrapped + global initialized,DICTIONARY + + @wraps(fn) + def wrapped(*args, **kwargs): + if initialized: + return fn(*args, **kwargs) + else: + initialize(DICTIONARY) + return fn(*args, **kwargs) + + return wrapped + def __cut_all(sentence): dag = get_DAG(sentence) From 99d2492d679265d2470a955a6ad165e0cc0606a4 Mon Sep 17 00:00:00 2001 From: Richard Wong Date: Wed, 10 Jul 2013 16:22:17 +0800 Subject: [PATCH 3/4] Add re.U flag to re variable. --- jieba/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index d206542..469bb1a 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -212,9 +212,9 @@ def cut(sentence,cut_all=False): sentence = sentence.decode('utf-8') except UnicodeDecodeError: sentence = sentence.decode('gbk','ignore') - re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\s+)") + re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile(ur"(\s+)", re.U) if cut_all: - re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"[^a-zA-Z0-9+#\n]") + re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)", re.U), re.compile(ur"[^a-zA-Z0-9+#\n]", re.U) blocks = re_han.split(sentence) cut_block = __cut_DAG if cut_all: From c2ded83ead9e2c9ed195897913f3f481bec96ddf Mon Sep 17 00:00:00 2001 From: Richard Wong Date: Wed, 10 Jul 2013 16:22:49 +0800 Subject: [PATCH 4/4] Refactor: fix line indent to 4. * jieba/__init__.py (cut): --- jieba/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index 469bb1a..fe3988c 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -221,9 +221,9 @@ def cut(sentence,cut_all=False): cut_block = __cut_all for blk in blocks: if re_han.match(blk): - #pprint.pprint(__cut_DAG(blk)) - for word in cut_block(blk): - yield word + #pprint.pprint(__cut_DAG(blk)) + for word in cut_block(blk): + yield word else: tmp = re_skip.split(blk) for x in tmp: