Merge pull request #75 from chao787/feature_richard

Refactoring jieba/__init__.py
This commit is contained in:
Sun Junyi 2013-07-10 01:34:43 -07:00
commit a1ad2cbd55

View File

@@ -1,8 +1,7 @@
from __future__ import with_statement
import re
import math
import os,sys
import pprint
import os
import sys
import finalseg
import time
import tempfile
@@ -112,8 +111,10 @@ def require_initialized(fn):
else:
initialize(DICTIONARY)
return fn(*args, **kwargs)
return wrapped
def __cut_all(sentence):
dag = get_DAG(sentence)
old_j = -1
@@ -211,9 +212,9 @@ def cut(sentence,cut_all=False):
sentence = sentence.decode('utf-8')
except UnicodeDecodeError:
sentence = sentence.decode('gbk','ignore')
re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\s+)")
re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile(ur"(\s+)", re.U)
if cut_all:
re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"[^a-zA-Z0-9+#\n]")
re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)", re.U), re.compile(ur"[^a-zA-Z0-9+#\n]", re.U)
blocks = re_han.split(sentence)
cut_block = __cut_DAG
if cut_all: