From 99d2492d679265d2470a955a6ad165e0cc0606a4 Mon Sep 17 00:00:00 2001 From: Richard Wong Date: Wed, 10 Jul 2013 16:22:17 +0800 Subject: [PATCH] Add re.U flag to re variable. --- jieba/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index d206542..469bb1a 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -212,9 +212,9 @@ def cut(sentence,cut_all=False): sentence = sentence.decode('utf-8') except UnicodeDecodeError: sentence = sentence.decode('gbk','ignore') - re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(ur"(\s+)") + re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile(ur"(\s+)", re.U) if cut_all: - re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"[^a-zA-Z0-9+#\n]") + re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)", re.U), re.compile(ur"[^a-zA-Z0-9+#\n]", re.U) blocks = re_han.split(sentence) cut_block = __cut_DAG if cut_all: