diff --git a/jieba/finalseg/__init__.py b/jieba/finalseg/__init__.py index bf30fe2..177932e 100644 --- a/jieba/finalseg/__init__.py +++ b/jieba/finalseg/__init__.py @@ -58,7 +58,7 @@ def cut(sentence): sentence = sentence.decode('utf-8') except: sentence = sentence.decode('gbk','ignore') - re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"([\.0-9]+|[a-zA-Z0-9]+)") + re_han, re_skip = re.compile(ur"([\u4E00-\u9FA5]+)"), re.compile(ur"(\d+\.\d+|[a-zA-Z0-9]+)") blocks = re_han.split(sentence) for blk in blocks: if re_han.match(blk): diff --git a/test/test.py b/test/test.py index 868011f..cc43f53 100644 --- a/test/test.py +++ b/test/test.py @@ -97,4 +97,4 @@ if __name__ == "__main__": cuttest('枪杆子中出政权') cuttest('张三风同学走上了不归路') cuttest('阿Q腰间挂着BB机手里拿着大哥大,说:我一般吃饭不AA制的。') - cuttest('在1号店能买到小S和大S八卦的书。') + cuttest('在1号店能买到小S和大S八卦的书,还有3D电视。')