From a5ecf70f71a96cd78e3130fc0de9b9e894ac1d6e Mon Sep 17 00:00:00 2001 From: Dingyuan Wang Date: Fri, 14 Nov 2014 20:58:29 +0800 Subject: [PATCH] update to v0.35 --- Changelog | 7 +++++++ jieba/analyse/__init__.py | 2 +- setup.py | 2 +- test/2to3.diff | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Changelog b/Changelog index 2d786f1..671add0 100644 --- a/Changelog +++ b/Changelog @@ -1,3 +1,10 @@ +2014-11-13: version 0.35 +1. 改进词典cache的dump和加载机制;by @gumblex +2. 提升关键词提取的性能; by @gumblex +3. 关键词提取新增基于textrank算法的子模块; by @singlee +4. 修复自定义stopwords功能的bug; by @walkskyer + + 2014-10-20: version 0.34 1. 提升性能,词典结构由Trie改为Prefix Set,内存占用减少2/3, 详见:https://github.com/fxsjy/jieba/pull/187;by @gumblex 2. 修复关键词提取功能的性能问题 diff --git a/jieba/analyse/__init__.py b/jieba/analyse/__init__.py index cdfb855..752204e 100644 --- a/jieba/analyse/__init__.py +++ b/jieba/analyse/__init__.py @@ -54,7 +54,7 @@ def set_stop_words(stop_words_path): if not os.path.exists(abs_path): raise Exception("jieba: path does not exist: " + abs_path) content = open(abs_path,'rb').read().decode('utf-8') - lines = content.split('\n') + lines = content.replace("\r", "").split('\n') for line in lines: STOP_WORDS.add(line) diff --git a/setup.py b/setup.py index d084544..3e6d860 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from distutils.core import setup setup(name='jieba3k', - version='0.34', + version='0.35', description='Chinese Words Segementation Utilities', author='Sun, Junyi', author_email='ccnusjy@gmail.com', diff --git a/test/2to3.diff b/test/2to3.diff index 36c9c10..d811204 100644 --- a/test/2to3.diff +++ b/test/2to3.diff @@ -354,7 +354,7 @@ diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.p from distutils.core import setup -setup(name='jieba', +setup(name='jieba3k', - version='0.34', + version='0.35', description='Chinese Words Segementation Utilities', author='Sun, Junyi', diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/extract_topic.py ../jieba/test/extract_topic.py