mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
update to v0.35
This commit is contained in:
parent
40c0edfd99
commit
a5ecf70f71
@@ -1,3 +1,10 @@
|
||||
2014-11-13: version 0.35
|
||||
1. 改进词典cache的dump和加载机制;by @gumblex
|
||||
2. 提升关键词提取的性能; by @gumblex
|
||||
3. 关键词提取新增基于textrank算法的子模块; by @singlee
|
||||
4. 修复自定义stopwords功能的bug; by @walkskyer
|
||||
|
||||
|
||||
2014-10-20: version 0.34
|
||||
1. 提升性能,词典结构由Trie改为Prefix Set,内存占用减少2/3, 详见:https://github.com/fxsjy/jieba/pull/187;by @gumblex
|
||||
2. 修复关键词提取功能的性能问题
|
||||
|
@@ -54,7 +54,7 @@ def set_stop_words(stop_words_path):
|
||||
if not os.path.exists(abs_path):
|
||||
raise Exception("jieba: path does not exist: " + abs_path)
|
||||
content = open(abs_path,'rb').read().decode('utf-8')
|
||||
-lines = content.split('\n')
|
||||
+lines = content.replace("\r", "").split('\n')
|
||||
for line in lines:
|
||||
STOP_WORDS.add(line)
|
||||
|
||||
|
2
setup.py
2
setup.py
@@ -1,6 +1,6 @@
|
||||
from distutils.core import setup
|
||||
setup(name='jieba3k',
|
||||
-version='0.34',
|
||||
+version='0.35',
|
||||
description='Chinese Words Segementation Utilities',
|
||||
author='Sun, Junyi',
|
||||
author_email='ccnusjy@gmail.com',
|
||||
|
@@ -354,7 +354,7 @@ diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.p
|
||||
from distutils.core import setup
|
||||
-setup(name='jieba',
|
||||
+setup(name='jieba3k',
|
||||
-version='0.34',
|
||||
+version='0.35',
|
||||
description='Chinese Words Segementation Utilities',
|
||||
author='Sun, Junyi',
|
||||
diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/extract_topic.py ../jieba/test/extract_topic.py
|
||||
|
Loading…
x
Reference in New Issue
Block a user