Mirror of https://github.com/fxsjy/jieba.git, synced 2025-07-10 00:01:33 +08:00
Compare commits
2 Commits
Author | SHA1 | Date
---|---|---
 | 67fa2e36e7 |
 | 1e20c89b66 |
Changelog

@@ -1,3 +1,6 @@
+2019-1-20: version 0.42.1
+1. Fixed setup.py failing to run on Python 2.7 (issue #809)
+
 2019-1-13: version 0.42
 1. Fixed a core dump in paddle mode when the input is an empty string @JesseyXujin
 2. Fixed dropped characters in cut_all mode @fxsjy
README.md

@@ -13,7 +13,7 @@ jieba
 * Accurate mode: tries to segment the sentence as precisely as possible; suitable for text analysis;
 * Full mode: scans out every sequence in the sentence that could form a word; very fast, but cannot resolve ambiguity;
 * Search engine mode: based on accurate mode, splits long words again to improve recall; suitable for search engine tokenization.
-* Paddle mode: implements segmentation with a sequence-labeling (bidirectional GRU) network model trained on the PaddlePaddle deep learning framework; part-of-speech tagging is also supported. Paddle mode requires paddlepaddle-tiny: `pip install paddlepaddle-tiny==1.6.1`. Paddle mode currently supports jieba v0.40 and above; for jieba versions below v0.40, please upgrade: `pip install jieba --upgrade`. [PaddlePaddle website](www.paddlepaddle.org.cn)
+* Paddle mode: implements segmentation with a sequence-labeling (bidirectional GRU) network model trained on the PaddlePaddle deep learning framework; part-of-speech tagging is also supported. Paddle mode requires paddlepaddle-tiny: `pip install paddlepaddle-tiny==1.6.1`. Paddle mode currently supports jieba v0.40 and above; for jieba versions below v0.40, please upgrade: `pip install jieba --upgrade`. [PaddlePaddle website](https://www.paddlepaddle.org.cn/)
 * Supports traditional Chinese segmentation
 * Supports custom dictionaries
 * MIT License
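The four modes described in this README excerpt map directly onto jieba's public API. Below is a minimal sketch, assuming jieba is installed (`pip install jieba`); the paddle lines are commented out because they additionally require paddlepaddle-tiny:

```python
# Minimal sketch of the four segmentation modes described above.
import jieba

sentence = "我来到北京清华大学"  # sample sentence used in the jieba README

# Accurate mode (the default): most precise segmentation, for text analysis.
print("/".join(jieba.cut(sentence, cut_all=False)))

# Full mode: every possible word, very fast but ambiguous.
print("/".join(jieba.cut(sentence, cut_all=True)))

# Search engine mode: re-splits long words to improve recall.
print("/".join(jieba.cut_for_search(sentence)))

# Paddle mode (jieba v0.40+; requires paddlepaddle-tiny==1.6.1):
# jieba.enable_paddle()
# print("/".join(jieba.cut(sentence, use_paddle=True)))
```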
jieba/__init__.py

@@ -1,6 +1,6 @@
 from __future__ import absolute_import, unicode_literals

-__version__ = '0.42'
+__version__ = '0.42.1'
 __license__ = 'MIT'

 import marshal
@@ -300,7 +300,7 @@ class Tokenizer(object):
         sentence = strdecode(sentence)
         if use_paddle and is_paddle_installed:
             # if sentence is null, it will raise core exception in paddle.
-            if sentence is None or sentence == "" or sentence == u"":
+            if sentence is None or len(sentence) == 0:
                 return
             import jieba.lac_small.predict as predict
             results = predict.get_sent(sentence)
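The fix above collapses two string comparisons into a single length check: `strdecode` has already normalized the input to a text string, so `len(sentence) == 0` covers both the `""` and `u""` cases the old code tested separately. A standalone illustration (`is_empty` is a hypothetical helper, not part of jieba):

```python
# Hypothetical helper mirroring the new guard; not jieba's code.
def is_empty(sentence):
    return sentence is None or len(sentence) == 0

assert is_empty(None)
assert is_empty("")    # the old `sentence == ""` case
assert is_empty(u"")   # the old `sentence == u""` case
assert not is_empty("jieba")
```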
setup.py
@@ -43,7 +43,7 @@ GitHub: https://github.com/fxsjy/jieba
 """

 setup(name='jieba',
-      version='0.42',
+      version='0.42.1',
       description='Chinese Words Segmentation Utilities',
       long_description=LONGDOC,
       author='Sun, Junyi',
@@ -71,5 +71,5 @@ setup(name='jieba',
       keywords='NLP,tokenizing,Chinese word segementation',
       packages=['jieba'],
       package_dir={'jieba':'jieba'},
-      package_data={'jieba':['*.*','finalseg/*','analyse/*','posseg/*', 'lac_small/*','lac_small/model_baseline/*']}
+      package_data={'jieba':['*.*','finalseg/*','analyse/*','posseg/*', 'lac_small/*.py','lac_small/*.dic', 'lac_small/model_baseline/*']}
 )
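The `package_data` change replaces the broad `lac_small/*` glob with explicit `lac_small/*.py` and `lac_small/*.dic` patterns, while keeping the separate entry for the model directory. A quick, hypothetical way to preview what those patterns pick up from a source checkout (not part of the repo):

```python
# Hypothetical check: list what the new package_data globs match
# inside a jieba source checkout.
import glob
import os

package_dir = "jieba"  # path to the jieba package directory (assumption)
patterns = ["lac_small/*.py", "lac_small/*.dic", "lac_small/model_baseline/*"]

for pattern in patterns:
    matches = glob.glob(os.path.join(package_dir, pattern))
    print("%-28s -> %d files" % (pattern, len(matches)))
```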