mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
fix English part of README
This commit is contained in:
parent
31b7d11809
commit
9d2818b440
44
README.md
44
README.md
@ -400,20 +400,22 @@ Main Functions
|
||||
|
||||
**Code example: segmentation**
|
||||
|
||||
#encoding=utf-8
|
||||
import jieba
|
||||
```python
|
||||
#encoding=utf-8
|
||||
import jieba
|
||||
|
||||
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
|
||||
print "Full Mode:", "/ ".join(seg_list) # 全模式
|
||||
seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
|
||||
print "Full Mode:", "/ ".join(seg_list) # 全模式
|
||||
|
||||
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
|
||||
print "Default Mode:", "/ ".join(seg_list) # 默认模式
|
||||
seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
|
||||
print "Default Mode:", "/ ".join(seg_list) # 默认模式
|
||||
|
||||
seg_list = jieba.cut("他来到了网易杭研大厦")
|
||||
print ", ".join(seg_list)
|
||||
seg_list = jieba.cut("他来到了网易杭研大厦")
|
||||
print ", ".join(seg_list)
|
||||
|
||||
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式
|
||||
print ", ".join(seg_list)
|
||||
seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式
|
||||
print ", ".join(seg_list)
|
||||
```
|
||||
|
||||
Output:
|
||||
|
||||
@ -451,7 +453,7 @@ Output:
|
||||
|
||||
Example (keyword extraction)
|
||||
|
||||
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
||||
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
||||
|
||||
Developers can specify their own custom IDF corpus for jieba's keyword extraction.
|
||||
|
||||
@ -474,15 +476,17 @@ Use: `jieba.analyse.textrank(raw_text)`.
|
||||
* Tags the POS of each word after segmentation, using labels compatible with ictclas.
|
||||
* Example:
|
||||
|
||||
>>> import jieba.posseg as pseg
|
||||
>>> words = pseg.cut("我爱北京天安门")
|
||||
>>> for w in words:
|
||||
... print w.word, w.flag
|
||||
...
|
||||
我 r
|
||||
爱 v
|
||||
北京 ns
|
||||
天安门 ns
|
||||
```pycon
|
||||
>>> import jieba.posseg as pseg
|
||||
>>> words = pseg.cut("我爱北京天安门")
|
||||
>>> for w in words:
|
||||
... print w.word, w.flag
|
||||
...
|
||||
我 r
|
||||
爱 v
|
||||
北京 ns
|
||||
天安门 ns
|
||||
```
|
||||
|
||||
5) : Parallel Processing
|
||||
-----------
|
||||
|
Loading…
x
Reference in New Issue
Block a user