diff --git a/README.md b/README.md index e81d4d4..21ad8ec 100644 --- a/README.md +++ b/README.md @@ -400,20 +400,22 @@ Main Functions **Code example: segmentation** - #encoding=utf-8 - import jieba +```python +#encoding=utf-8 +import jieba - seg_list = jieba.cut("我来到北京清华大学", cut_all=True) - print "Full Mode:", "/ ".join(seg_list) # 全模式 +seg_list = jieba.cut("我来到北京清华大学", cut_all=True) +print "Full Mode:", "/ ".join(seg_list) # 全模式 - seg_list = jieba.cut("我来到北京清华大学", cut_all=False) - print "Default Mode:", "/ ".join(seg_list) # 默认模式 +seg_list = jieba.cut("我来到北京清华大学", cut_all=False) +print "Default Mode:", "/ ".join(seg_list) # 默认模式 - seg_list = jieba.cut("他来到了网易杭研大厦") - print ", ".join(seg_list) +seg_list = jieba.cut("他来到了网易杭研大厦") +print ", ".join(seg_list) - seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式 - print ", ".join(seg_list) +seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所,后在日本京都大学深造") # 搜索引擎模式 +print ", ".join(seg_list) +``` Output: @@ -451,7 +453,7 @@ Output: Example (keyword extraction) - https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py +https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py Developers can specify their own custom IDF corpus in jieba keyword extraction @@ -474,15 +476,17 @@ Use: `jieba.analyse.textrank(raw_text)`. * Tags the POS of each word after segmentation, using labels compatible with ictclas. * Example: - >>> import jieba.posseg as pseg - >>> words = pseg.cut("我爱北京天安门") - >>> for w in words: - ... print w.word, w.flag - ... - 我 r - 爱 v - 北京 ns - 天安门 ns +```pycon +>>> import jieba.posseg as pseg +>>> words = pseg.cut("我爱北京天安门") +>>> for w in words: +... print w.word, w.flag +... +我 r +爱 v +北京 ns +天安门 ns +``` 5) : Parallel Processing -----------