mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
use only one dictionary
This commit is contained in:
parent
6c0bcf6557
commit
193bfee1d4
16
README.md
16
README.md
@ -78,6 +78,22 @@ Output:
|
||||
|
||||
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
||||
|
||||
功能 4) : 词性标注
|
||||
================
|
||||
* 标注句子分词后每个词的词性,采用和 ICTCLAS 兼容的标记法
|
||||
* 用法示例
|
||||
|
||||
>>> import jieba.posseg as pseg
|
||||
>>> words = pseg.cut("我爱北京天安门")
|
||||
>>> for w in words:
|
||||
...     print w.word, w.flag
|
||||
...
|
||||
我 r
|
||||
爱 v
|
||||
北京 ns
|
||||
天安门 ns
|
||||
|
||||
|
||||
|
||||
分词速度
|
||||
=========
|
||||
|
@ -16,7 +16,7 @@ def gen_trie(f_name):
|
||||
ltotal = 0.0
|
||||
content = open(f_name,'rb').read().decode('utf-8')
|
||||
for line in content.split("\n"):
|
||||
word,freq = line.split(" ")
|
||||
word,freq,_ = line.split(" ")
|
||||
freq = float(freq)
|
||||
lfreq[word] = freq
|
||||
ltotal+=freq
|
||||
|
756493
jieba/dict.txt
756493
jieba/dict.txt
File diff suppressed because it is too large
Load Diff
@ -15,7 +15,7 @@ def load_model(f_name):
|
||||
for line in open(prob_p_path,"rb"):
|
||||
line = line.strip()
|
||||
if line=="":continue
|
||||
word, tag = line.split(' ')
|
||||
word, _, tag = line.split(' ')
|
||||
result[word.decode('utf-8')]=tag
|
||||
return result
|
||||
|
||||
@ -24,7 +24,7 @@ prob_start = load_model("prob_start.py")
|
||||
prob_trans = load_model("prob_trans.py")
|
||||
prob_emit = load_model("prob_emit.py")
|
||||
char_state_tab = load_model("char_state_tab.py")
|
||||
word_tag_tab = load_model("tags.txt")
|
||||
word_tag_tab = load_model("../dict.txt")
|
||||
|
||||
class pair(object):
|
||||
def __init__(self,word,flag):
|
||||
|
Loading…
x
Reference in New Issue
Block a user