mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
use only one dictionary
This commit is contained in:
parent
6c0bcf6557
commit
193bfee1d4
16
README.md
16
README.md
@@ -78,6 +78,22 @@ Output:
|
|||||||
|
|
||||||
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
https://github.com/fxsjy/jieba/blob/master/test/extract_tags.py
|
||||||
|
|
||||||
|
功能 4) : 词性标注
|
||||||
|
================
|
||||||
|
* 标注句子分词后每个词的词性,采用和ictclas兼容的标记法
|
||||||
|
* 用法示例
|
||||||
|
|
||||||
|
>>> import jieba.posseg as pseg
|
||||||
|
>>> words =pseg.cut("我爱北京天安门")
|
||||||
|
>>> for w in words:
|
||||||
|
... print w.word,w.flag
|
||||||
|
...
|
||||||
|
我 r
|
||||||
|
爱 v
|
||||||
|
北京 ns
|
||||||
|
天安门 ns
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
分词速度
|
分词速度
|
||||||
=========
|
=========
|
||||||
|
@@ -16,7 +16,7 @@ def gen_trie(f_name):
|
|||||||
ltotal = 0.0
|
ltotal = 0.0
|
||||||
content = open(f_name,'rb').read().decode('utf-8')
|
content = open(f_name,'rb').read().decode('utf-8')
|
||||||
for line in content.split("\n"):
|
for line in content.split("\n"):
|
||||||
word,freq = line.split(" ")
|
word,freq,_ = line.split(" ")
|
||||||
freq = float(freq)
|
freq = float(freq)
|
||||||
lfreq[word] = freq
|
lfreq[word] = freq
|
||||||
ltotal+=freq
|
ltotal+=freq
|
||||||
|
756493
jieba/dict.txt
756493
jieba/dict.txt
File diff suppressed because it is too large
Load Diff
@@ -15,7 +15,7 @@ def load_model(f_name):
|
|||||||
for line in open(prob_p_path,"rb"):
|
for line in open(prob_p_path,"rb"):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line=="":continue
|
if line=="":continue
|
||||||
word, tag = line.split(' ')
|
word, _, tag = line.split(' ')
|
||||||
result[word.decode('utf-8')]=tag
|
result[word.decode('utf-8')]=tag
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -24,7 +24,7 @@ prob_start = load_model("prob_start.py")
|
|||||||
prob_trans = load_model("prob_trans.py")
|
prob_trans = load_model("prob_trans.py")
|
||||||
prob_emit = load_model("prob_emit.py")
|
prob_emit = load_model("prob_emit.py")
|
||||||
char_state_tab = load_model("char_state_tab.py")
|
char_state_tab = load_model("char_state_tab.py")
|
||||||
word_tag_tab = load_model("tags.txt")
|
word_tag_tab = load_model("../dict.txt")
|
||||||
|
|
||||||
class pair(object):
|
class pair(object):
|
||||||
def __init__(self,word,flag):
|
def __init__(self,word,flag):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user