mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
fix python3 stuff
This commit is contained in:
parent
b16cf0d63f
commit
6a3f228c72
@ -91,7 +91,7 @@ def initialize(*args):
|
||||
|
||||
if load_from_cache_fail:
|
||||
trie,FREQ,total = gen_trie(abs_path)
|
||||
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.iteritems()]) #normalize
|
||||
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
|
||||
min_freq = min(FREQ.itervalues())
|
||||
logger.debug("dumping model to file cache %s" % cache_file)
|
||||
try:
|
||||
@ -131,7 +131,7 @@ def require_initialized(fn):
|
||||
def __cut_all(sentence):
|
||||
dag = get_DAG(sentence)
|
||||
old_j = -1
|
||||
for k,L in dag.iteritems():
|
||||
for k,L in dag.items():
|
||||
if len(L)==1 and k>old_j:
|
||||
yield sentence[k:L[0]+1]
|
||||
old_j = L[0]
|
||||
|
@ -29,7 +29,7 @@ def get_idf(abs_path):
|
||||
for line in lines:
|
||||
word,freq = line.split(' ')
|
||||
idf_freq[word] = float(freq)
|
||||
median_idf = sorted(idf_freq.values())[len(idf_freq)/2]
|
||||
median_idf = sorted(idf_freq.values())[len(idf_freq)//2]
|
||||
return idf_freq, median_idf
|
||||
|
||||
def set_stop_words(stop_words_path):
|
||||
@ -56,7 +56,7 @@ def extract_tags(sentence,topK=20):
|
||||
if w.lower() in STOP_WORDS: continue
|
||||
freq[w]=freq.get(w,0.0)+1.0
|
||||
total = sum(freq.values())
|
||||
freq = [(k,v/total) for k,v in freq.iteritems()]
|
||||
freq = [(k,v/total) for k,v in freq.items()]
|
||||
|
||||
tf_idf_list = [(v * idf_freq.get(k,median_idf),k) for k,v in freq]
|
||||
st_list = sorted(tf_idf_list,reverse=True)
|
||||
|
Loading…
x
Reference in New Issue
Block a user