mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
fix compatibility problem with the `with`
statement
This commit is contained in:
parent
8814e08f9b
commit
1fcd3a417c
@ -71,21 +71,21 @@ class Tokenizer(object):
|
|||||||
lfreq = {}
|
lfreq = {}
|
||||||
ltotal = 0
|
ltotal = 0
|
||||||
f_name = resolve_filename(f)
|
f_name = resolve_filename(f)
|
||||||
with f:
|
for lineno, line in enumerate(f, 1):
|
||||||
for lineno, line in enumerate(f, 1):
|
try:
|
||||||
try:
|
line = line.strip().decode('utf-8')
|
||||||
line = line.strip().decode('utf-8')
|
word, freq = line.split(' ')[:2]
|
||||||
word, freq = line.split(' ')[:2]
|
freq = int(freq)
|
||||||
freq = int(freq)
|
lfreq[word] = freq
|
||||||
lfreq[word] = freq
|
ltotal += freq
|
||||||
ltotal += freq
|
for ch in xrange(len(word)):
|
||||||
for ch in xrange(len(word)):
|
wfrag = word[:ch + 1]
|
||||||
wfrag = word[:ch + 1]
|
if wfrag not in lfreq:
|
||||||
if wfrag not in lfreq:
|
lfreq[wfrag] = 0
|
||||||
lfreq[wfrag] = 0
|
except ValueError:
|
||||||
except ValueError:
|
raise ValueError(
|
||||||
raise ValueError(
|
'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||||
'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
f.close()
|
||||||
return lfreq, ltotal
|
return lfreq, ltotal
|
||||||
|
|
||||||
def initialize(self, dictionary=None):
|
def initialize(self, dictionary=None):
|
||||||
|
@ -88,17 +88,17 @@ class POSTokenizer(object):
|
|||||||
def load_word_tag(self, f):
|
def load_word_tag(self, f):
|
||||||
self.word_tag_tab = {}
|
self.word_tag_tab = {}
|
||||||
f_name = resolve_filename(f)
|
f_name = resolve_filename(f)
|
||||||
with f:
|
for lineno, line in enumerate(f, 1):
|
||||||
for lineno, line in enumerate(f, 1):
|
try:
|
||||||
try:
|
line = line.strip().decode("utf-8")
|
||||||
line = line.strip().decode("utf-8")
|
if not line:
|
||||||
if not line:
|
continue
|
||||||
continue
|
word, _, tag = line.split(" ")
|
||||||
word, _, tag = line.split(" ")
|
self.word_tag_tab[word] = tag
|
||||||
self.word_tag_tab[word] = tag
|
except Exception:
|
||||||
except Exception:
|
raise ValueError(
|
||||||
raise ValueError(
|
'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||||
'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
f.close()
|
||||||
|
|
||||||
def makesure_userdict_loaded(self):
|
def makesure_userdict_loaded(self):
|
||||||
if self.tokenizer.user_word_tag_tab:
|
if self.tokenizer.user_word_tag_tab:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user