mirror of
https://github.com/fxsjy/jieba.git
synced 2025-07-10 00:01:33 +08:00
fix compatibility problem with the `with`
statement
This commit is contained in:
parent
8814e08f9b
commit
1fcd3a417c
@ -71,21 +71,21 @@ class Tokenizer(object):
|
||||
lfreq = {}
|
||||
ltotal = 0
|
||||
f_name = resolve_filename(f)
|
||||
with f:
|
||||
for lineno, line in enumerate(f, 1):
|
||||
try:
|
||||
line = line.strip().decode('utf-8')
|
||||
word, freq = line.split(' ')[:2]
|
||||
freq = int(freq)
|
||||
lfreq[word] = freq
|
||||
ltotal += freq
|
||||
for ch in xrange(len(word)):
|
||||
wfrag = word[:ch + 1]
|
||||
if wfrag not in lfreq:
|
||||
lfreq[wfrag] = 0
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||
for lineno, line in enumerate(f, 1):
|
||||
try:
|
||||
line = line.strip().decode('utf-8')
|
||||
word, freq = line.split(' ')[:2]
|
||||
freq = int(freq)
|
||||
lfreq[word] = freq
|
||||
ltotal += freq
|
||||
for ch in xrange(len(word)):
|
||||
wfrag = word[:ch + 1]
|
||||
if wfrag not in lfreq:
|
||||
lfreq[wfrag] = 0
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||
f.close()
|
||||
return lfreq, ltotal
|
||||
|
||||
def initialize(self, dictionary=None):
|
||||
|
@ -88,17 +88,17 @@ class POSTokenizer(object):
|
||||
def load_word_tag(self, f):
|
||||
self.word_tag_tab = {}
|
||||
f_name = resolve_filename(f)
|
||||
with f:
|
||||
for lineno, line in enumerate(f, 1):
|
||||
try:
|
||||
line = line.strip().decode("utf-8")
|
||||
if not line:
|
||||
continue
|
||||
word, _, tag = line.split(" ")
|
||||
self.word_tag_tab[word] = tag
|
||||
except Exception:
|
||||
raise ValueError(
|
||||
'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||
for lineno, line in enumerate(f, 1):
|
||||
try:
|
||||
line = line.strip().decode("utf-8")
|
||||
if not line:
|
||||
continue
|
||||
word, _, tag = line.split(" ")
|
||||
self.word_tag_tab[word] = tag
|
||||
except Exception:
|
||||
raise ValueError(
|
||||
'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
|
||||
f.close()
|
||||
|
||||
def makesure_userdict_loaded(self):
|
||||
if self.tokenizer.user_word_tag_tab:
|
||||
|
Loading…
x
Reference in New Issue
Block a user