fix compatibility problem with the `with` statement

This commit is contained in:
Dingyuan Wang 2015-11-13 13:16:19 +08:00
parent 8814e08f9b
commit 1fcd3a417c
2 changed files with 26 additions and 26 deletions

View File

@ -71,21 +71,21 @@ class Tokenizer(object):
lfreq = {} lfreq = {}
ltotal = 0 ltotal = 0
f_name = resolve_filename(f) f_name = resolve_filename(f)
with f: for lineno, line in enumerate(f, 1):
for lineno, line in enumerate(f, 1): try:
try: line = line.strip().decode('utf-8')
line = line.strip().decode('utf-8') word, freq = line.split(' ')[:2]
word, freq = line.split(' ')[:2] freq = int(freq)
freq = int(freq) lfreq[word] = freq
lfreq[word] = freq ltotal += freq
ltotal += freq for ch in xrange(len(word)):
for ch in xrange(len(word)): wfrag = word[:ch + 1]
wfrag = word[:ch + 1] if wfrag not in lfreq:
if wfrag not in lfreq: lfreq[wfrag] = 0
lfreq[wfrag] = 0 except ValueError:
except ValueError: raise ValueError(
raise ValueError( 'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
'invalid dictionary entry in %s at Line %s: %s' % (f_name, lineno, line)) f.close()
return lfreq, ltotal return lfreq, ltotal
def initialize(self, dictionary=None): def initialize(self, dictionary=None):

View File

@ -88,17 +88,17 @@ class POSTokenizer(object):
def load_word_tag(self, f): def load_word_tag(self, f):
self.word_tag_tab = {} self.word_tag_tab = {}
f_name = resolve_filename(f) f_name = resolve_filename(f)
with f: for lineno, line in enumerate(f, 1):
for lineno, line in enumerate(f, 1): try:
try: line = line.strip().decode("utf-8")
line = line.strip().decode("utf-8") if not line:
if not line: continue
continue word, _, tag = line.split(" ")
word, _, tag = line.split(" ") self.word_tag_tab[word] = tag
self.word_tag_tab[word] = tag except Exception:
except Exception: raise ValueError(
raise ValueError( 'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line))
'invalid POS dictionary entry in %s at Line %s: %s' % (f_name, lineno, line)) f.close()
def makesure_userdict_loaded(self): def makesure_userdict_loaded(self):
if self.tokenizer.user_word_tag_tab: if self.tokenizer.user_word_tag_tab: