From 65b78b2b4d860d300ddd7f8471b9561bffe11ff4 Mon Sep 17 00:00:00 2001 From: fxsjy Date: Wed, 24 Apr 2013 22:14:10 +0800 Subject: [PATCH] read() and then split -- faster; from __future__ import with --- jieba/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jieba/__init__.py b/jieba/__init__.py index df3faff..f658b5a 100644 --- a/jieba/__init__.py +++ b/jieba/__init__.py @@ -1,3 +1,4 @@ +from __future__ import with_statement import re import math import os,sys @@ -19,10 +20,10 @@ def gen_trie(f_name): ltotal = 0.0 with open(f_name, 'rb') as f: lineno = 1 - for line in f.readlines(): + for line in f.read().rstrip().decode('utf-8').split('\n'): lineno += 1 try: - word,freq,_ = line.decode('utf-8').split(' ') + word,freq,_ = line.split(' ') freq = float(freq) lfreq[word] = freq ltotal+=freq