Merge pull request #238 from gumblex/master

use str.splitlines to avoid losing line breaks
This commit is contained in:
Sun Junyi 2015-02-12 13:35:52 +08:00
commit 9ca5b69907
2 changed files with 5 additions and 5 deletions

View File

@@ -43,7 +43,7 @@ def gen_pfdict(f_name):
ltotal = 0 ltotal = 0
with open(f_name, 'rb') as f: with open(f_name, 'rb') as f:
lineno = 0 lineno = 0
- for line in f.read().rstrip().decode('utf-8').split('\n'):
+ for line in f.read().rstrip().decode('utf-8').splitlines():
lineno += 1 lineno += 1
try: try:
word, freq = line.split(' ')[:2] word, freq = line.split(' ')[:2]
@@ -313,7 +313,7 @@ def load_userdict(f):
f = open(f, 'rb') f = open(f, 'rb')
content = f.read().decode('utf-8').lstrip('\ufeff') content = f.read().decode('utf-8').lstrip('\ufeff')
line_no = 0 line_no = 0
- for line in content.split("\n"):
+ for line in content.splitlines():
line_no += 1 line_no += 1
if not line.rstrip(): if not line.rstrip():
continue continue
@@ -366,7 +366,7 @@ def enable_parallel(processnum=None):
pool = Pool(processnum) pool = Pool(processnum)
def pcut(sentence, cut_all=False, HMM=True): def pcut(sentence, cut_all=False, HMM=True):
- parts = strdecode(sentence).split('\n')
+ parts = strdecode(sentence).splitlines(True)
if cut_all: if cut_all:
result = pool.map(__lcut_all, parts) result = pool.map(__lcut_all, parts)
elif HMM: elif HMM:
@@ -378,7 +378,7 @@ def enable_parallel(processnum=None):
yield w yield w
def pcut_for_search(sentence): def pcut_for_search(sentence):
- parts = strdecode(sentence).split('\n')
+ parts = strdecode(sentence).splitlines(True)
result = pool.map(__lcut_for_search, parts) result = pool.map(__lcut_for_search, parts)
for r in result: for r in result:
for w in r: for w in r:

View File

@@ -253,7 +253,7 @@ def cut(sentence, HMM=True):
for w in __cut_internal(sentence, HMM=HMM): for w in __cut_internal(sentence, HMM=HMM):
yield w yield w
else: else:
- parts = strdecode(sentence).split('\n')
+ parts = strdecode(sentence).splitlines(True)
if HMM: if HMM:
result = jieba.pool.map(__lcut_internal, parts) result = jieba.pool.map(__lcut_internal, parts)
else: else: