Merge pull request #238 from gumblex/master

use str.splitlines to avoid losing line breaks
2025-07-10 00:01:33 +08:00 · 2015-02-12 13:35:52 +08:00 · 2015-02-12 13:35:52 +08:00 · 9ca5b69907
commit 9ca5b69907
parent b14eb329e3 f2b7183a71
2 changed files with 5 additions and 5 deletions
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -43,7 +43,7 @@ def gen_pfdict(f_name):
    ltotal = 0
    with open(f_name, 'rb') as f:
        lineno = 0
-        for line in f.read().rstrip().decode('utf-8').split('\n'):
+        for line in f.read().rstrip().decode('utf-8').splitlines():
            lineno += 1
            try:
                word, freq = line.split(' ')[:2]
@@ -313,7 +313,7 @@ def load_userdict(f):
        f = open(f, 'rb')
    content = f.read().decode('utf-8').lstrip('\ufeff')
    line_no = 0
-    for line in content.split("\n"):
+    for line in content.splitlines():
        line_no += 1
        if not line.rstrip():
            continue
@@ -366,7 +366,7 @@ def enable_parallel(processnum=None):
    pool = Pool(processnum)

    def pcut(sentence, cut_all=False, HMM=True):
-        parts = strdecode(sentence).split('\n')
+        parts = strdecode(sentence).splitlines(True)
        if cut_all:
            result = pool.map(__lcut_all, parts)
        elif HMM:
@@ -378,7 +378,7 @@ def enable_parallel(processnum=None):
                yield w

    def pcut_for_search(sentence):
-        parts = strdecode(sentence).split('\n')
+        parts = strdecode(sentence).splitlines(True)
        result = pool.map(__lcut_for_search, parts)
        for r in result:
            for w in r:
--- a/jieba/posseg/__init__.py
+++ b/jieba/posseg/__init__.py
@@ -253,7 +253,7 @@ def cut(sentence, HMM=True):
        for w in __cut_internal(sentence, HMM=HMM):
            yield w
    else:
-        parts = strdecode(sentence).split('\n')
+        parts = strdecode(sentence).splitlines(True)
        if HMM:
            result = jieba.pool.map(__lcut_internal, parts)
        else: