merge change from chao78787

2025-07-10 00:01:33 +08:00 · 2013-07-03 17:07:16 +08:00 · 2013-07-03 17:07:16 +08:00 · 9ea14a8a54
commit 9ea14a8a54
parent 45daf561c7 632a086035
3 changed files with 19 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -144,7 +144,7 @@ Output:
 * 实验结果：在4核3.4GHz Linux机器上，对金庸全集进行精确分词，获得了1MB/s的速度，是单进程版的3.3倍。
-功能 5) : Tokenize：返回词语在原文的起始位置
+功能 6) : Tokenize：返回词语在原文的起始位置
 ============================================
 * 注意，输入参数只接受unicode
 * 默认模式
@@ -181,7 +181,7 @@ word 有限公司            start: 6                end:10
 ```
-功能 6) : ChineseAnalyzer for Whoosh搜索引擎
+功能 7) : ChineseAnalyzer for Whoosh搜索引擎
 ============================================
 * 引用： `from jieba.analyse import ChineseAnalyzer `
 * 用法示例：https://github.com/fxsjy/jieba/blob/master/test/test_whoosh.py
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -264,15 +264,22 @@ def load_userdict(f):
        if line_no==1:
            word = word.replace(u'\ufeff',u"") #remove bom flag if it exists
        if len(tup)==3:
-            user_word_tag_tab[word]=tup[2].strip()
+            add_word(word, freq, tup[2])
        else:
            add_word(word, freq)
 def add_word(word, freq, tag=None):
    global FREQ, trie, total, user_word_tag_tab
    freq = float(freq)
    FREQ[word] = log(freq / total)
    if tag is not None:
        user_word_tag_tab[word] = tag.strip()
    p = trie
    for c in word:
        if not c in p:
-                p[c] ={}
+            p[c] = {}
        p = p[c]
-        p['']='' #ending flag
+    p[''] = ''                  # ending flag
 __ref_cut = cut
 __ref_cut_for_search = cut_for_search
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,6 @@
 from distutils.core import setup  
 setup(name='jieba',  
-      version='0.30',  
+      version='0.31.alpha',  
      description='Chinese Words Segementation Utilities',  
      author='Sun, Junyi',  
      author_email='ccnusjy@gmail.com',