diff --git a/Changelog b/Changelog
index 671add0..be1aaa3 100644
--- a/Changelog
+++ b/Changelog
@@ -1,3 +1,6 @@
+2014-11-15: version 0.35.1
+1. 修复 Python 3.2 的兼容性问题
+
 2014-11-13: version 0.35
 1. 改进词典cache的dump和加载机制；by @gumblex
 2. 提升关键词提取的性能; by @gumblex
diff --git a/setup.py b/setup.py
index 19200f2..10af9e9 100644
--- a/setup.py
+++ b/setup.py
@@ -73,7 +73,6 @@ setup(name='jieba',
         'Operating System :: OS Independent',
         'Natural Language :: Chinese (Simplified)',
         'Natural Language :: Chinese (Traditional)',
-        'Programming Language :: Cython',
         'Programming Language :: Python',
         'Programming Language :: Python :: 2',
         'Topic :: Text Processing',
diff --git a/test/2to3.diff b/test/2to3.diff
new file mode 100644
index 0000000..2c4396f
--- /dev/null
+++ b/test/2to3.diff
@@ -0,0 +1,522 @@
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/analyse/analyzer.py ../jieba/jieba/analyse/analyzer.py
+--- ./jieba/analyse/analyzer.py	2014-11-29 15:46:45.987925569 +0800
++++ ../jieba/jieba/analyse/analyzer.py	2014-11-29 15:34:42.859932465 +0800
+@@ -1,4 +1,4 @@
+-##encoding=utf-8
++#encoding=utf-8
+ from whoosh.analysis import RegexAnalyzer,LowercaseFilter,StopFilter,StemFilter
+ from whoosh.analysis import Tokenizer,Token
+ from whoosh.lang.porter import stem
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/analyse/__init__.py ../jieba/jieba/analyse/__init__.py
+--- ./jieba/analyse/__init__.py	2014-11-29 15:46:46.139925567 +0800
++++ ../jieba/jieba/analyse/__init__.py	2014-11-29 15:36:13.147931604 +0800
+@@ -26,7 +26,7 @@
+ 
+     def set_new_path(self, new_idf_path):
+         if self.path != new_idf_path:
+-            content = open(new_idf_path, 'rb').read().decode('utf-8')
++            content = open(new_idf_path, 'r', encoding='utf-8').read()
+             idf_freq = {}
+             lines = content.rstrip('\n').split('\n')
+             for line in lines:
+@@ -93,7 +93,7 @@
+         freq[k] *= idf_freq.get(k, median_idf) / total
+ 
+     if withWeight:
+-        tags = sorted(list(freq.items()), key=itemgetter(1), reverse=True)
++        tags = sorted(freq.items(), key=itemgetter(1), reverse=True)
+     else:
+         tags = sorted(freq, key=freq.__getitem__, reverse=True)
+     if topK:
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/analyse/textrank.py ../jieba/jieba/analyse/textrank.py
+--- ./jieba/analyse/textrank.py	2014-11-29 15:46:46.043925568 +0800
++++ ../jieba/jieba/analyse/textrank.py	2014-11-29 15:36:39.291931354 +0800
+@@ -1,4 +1,4 @@
+-#!/usr/bin/env python
++#!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ 
+ import sys
+@@ -22,12 +22,12 @@
+         outSum = collections.defaultdict(float)
+ 
+         wsdef = 1.0 / len(self.graph)
+-        for n, out in list(self.graph.items()):
++        for n, out in self.graph.items():
+             ws[n] = wsdef
+             outSum[n] = sum((e[2] for e in out), 0.0)
+ 
+         for x in range(10):  # 10 iters
+-            for n, inedges in list(self.graph.items()):
++            for n, inedges in self.graph.items():
+                 s = 0
+                 for e in inedges:
+                     s += e[2] / outSum[e[1]] * ws[e[1]]
+@@ -41,7 +41,7 @@
+             elif w > max_rank:
+                 max_rank = w
+ 
+-        for n, w in list(ws.items()):
++        for n, w in ws.items():
+             # to unify the weights, don't *100.
+             ws[n] = (w - min_rank / 10.0) / (max_rank - min_rank / 10.0)
+ 
+@@ -72,12 +72,12 @@
+                     continue
+                 cm[(words[i].word, words[j].word)] += 1
+ 
+-    for terms, w in list(cm.items()):
++    for terms, w in cm.items():
+         g.addEdge(terms[0], terms[1], w)
+ 
+     nodes_rank = g.rank()
+     if withWeight:
+-        tags = sorted(list(nodes_rank.items()), key=itemgetter(1), reverse=True)
++        tags = sorted(nodes_rank.items(), key=itemgetter(1), reverse=True)
+     else:
+         tags = sorted(nodes_rank, key=nodes_rank.__getitem__, reverse=True)
+     if topK:
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/finalseg/__init__.py ../jieba/jieba/finalseg/__init__.py
+--- ./jieba/finalseg/__init__.py	2014-11-29 15:46:46.367925565 +0800
++++ ../jieba/jieba/finalseg/__init__.py	2014-11-29 15:34:42.859932465 +0800
+@@ -1,4 +1,3 @@
+-
+ import re
+ import os
+ import marshal
+@@ -89,7 +88,7 @@
+             sentence = sentence.decode('utf-8')
+         except UnicodeDecodeError:
+             sentence = sentence.decode('gbk', 'ignore')
+-    re_han, re_skip = re.compile(r"([\u4E00-\u9FA5]+)"), re.compile(r"(\d+\.\d+|[a-zA-Z0-9]+)")
++    re_han, re_skip = re.compile("([\u4E00-\u9FA5]+)"), re.compile("(\d+\.\d+|[a-zA-Z0-9]+)")
+     blocks = re_han.split(sentence)
+     for blk in blocks:
+         if re_han.match(blk):
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/__init__.py ../jieba/jieba/__init__.py
+--- ./jieba/__init__.py	2014-11-29 15:46:45.955925569 +0800
++++ ../jieba/jieba/__init__.py	2014-11-29 15:39:03.335929981 +0800
+@@ -1,4 +1,3 @@
+-
+ __version__ = '0.35'
+ __license__ = 'MIT'
+ 
+@@ -51,7 +50,7 @@
+                     pfdict.add(word[:ch+1])
+             except ValueError as e:
+                 logger.debug('%s at line %s %s' % (f_name, lineno, line))
+-                raise ValueError(e)
++                raise e
+     return pfdict, lfreq, ltotal
+ 
+ def initialize(dictionary=None):
+@@ -229,11 +228,11 @@
+     '''The main function that segments an entire sentence that contains
+     Chinese characters into seperated words.
+     Parameter:
+-        - sentence: The str/unicode to be segmented.
++        - sentence: The str to be segmented.
+         - cut_all: Model type. True for full pattern, False for accurate pattern.
+         - HMM: Whether to use the Hidden Markov Model.
+     '''
+-    if not isinstance(sentence, str):
++    if isinstance(sentence, bytes):
+         try:
+             sentence = sentence.decode('utf-8')
+         except UnicodeDecodeError:
+@@ -243,9 +242,9 @@
+     # \r\n|\s : whitespace characters. Will not be handled.
+ 
+     if cut_all:
+-        re_han, re_skip = re.compile(r"([\u4E00-\u9FA5]+)", re.U), re.compile(r"[^a-zA-Z0-9+#\n]", re.U)
++        re_han, re_skip = re.compile("([\u4E00-\u9FA5]+)", re.U), re.compile("[^a-zA-Z0-9+#\n]", re.U)
+     else:
+-        re_han, re_skip = re.compile(r"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile(r"(\r\n|\s)", re.U)
++        re_han, re_skip = re.compile("([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)", re.U), re.compile("(\r\n|\s)", re.U)
+     blocks = re_han.split(sentence)
+     if cut_all:
+         cut_block = __cut_all
+@@ -339,8 +338,6 @@
+     global pool, cut, cut_for_search
+     if os.name == 'nt':
+         raise Exception("jieba: parallel mode only supports posix system")
+-    if sys.version_info[0]==2 and sys.version_info[1]<6:
+-        raise Exception("jieba: the parallel feature needs Python version>2.5")
+     from multiprocessing import Pool, cpu_count
+     if processnum is None:
+         processnum = cpu_count()
+@@ -393,12 +390,12 @@
+ def tokenize(unicode_sentence, mode="default", HMM=True):
+     """Tokenize a sentence and yields tuples of (word, start, end)
+     Parameter:
+-        - sentence: the unicode to be segmented.
++        - sentence: the str to be segmented.
+         - mode: "default" or "search", "search" is for finer segmentation.
+         - HMM: whether to use the Hidden Markov Model.
+     """
+     if not isinstance(unicode_sentence, str):
+-        raise Exception("jieba: the input parameter should be unicode.")
++        raise Exception("jieba: the input parameter should be str.")
+     start = 0
+     if mode == 'default':
+         for w in cut(unicode_sentence, HMM=HMM):
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/__main__.py ../jieba/jieba/__main__.py
+--- ./jieba/__main__.py	2014-11-29 15:46:45.747925571 +0800
++++ ../jieba/jieba/__main__.py	2014-11-29 15:34:42.859932465 +0800
+@@ -40,7 +40,7 @@
+ ln = fp.readline()
+ while ln:
+     l = ln.rstrip('\r\n')
+-    print((delim.join(jieba.cut(ln.rstrip('\r\n'), cutall, hmm)).encode('utf-8')))
++    print(delim.join(jieba.cut(ln.rstrip('\r\n'), cutall, hmm)))
+     ln = fp.readline()
+ 
+ fp.close()
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/posseg/__init__.py ../jieba/jieba/posseg/__init__.py
+--- ./jieba/posseg/__init__.py	2014-11-29 15:46:46.271925566 +0800
++++ ../jieba/jieba/posseg/__init__.py	2014-11-29 15:37:52.299930658 +0800
+@@ -1,4 +1,3 @@
+-
+ import re
+ import os
+ from . import viterbi
+@@ -18,14 +17,14 @@
+     _curpath = os.path.normpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ 
+     result = {}
+-    with open(f_name, "r") as f:
++    with open(f_name, "rb") as f:
+         for line in f:
+             line = line.strip()
+             if not line:
+                 continue
+-            word, _, tag = line.split(' ')
+-            result[word.decode('utf-8')] = tag
+-
++            line = line.decode("utf-8")
++            word, _, tag = line.split(" ")
++            result[word] = tag
+     if not isJython:
+         return result
+ 
+@@ -105,8 +104,8 @@
+         yield pair(sentence[next:], pos_list[next][1])
+ 
+ def __cut_detail(sentence):
+-    re_han, re_skip = re.compile(r"([\u4E00-\u9FA5]+)"), re.compile(r"([\.0-9]+|[a-zA-Z0-9]+)")
+-    re_eng, re_num = re.compile(r"[a-zA-Z0-9]+"), re.compile(r"[\.0-9]+")
++    re_han, re_skip = re.compile("([\u4E00-\u9FA5]+)"), re.compile("([\.0-9]+|[a-zA-Z0-9]+)")
++    re_eng, re_num = re.compile("[a-zA-Z0-9]+"), re.compile("[\.0-9]+")
+     blocks = re_han.split(sentence)
+     for blk in blocks:
+         if re_han.match(blk):
+@@ -130,7 +129,7 @@
+     x = 0
+     N = len(sentence)
+     buf = ''
+-    re_eng = re.compile(r'[a-zA-Z0-9]',re.U)
++    re_eng = re.compile('[a-zA-Z0-9]',re.U)
+     while x < N:
+         y = route[x][1]+1
+         l_word = sentence[x:y]
+@@ -195,8 +194,8 @@
+             sentence = sentence.decode('utf-8')
+         except UnicodeDecodeError:
+             sentence = sentence.decode('gbk', 'ignore')
+-    re_han, re_skip = re.compile(r"([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile(r"(\r\n|\s)")
+-    re_eng, re_num = re.compile(r"[a-zA-Z0-9]+"), re.compile(r"[\.0-9]+")
++    re_han, re_skip = re.compile("([\u4E00-\u9FA5a-zA-Z0-9+#&\._]+)"), re.compile("(\r\n|\s)")
++    re_eng, re_num = re.compile("[a-zA-Z0-9]+"), re.compile("[\.0-9]+")
+     blocks = re_han.split(sentence)
+     if HMM:
+         __cut_blk = __cut_DAG
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./jieba/posseg/viterbi.py ../jieba/jieba/posseg/viterbi.py
+--- ./jieba/posseg/viterbi.py	2014-11-29 15:46:46.303925566 +0800
++++ ../jieba/jieba/posseg/viterbi.py	2014-11-29 15:38:28.527930313 +0800
+@@ -8,7 +8,7 @@
+ def viterbi(obs, states, start_p, trans_p, emit_p):
+     V = [{}] #tabular
+     mem_path = [{}]
+-    all_states = list(trans_p.keys())
++    all_states = trans_p.keys()
+     for y in states.get(obs[0], all_states): #init
+         V[0][y] = start_p[y] + emit_p[y].get(obs[0], MIN_FLOAT)
+         mem_path[0][y] = ''
+@@ -16,9 +16,9 @@
+         V.append({})
+         mem_path.append({})
+         #prev_states = get_top_states(V[t-1])
+-        prev_states = [x for x in list(mem_path[t-1].keys()) if len(trans_p[x]) > 0]
++        prev_states = [x for x in mem_path[t-1].keys() if len(trans_p[x]) > 0]
+ 
+-        prev_states_expect_next = set((y for x in prev_states for y in list(trans_p[x].keys())))
++        prev_states_expect_next = set((y for x in prev_states for y in trans_p[x].keys()))
+         obs_states = set(states.get(obs[t], all_states)) & prev_states_expect_next
+ 
+         if not obs_states:
+@@ -29,7 +29,7 @@
+             V[t][y] = prob
+             mem_path[t][y] = state
+ 
+-    last = [(V[-1][y], y) for y in list(mem_path[-1].keys())]
++    last = [(V[-1][y], y) for y in mem_path[-1].keys()]
+     #if len(last)==0:
+         #print obs
+     prob, state = max(last)
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./README.md ../jieba/README.md
+--- ./README.md	2014-11-29 15:46:08.487925926 +0800
++++ ../jieba/README.md	2014-11-29 15:34:42.859932465 +0800
+@@ -4,6 +4,9 @@
+ "Jieba" (Chinese for "to stutter") Chinese text segmentation: built to be the best Python Chinese word segmentation module.
+ - _Scroll down for English documentation._
+ 
++注意！
++========
++这个branch `jieba3k` 是专门用于Python3.x的版本
+ 
+ 特点
+ ========
+@@ -68,16 +71,16 @@
+ import jieba
+ 
+ seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
+-print "Full Mode:", "/ ".join(seg_list)  # 全模式
++print("Full Mode:", "/ ".join(seg_list))  # 全模式
+ 
+ seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
+-print "Default Mode:", "/ ".join(seg_list)  # 精确模式
++print("Default Mode:", "/ ".join(seg_list))  # 精确模式
+ 
+ seg_list = jieba.cut("他来到了网易杭研大厦")  # 默认是精确模式
+-print ", ".join(seg_list)
++print(", ".join(seg_list))
+ 
+ seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")  # 搜索引擎模式
+-print ", ".join(seg_list)
++print(", ".join(seg_list))
+ ```
+ 
+ 输出:
+@@ -174,7 +177,7 @@
+ >>> import jieba.posseg as pseg
+ >>> words = pseg.cut("我爱北京天安门")
+ >>> for w in words:
+-...    print w.word, w.flag
++...    print(w.word, w.flag)
+ ...
+ 我 r
+ 爱 v
+@@ -203,7 +206,7 @@
+ ```python
+ result = jieba.tokenize(u'永和服装饰品有限公司')
+ for tk in result:
+-    print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
++    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
+ ```
+ 
+ ```
+@@ -219,7 +222,7 @@
+ ```python
+ result = jieba.tokenize(u'永和服装饰品有限公司',mode='search')
+ for tk in result:
+-    print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
++    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
+ ```
+ 
+ ```
+@@ -408,16 +411,16 @@
+ import jieba
+ 
+ seg_list = jieba.cut("我来到北京清华大学", cut_all=True)
+-print "Full Mode:", "/ ".join(seg_list)  # 全模式
++print("Full Mode:", "/ ".join(seg_list))  # 全模式
+ 
+ seg_list = jieba.cut("我来到北京清华大学", cut_all=False)
+-print "Default Mode:", "/ ".join(seg_list)  # 默认模式
++print("Default Mode:", "/ ".join(seg_list))  # 默认模式
+ 
+ seg_list = jieba.cut("他来到了网易杭研大厦")
+-print ", ".join(seg_list)
++print(", ".join(seg_list))
+ 
+ seg_list = jieba.cut_for_search("小明硕士毕业于中国科学院计算所，后在日本京都大学深造")  # 搜索引擎模式
+-print ", ".join(seg_list)
++print(", ".join(seg_list))
+ ```
+ 
+ Output:
+@@ -483,7 +486,7 @@
+ >>> import jieba.posseg as pseg
+ >>> words = pseg.cut("我爱北京天安门")
+ >>> for w in words:
+-...    print w.word, w.flag
++...    print(w.word, w.flag)
+ ...
+ 我 r
+ 爱 v
+@@ -512,7 +515,7 @@
+ ```python
+ result = jieba.tokenize(u'永和服装饰品有限公司')
+ for tk in result:
+-    print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
++    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
+ ```
+ 
+ ```
+@@ -528,7 +531,7 @@
+ ```python
+ result = jieba.tokenize(u'永和服装饰品有限公司',mode='search')
+ for tk in result:
+-    print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
++    print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
+ ```
+ 
+ ```
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./setup.py ../jieba/setup.py
+--- ./setup.py	2014-11-29 15:46:46.379925565 +0800
++++ ../jieba/setup.py	2014-11-29 15:42:20.263928103 +0800
+@@ -11,7 +11,7 @@
+ 
+ 完整文档见 ``README.md``
+ 
+-GitHub: https://github.com/fxsjy/jieba
++GitHub: https://github.com/fxsjy/jieba/tree/jieba3k
+ 
+ 特点
+ ====
+@@ -34,17 +34,11 @@
+ Python 2.x
+ ----------
+ 
+--  全自动安装： ``easy_install jieba`` 或者 ``pip install jieba``
+--  半自动安装：先下载 https://pypi.python.org/pypi/jieba/ ，解压后运行
+-   python setup.py install
+--  手动安装：将 jieba 目录放置于当前目录或者 site-packages 目录
+--  通过 ``import jieba`` 来引用
++见 https://pypi.python.org/pypi/jieba/
+ 
+ Python 3.x
+ ----------
+ 
+-见 https://pypi.python.org/pypi/jieba3k/
+-
+ -  目前 master 分支是只支持 Python 2.x 的
+ -  Python 3.x 版本的分支也已经基本可用：
+    https://github.com/fxsjy/jieba/tree/jieba3k
+@@ -59,13 +53,13 @@
+ 
+ """
+ 
+-setup(name='jieba',
++setup(name='jieba3k',
+       version='0.35.1',
+       description='Chinese Words Segementation Utilities',
+       long_description=LONGDOC,
+       author='Sun, Junyi',
+       author_email='ccnusjy@gmail.com',
+-      url='https://github.com/fxsjy/jieba',
++      url='https://github.com/fxsjy/jieba/tree/jieba3k',
+       license="MIT",
+       classifiers=[
+         'Intended Audience :: Developers',
+@@ -73,9 +67,8 @@
+         'Operating System :: OS Independent',
+         'Natural Language :: Chinese (Simplified)',
+         'Natural Language :: Chinese (Traditional)',
+         'Programming Language :: Python',
+-        'Programming Language :: Python :: 2',
++        'Programming Language :: Python :: 3',
+         'Topic :: Text Processing',
+         'Topic :: Text Processing :: Indexing',
+         'Topic :: Text Processing :: Linguistic',
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/extract_topic.py ../jieba/test/extract_topic.py
+--- ./test/extract_topic.py	2014-11-29 15:46:47.003925559 +0800
++++ ../jieba/test/extract_topic.py	2014-11-29 15:34:42.919932464 +0800
+@@ -51,13 +51,13 @@
+ print("training...")
+ 
+ nmf = decomposition.NMF(n_components=n_topic).fit(tfidf)
+-print(("done in %0.3fs." % (time.time() - t0)))
++print("done in %0.3fs." % (time.time() - t0))
+ 
+ # Inverse the vectorizer vocabulary to be able
+ feature_names = count_vect.get_feature_names()
+ 
+ for topic_idx, topic in enumerate(nmf.components_):
+-    print(("Topic #%d:" % topic_idx))
+-    print((" ".join([feature_names[i]
+-                    for i in topic.argsort()[:-n_top_words - 1:-1]])))
++    print("Topic #%d:" % topic_idx)
++    print(" ".join([feature_names[i]
++                    for i in topic.argsort()[:-n_top_words - 1:-1]]))
+     print("")
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/jiebacmd.py ../jieba/test/jiebacmd.py
+--- ./test/jiebacmd.py	2014-11-29 15:46:46.443925564 +0800
++++ ../jieba/test/jiebacmd.py	2014-11-29 15:34:42.919932464 +0800
+@@ -23,6 +23,6 @@
+         break
+     line = line.strip()
+     for word in jieba.cut(line):
+-        print(word.encode(default_encoding))
++        print(word)
+ 
+ 
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/jieba_test.py ../jieba/test/jieba_test.py
+--- ./test/jieba_test.py	2014-11-29 15:46:47.271925556 +0800
++++ ../jieba/test/jieba_test.py	2014-11-29 15:34:42.919932464 +0800
+@@ -152,7 +152,7 @@
+ #-*-coding: utf-8 -*-
+ import sys
++import imp
+ sys.path.append("../")
+ import unittest
+ import types
+@@ -97,7 +98,7 @@
+ 
+ class JiebaTestCase(unittest.TestCase):
+     def setUp(self):
+-        reload(jieba)
++        imp.reload(jieba)
+ 
+     def tearDown(self):
+         pass
+@@ -151,7 +152,7 @@
+ 
+     def testTokenize(self):
+         for content in test_contents:
+-            result = jieba.tokenize(content.decode('utf-8'))
++            result = jieba.tokenize(content)
+             assert isinstance(result, types.GeneratorType), "Test Tokenize Generator error"
+             result = list(result)
+             assert isinstance(result, list), "Test Tokenize error on content: %s" % content
+@@ -181,7 +181,7 @@
+ 
+     def testTokenize_NOHMM(self):
+         for content in test_contents:
+-            result = jieba.tokenize(content.decode('utf-8'),HMM=False)
++            result = jieba.tokenize(content,HMM=False)
+             assert isinstance(result, types.GeneratorType), "Test Tokenize Generator error"
+             result = list(result)
+             assert isinstance(result, list), "Test Tokenize error on content: %s" % content
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/test_tokenize_no_hmm.py ../jieba/test/test_tokenize_no_hmm.py
+--- ./test/test_tokenize_no_hmm.py	2014-11-29 15:46:47.355925556 +0800
++++ ../jieba/test/test_tokenize_no_hmm.py	2014-11-29 15:34:42.919932464 +0800
+@@ -7,7 +7,6 @@
+ 
+ def cuttest(test_sent):
+     global g_mode
+-    test_sent = test_sent.decode('utf-8')
+     result = jieba.tokenize(test_sent,mode=g_mode,HMM=False)
+     for tk in result:
+         print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
+diff -d -r -u '--exclude=.git' '--exclude=prob_*.py' '--exclude=char_state_tab.py' ./test/test_tokenize.py ../jieba/test/test_tokenize.py
+--- ./test/test_tokenize.py	2014-11-29 15:46:47.403925555 +0800
++++ ../jieba/test/test_tokenize.py	2014-11-29 15:34:42.919932464 +0800
+@@ -7,7 +7,6 @@
+ 
+ def cuttest(test_sent):
+     global g_mode
+-    test_sent = test_sent.decode('utf-8')
+     result = jieba.tokenize(test_sent,mode=g_mode)
+     for tk in result:
+         print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
diff --git a/test/auto2to3 b/test/auto2to3
new file mode 100644
index 0000000..72ed37d
--- /dev/null
+++ b/test/auto2to3
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Set 2to3 path.
+PYTHON2TO3=2to3
+# Copy the python2 version.
+echo Jieba 2to3 manual conversion tool
+echo
+if ! git rev-parse; then
+ exit 1
+fi
+echo Copying working directory to ../jieba2
+if [ -d ../jieba2 ]; then
+ echo Found existing ../jieba2
+ read -p "Replace it with new one? (y/n) " -r
+ if ! [[ $REPLY =~ ^[Yy]$ ]]; then
+  echo Cancelled.
+  exit
+ else
+  rm -rf ../jieba2
+ fi
+fi
+if ! git checkout jieba3k; then
+ exit 1
+fi
+cp -r . ../jieba2
+cd ../jieba2
+if ! git checkout master; then
+ exit 1
+fi
+# Here starts auto conversion.
+echo Converting jieba2 to Python3 ...
+find . -type f -name '*.py' \! -path '*/build/*' \! -name 'prob_*.py' \! -name 'char_state_tab.py' -exec $PYTHON2TO3 -w -n {} +
+find . -type f \! -path '*/build/*' -a \( -name 'prob_*.py' -o -name 'char_state_tab.py' \) -exec sed -i "s/u'\\\u/'\\\u/g" {} \;
+patch -p0 -s <../jieba/test/2to3.diff
+echo Done. Compare jieba and jieba2 to manually port.