diff --git a/jieba/__init__.py b/jieba/__init__.py
index 9d06158..bac3075 100644
--- a/jieba/__init__.py
+++ b/jieba/__init__.py
@@ -82,16 +82,20 @@ def initialize(*args):
             FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.items()]) #normalize
             min_freq = min(FREQ.values())
             print("dumping model to file cache " + cache_file, file=sys.stderr)
-            tmp_suffix = "."+str(random.random())
-            with open(cache_file+tmp_suffix,'wb') as temp_cache_file:
-                marshal.dump((trie,FREQ,total,min_freq),temp_cache_file)
-            if os.name=='nt':
-                import shutil
-                replace_file = shutil.move
-            else:
-                replace_file = os.rename
-            replace_file(cache_file+tmp_suffix,cache_file)
-
+            try:
+                tmp_suffix = "."+str(random.random())
+                with open(cache_file+tmp_suffix,'wb') as temp_cache_file:
+                    marshal.dump((trie,FREQ,total,min_freq),temp_cache_file)
+                if os.name=='nt':
+                    import shutil
+                    replace_file = shutil.move
+                else:
+                    replace_file = os.rename
+                replace_file(cache_file+tmp_suffix,cache_file)
+            except:
+                import traceback
+                print("dump cache file failed.",file=sys.stderr)
+                print(traceback.format_exc(),file=sys.stderr)
         initialized = True
 
         print("loading model cost ", time.time() - t1, "seconds.",file=sys.stderr)
@@ -263,15 +267,22 @@ def load_userdict(f):
         if line_no==1:
             word = word.replace('\ufeff',"") #remove bom flag if it exists
         if len(tup)==3:
-            user_word_tag_tab[word]=tup[2].strip()
-        freq = float(freq)
-        FREQ[word] = log(freq / total)
-        p = trie
-        for c in word:
-            if not c in p:
-                p[c] ={}
-            p = p[c]
-        p['']='' #ending flag
+            add_word(word, freq, tup[2])
+        else:
+            add_word(word, freq)
+
+def add_word(word, freq, tag=None):
+    global FREQ, trie, total, user_word_tag_tab
+    freq = float(freq)
+    FREQ[word] = log(freq / total)
+    if tag is not None:
+        user_word_tag_tab[word] = tag.strip()
+    p = trie
+    for c in word:
+        if not c in p:
+            p[c] = {}
+        p = p[c]
+    p[''] = '' # ending flag
 
 __ref_cut = cut
 __ref_cut_for_search = cut_for_search
@@ -333,3 +344,29 @@ def get_abs_path_dict():
     _curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )
     abs_path = os.path.join(_curpath,DICTIONARY)
     return abs_path
+
+def tokenize(unicode_sentence,mode="default"):
+    #mode ("default" or "search")
+    if not isinstance(unicode_sentence, str):
+        raise Exception("jieba: the input parameter should string.")
+    start = 0
+    if mode=='default':
+        for w in cut(unicode_sentence):
+            width = len(w)
+            yield (w,start,start+width)
+            start+=width
+    else:
+        for w in cut(unicode_sentence):
+            width = len(w)
+            if len(w)>2:
+                for i in range(len(w)-1):
+                    gram2 = w[i:i+2]
+                    if gram2 in FREQ:
+                        yield (gram2,start+i,start+i+2)
+            if len(w)>3:
+                for i in range(len(w)-2):
+                    gram3 = w[i:i+3]
+                    if gram3 in FREQ:
+                        yield (gram3,start+i,start+i+3)
+            yield (w,start,start+width)
+            start+=width
diff --git a/test/jieba_test.py b/test/jieba_test.py
index eea76f3..9404704 100644
--- a/test/jieba_test.py
+++ b/test/jieba_test.py
@@ -150,5 +150,15 @@ class JiebaTestCase(unittest.TestCase):
         print(" , ".join([w.word + " / " + w.flag for w in result]),file=sys.stderr)
         print("testPosseg",file=sys.stderr)
 
+    def testTokenize(self):
+        for content in test_contents:
+            result = jieba.tokenize(content)
+            assert isinstance(result, types.GeneratorType), "Test Tokenize Generator error"
+            result = list(result)
+            assert isinstance(result, list), "Test Tokenize error on content: %s" % content
+            for tk in result:
+                print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]), file=sys.stderr)
+        print("testTokenize",file=sys.stderr)
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_tokenize.py b/test/test_tokenize.py
index 994e1ca..ed7e172 100644
--- a/test/test_tokenize.py
+++ b/test/test_tokenize.py
@@ -7,10 +7,9 @@ g_mode="default"
 
 def cuttest(test_sent):
     global g_mode
-    test_sent = test_sent.decode('utf-8')
     result = jieba.tokenize(test_sent,mode=g_mode)
     for tk in result:
-        print "word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2])
+        print("word %s\t\t start: %d \t\t end:%d" % (tk[0],tk[1],tk[2]))
 
 
 if __name__ == "__main__":
diff --git a/test/test_whoosh.py b/test/test_whoosh.py
index bb2fda6..5617b5d 100644
--- a/test/test_whoosh.py
+++ b/test/test_whoosh.py
@@ -15,40 +15,40 @@ ix = create_in("tmp", schema) # for create new index
 writer = ix.writer()
 
 writer.add_document(
-    title=u"document1",
-    path=u"/a",
-    content=u"This is the first document we’ve added!"
+    title="document1",
+    path="/a",
+    content="This is the first document we’ve added!"
 )
 
 writer.add_document(
-    title=u"document2",
-    path=u"/b",
-    content=u"The second one 你 中文测试中文 is even more interesting! 吃水果"
+    title="document2",
+    path="/b",
+    content="The second one 你 中文测试中文 is even more interesting! 吃水果"
 )
 
 writer.add_document(
-    title=u"document3",
-    path=u"/c",
-    content=u"买水果然后来世博园。"
+    title="document3",
+    path="/c",
+    content="买水果然后来世博园。"
 )
 
 writer.add_document(
-    title=u"document4",
-    path=u"/c",
-    content=u"工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"
+    title="document4",
+    path="/c",
+    content="工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作"
 )
 
 writer.add_document(
-    title=u"document4",
-    path=u"/c",
-    content=u"咱俩交换一下吧。"
+    title="document4",
+    path="/c",
+    content="咱俩交换一下吧。"
 )
 
 writer.commit()
 searcher = ix.searcher()
 parser = QueryParser("content", schema=ix.schema)
 
-for keyword in (u"水果世博园",u"你",u"first",u"中文",u"交换机",u"交换"):
+for keyword in ("水果世博园","你","first","中文","交换机","交换"):
     print "result of ",keyword
     q = parser.parse(keyword)
     results = searcher.search(q)
@@ -56,5 +56,5 @@ for keyword in (u"水果世博园",u"你",u"first",u"中文",u"交换机",u"交
         print hit.highlights("content")
     print "="*10
 
-for t in analyzer(u"我的好朋友是李明;我爱北京天安门;IBM和Microsoft; I have a dream."):
+for t in analyzer("我的好朋友是李明;我爱北京天安门;IBM和Microsoft; I have a dream."):
     print t.text
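
For reference, a minimal usage sketch of the tokenize() and add_word() APIs introduced by this patch (not part of the patch itself). It assumes the patched Python 3 port of jieba is importable; the sample sentence, frequency value, and tag below are illustrative only.

# -*- coding: utf-8 -*-
# Usage sketch for the APIs added above (illustrative, not part of the patch).
import jieba

# Build the trie/FREQ tables up front so add_word() below has `total` available.
jieba.initialize()

sentence = "我爱北京天安门"  # sample input; any str works

# Default mode: one (word, start_offset, end_offset) tuple per segmented word.
for word, start, end in jieba.tokenize(sentence):
    print("word %s\t\t start: %d \t\t end:%d" % (word, start, end))

# Search mode: additionally yields 2-gram/3-gram sub-words found in FREQ,
# each with its own offsets, before the enclosing word itself.
for word, start, end in jieba.tokenize(sentence, mode="search"):
    print("word %s\t\t start: %d \t\t end:%d" % (word, start, end))

# add_word() registers a user word at runtime; freq is required in this
# version, and the optional tag is stored in user_word_tag_tab.
jieba.add_word("天安门广场", 3, tag="ns")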