From de294427ccfc61c01433c39db2c4d631b444664c Mon Sep 17 00:00:00 2001
From: wyy <wuyanyi09@gmail.com>
Date: Mon, 9 Sep 2013 16:54:31 +0800
Subject: [PATCH] add example.sh and modify some demo functions

---
 README.md             | 39 +++++++++++++++++++++++++++++++--------
 demo/example.sh       |  4 ++++
 demo/segment_demo.cpp |  6 ++----
 3 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100755 demo/example.sh
diff --git a/README.md b/README.md
index 0c51004..1649935 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 Trie.cpp/Trie.h 负责载入词典的trie树，主要供Segment模块使用。
 ###Segment模块
 MPSegment.cpp/MPSegment.h 
-最大概率发:负责根据Trie树构建有向无环图和进行动态规划算法，是分词算法的核心。
+(Maximum Probability)最大概率法:负责根据Trie树构建有向无环图和进行动态规划算法，是分词算法的核心。
 ###TransCode模块
 TransCode.cpp/TransCode.h 负责转换编码类型，将utf8和gbk都转换成`uint16_t`类型，也负责逆转换。
 ###HMMSegment模块
@@ -44,8 +44,6 @@ Output:
 
 ### HMMSegment's demo
 
-___有待完善_
-
 ```
 cd ./demo;
 make;
@@ -62,6 +60,30 @@ Output:
 小明/硕士/毕业于/中国/科学院/计算所/，/后/在/日/本/京/都/大/学/深/造
 ```
 
+### MixSegment's demo
+```
+cd ./demo;
+make;
+./segment_demo testlines.utf8 --algorithm cutMix
+```
+
+Output:
+```
+我来到北京清华大学
+我/来到/北京/清华大学
+他来到了网易杭研大厦
+他/来到/了/网易/杭研/大厦
+杭研
+杭研
+小明硕士毕业于中国科学院计算所，后在日本京都大学深造
+小明/硕士/毕业/于/中国科学院/计算所/，/后/在/日本京都大学/深造
+```
+
+### 效果分析
+
+以上依次是MP,HMM,Mix三种方法的效果。  
+可以看出效果最好的是Mix，也就是融合MP和HMM的切词算法。既可以准确切出词典已有的词，又可以切出像"杭研"这样的未登录词。
+
 ##Help
 
 run `./segment_demo` to get help.
@@ -70,15 +92,16 @@ run `./segment_demo` to get help.
 usage:
         ./segment_demo[options] <filename>
 options:
-        --algorithm     Supported encoding methods are [cutDAG, cutHMM] for now.
-                        If is not specified, the default is cutDAG
-        --dictpath      If is not specified, the default is ../dicts/jieba.dict.utf8
-        --modelpath     If is not specified, the default is ../dicts/hmm_model.utf8
+        --algorithm     Supported encoding methods are [cutDAG, cutHMM, cutMix] for now.
+                        If not specified, the default is cutDAG
+        --dictpath      If not specified, the default is ../dicts/jieba.dict.utf8
+        --modelpath     If not specified, the default is ../dicts/hmm_model.utf8
         --encoding      Supported encoding methods are [gbk, utf-8] for now.
-                        If is not specified, the default is utf8.
+                        If not specified, the default is utf8.
 example:
         ./segment_demo testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8
         ./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM
+        ./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutMix
         ./segment_demo testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk
 
 ```
diff --git a/demo/example.sh b/demo/example.sh
new file mode 100755
index 0000000..b856869
--- /dev/null
+++ b/demo/example.sh
@@ -0,0 +1,4 @@
+make && \
+./segment_demo testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8 &&\
+./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM &&\
+./segment_demo testlines.utf8 --algorithm cutMix
diff --git a/demo/segment_demo.cpp b/demo/segment_demo.cpp
index d3f9a7e..3911c90 100644
--- a/demo/segment_demo.cpp
+++ b/demo/segment_demo.cpp
@@ -68,8 +68,7 @@ void cutMix(const char* const filePath)
 	while(getline(ifs, line))
 	{
 		mixseg.cut(line, res);
-        cout<<line<<endl;
-        cout<<vecToString(res)<<endl;
+        cout<<line<<'\n'<<joinStr(res,"/")<<endl;
 	}
 }
 
@@ -110,7 +109,6 @@ int main(int argc, char ** argv)
 			<<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n"
 			<<"\t"<<argv[0]<<" testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM\n"
 			<<"\t"<<argv[0]<<" testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutMix\n"
-			<<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n"
 			<<endl;
 		
 		return -1;
@@ -152,7 +150,7 @@ int main(int argc, char ** argv)
 	}
     else
     {
-		cutMix(arg[1].c_str());
+		cutMP(arg[1].c_str());
     }
 	dispose();
 	return 0;