mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
little change
This commit is contained in:
parent
847fb9f094
commit
8530585d05
14
README.md
14
README.md
@ -16,16 +16,30 @@ TransCode.cpp/TransCode.h 负责转换编码类型,将utf8和gbk都转换成`u
|
|||||||
HMMSegment.cpp/HMMSegment.h
|
HMMSegment.cpp/HMMSegment.h
|
||||||
是根据HMM模型来进行分词,主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。
|
是根据HMM模型来进行分词,主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。
|
||||||
HMM模型由dicts/下面的`hmm_model.utf8`提供。
|
HMM模型由dicts/下面的`hmm_model.utf8`提供。
|
||||||
|
分词算法即Viterbi算法。
|
||||||
|
|
||||||
## Demo
|
## Demo
|
||||||
|
|
||||||
### Segment's demo
|
### Segment's demo
|
||||||
|
运行方法示例:
|
||||||
```
|
```
|
||||||
cd ./demo;
|
cd ./demo;
|
||||||
make;
|
make;
|
||||||
./segment_demo testlines.gbk
|
./segment_demo testlines.gbk
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Output:
|
||||||
|
```
|
||||||
|
我来到北京清华大学
|
||||||
|
我/来到/北京/清华大学
|
||||||
|
|
||||||
|
他来到了网易杭研大厦
|
||||||
|
他/来到/了/网易/杭/研/大厦
|
||||||
|
|
||||||
|
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
||||||
|
小/明/硕士/毕业/于/中国科学院/计算所/,/后/在/日本京都大学/深造
|
||||||
|
```
|
||||||
|
|
||||||
Run `./segment_demo` to get help.
|
Run `./segment_demo` to get help.
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -58,7 +58,7 @@ int main(int argc, char ** argv)
|
|||||||
*/
|
*/
|
||||||
if(argc < 2)
|
if(argc < 2)
|
||||||
{
|
{
|
||||||
cout<<"usage: \n\t"<<argv[0]<<" <filename> [options]\n"
|
cout<<"usage: \n\t"<<argv[0]<<"[options] <filename>\n"
|
||||||
<<"options:\n"
|
<<"options:\n"
|
||||||
<<"\t--dictpath\tIf is not specified, the default is ../dicts/jieba.dict.utf8\n"
|
<<"\t--dictpath\tIf is not specified, the default is ../dicts/jieba.dict.utf8\n"
|
||||||
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n"
|
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n"
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
AT&T 3 nz
|
|
||||||
B超 3 n
|
|
||||||
C# 3 nz
|
|
||||||
C++ 3 nz
|
|
||||||
一一 1670 m
|
|
||||||
一一二 11 m
|
|
Loading…
x
Reference in New Issue
Block a user