Add HMMSegment to the demo; a known bug is still unsolved.

This commit is contained in:
gwdwyy 2013-08-26 00:58:41 +08:00
parent 8530585d05
commit 05a4ba3a22
3 changed files with 61 additions and 15 deletions

View File

@ -24,7 +24,6 @@ SRCDIR = ../src
SRCLIB = $(SRCDIR)/libcppjieba.a SRCLIB = $(SRCDIR)/libcppjieba.a
# remove the objs after compilation # remove the objs after compilation
.INTERMEDIATE: $(OBJS)
.PHONY: clean $(SRCLIB) .PHONY: clean $(SRCLIB)
# Main Targets # Main Targets

View File

@ -5,7 +5,8 @@
using namespace CppJieba; using namespace CppJieba;
Segment seg; Segment seg;
bool init(const char * const filePath) HMMSegment hmmseg;
bool init(const char * const dictPath, const char * const modelPath)
{ {
if(!seg.init()) if(!seg.init())
{ {
@ -13,11 +14,16 @@ bool init(const char * const filePath)
return false; return false;
} }
if(!seg.loadSegDict(filePath)) if(!seg.loadSegDict(dictPath))
{ {
cout<<"seg loadDict failed."<<endl; cout<<"seg loadDict failed."<<endl;
return false; return false;
} }
if(!hmmseg.loadModel(modelPath))
{
cout<<"hmmseg loadModel failed."<<endl;
return false;
}
return true; return true;
} }
@ -37,6 +43,22 @@ void run(const char * const filePath)
} }
} }
/**
 * Segment every non-empty line of a text file with the HMM segmenter.
 *
 * For each non-empty line, prints the original line followed by the
 * segments produced by hmmseg.cut(), joined with "/".
 * If the file cannot be opened, a failure message is printed and
 * nothing is segmented.
 *
 * @param filePath path of the text file to read line by line.
 */
void hmmrun(const char * const filePath)
{
    ifstream ifile(filePath);
    if(!ifile)
    {
        // Report an unreadable input instead of silently printing nothing.
        cout<<"open file ["<<filePath<<"] failed."<<endl;
        return;
    }
    vector<string> res;
    string line;
    while(getline(ifile, line))
    {
        // hand cut() an empty vector for every line
        res.clear();
        if(!line.empty())
        {
            hmmseg.cut(line, res);
            cout<<line<<"\n"<<joinStr(res,"/")<<endl;
        }
    }
}
bool dispose() bool dispose()
{ {
if(!seg.dispose()) if(!seg.dispose())
@ -48,22 +70,21 @@ bool dispose()
} }
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8"; const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
const char * const DEFAULT_MODELPATH = "../dicts/hmm_model.utf8";
int main(int argc, char ** argv) int main(int argc, char ** argv)
{ {
/*map<string, string> mpss;
getArgvMap(argc, argv, mpss);
string enc = getMap<string, string>(mpss, "--encoding", "");
string dictPath = getMap<string, string>(mpss, "--dictpath", "../dicts/jieba.dict.gbk");
*/
if(argc < 2) if(argc < 2)
{ {
cout<<"usage: \n\t"<<argv[0]<<"[options] <filename>\n" cout<<"usage: \n\t"<<argv[0]<<"[options] <filename>\n"
<<"options:\n" <<"options:\n"
<<"\t--dictpath\tIf is not specified, the default is ../dicts/jieba.dict.utf8\n" <<"\t--algorithm\tSupported encoding methods are [cutDAG, cutHMM] for now. \n\t\t\tIf is not specified, the default is cutDAG\n"
<<"\t--dictpath\tIf is not specified, the default is "<<DEFAULT_DICTPATH<<'\n'
<<"\t--modelpath\tIf is not specified, the default is "<<DEFAULT_MODELPATH<<'\n'
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n" <<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n"
<<"example:\n" <<"example:\n"
<<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n" <<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n"
<<"\t"<<argv[0]<<" testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM\n"
<<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n" <<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n"
<<endl; <<endl;
@ -71,11 +92,17 @@ int main(int argc, char ** argv)
} }
ArgvContext arg(argc, argv); ArgvContext arg(argc, argv);
string dictPath = arg["--dictpath"]; string dictPath = arg["--dictpath"];
string modelPath = arg["--modelpath"];
string encoding = arg["--encoding"]; string encoding = arg["--encoding"];
if("" == dictPath) string algorithm = arg["--algorithm"];
if(dictPath.empty())
{ {
dictPath = DEFAULT_DICTPATH; dictPath = DEFAULT_DICTPATH;
} }
if(modelPath.empty())
{
modelPath = DEFAULT_MODELPATH;
}
if("gbk" == encoding) if("gbk" == encoding)
{ {
TransCode::setGbkEnc(); TransCode::setGbkEnc();
@ -85,8 +112,19 @@ int main(int argc, char ** argv)
TransCode::setUtf8Enc(); TransCode::setUtf8Enc();
} }
init(dictPath.c_str()); if(!init(dictPath.c_str(), modelPath.c_str()))
run(arg[1].c_str()); {
LogError("init failed.");
return -1;
}
if("cutHMM" == algorithm)
{
hmmrun(arg[1].c_str());
}
else
{
run(arg[1].c_str());
}
dispose(); dispose();
return 0; return 0;
} }

View File

@ -33,6 +33,7 @@ namespace CppJieba
bool HMMSegment::loadModel(const char* const filePath) bool HMMSegment::loadModel(const char* const filePath)
{ {
LogInfo(string_format("loadModel [%s] start ...", filePath));
ifstream ifile(filePath); ifstream ifile(filePath);
string line; string line;
vector<string> tmp; vector<string> tmp;
@ -98,6 +99,8 @@ namespace CppJieba
return false; return false;
} }
LogInfo(string_format("loadModel [%s] end.", filePath));
return true; return true;
} }
@ -117,6 +120,7 @@ namespace CppJieba
return false; return false;
} }
cout<<__FILE__<<__LINE__<<endl;
if(!viterbi(unico, status)) if(!viterbi(unico, status))
{ {
LogError("viterbi failed."); LogError("viterbi failed.");
@ -183,6 +187,7 @@ namespace CppJieba
weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], unico[0], MIN_DOUBLE); weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], unico[0], MIN_DOUBLE);
path[0 + y * X] = -1; path[0 + y * X] = -1;
} }
cout<<__FILE__<<__LINE__<<endl;
//process //process
for(uint x = 1; x < X; x++) for(uint x = 1; x < X; x++)
@ -191,6 +196,7 @@ namespace CppJieba
{ {
now = x + y*X; now = x + y*X;
weight[now] = MIN_DOUBLE; weight[now] = MIN_DOUBLE;
path[now] = E;
for(uint preY = 0; preY < Y; preY++) for(uint preY = 0; preY < Y; preY++)
{ {
old = x - 1 + preY * X; old = x - 1 + preY * X;
@ -209,8 +215,6 @@ namespace CppJieba
//cout<<x<<":"<<y<<":"<<weight[now]<<endl; //cout<<x<<":"<<y<<":"<<weight[now]<<endl;
//getchar(); //getchar();
} }
//cout<<_getEmitProb(_emitProbB, unico[x], MIN_DOUBLE)<<endl;
//getchar();
} }
endE = weight[X-1+E*X]; endE = weight[X-1+E*X];
@ -224,14 +228,19 @@ namespace CppJieba
{ {
stat = S; stat = S;
} }
cout<<__FILE__<<__LINE__<<endl;
status.assign(X, 0); status.assign(X, 0);
cout<<__FILE__<<__LINE__<<endl;
for(int x = X -1 ; x >= 0; x--) for(int x = X -1 ; x >= 0; x--)
{ {
status[x] = stat; status[x] = stat;
cout<<__FILE__<<__LINE__<<endl;
cout<<stat<<endl;
stat = path[x + stat*X]; stat = path[x + stat*X];
} }
cout<<__FILE__<<__LINE__<<endl;
delete [] path; delete [] path;
delete [] weight; delete [] weight;
return true; return true;
@ -330,7 +339,7 @@ int main()
HMMSegment hmm; HMMSegment hmm;
hmm.loadModel("../dicts/hmm_model.utf8"); hmm.loadModel("../dicts/hmm_model.utf8");
vector<string> res; vector<string> res;
hmm.cut("小明硕士毕业于北邮网络研究院", res); hmm.cut("小明硕士毕业于北邮网络研究院,然", res);
cout<<joinStr(res, "/")<<endl; cout<<joinStr(res, "/")<<endl;