mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add HMMsegment into demo but with bug unsolved.
This commit is contained in:
parent
8530585d05
commit
05a4ba3a22
@ -24,7 +24,6 @@ SRCDIR = ../src
|
|||||||
SRCLIB = $(SRCDIR)/libcppjieba.a
|
SRCLIB = $(SRCDIR)/libcppjieba.a
|
||||||
|
|
||||||
# remove the objs after compilation
|
# remove the objs after compilation
|
||||||
.INTERMEDIATE: $(OBJS)
|
|
||||||
.PHONY: clean $(SRCLIB)
|
.PHONY: clean $(SRCLIB)
|
||||||
|
|
||||||
# Main Targets
|
# Main Targets
|
||||||
|
@ -5,7 +5,8 @@
|
|||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
|
|
||||||
Segment seg;
|
Segment seg;
|
||||||
bool init(const char * const filePath)
|
HMMSegment hmmseg;
|
||||||
|
bool init(const char * const dictPath, const char * const modelPath)
|
||||||
{
|
{
|
||||||
if(!seg.init())
|
if(!seg.init())
|
||||||
{
|
{
|
||||||
@ -13,11 +14,16 @@ bool init(const char * const filePath)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!seg.loadSegDict(filePath))
|
if(!seg.loadSegDict(dictPath))
|
||||||
{
|
{
|
||||||
cout<<"seg loadDict failed."<<endl;
|
cout<<"seg loadDict failed."<<endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if(!hmmseg.loadModel(modelPath))
|
||||||
|
{
|
||||||
|
cout<<"hmmseg loadModel failed."<<endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -37,6 +43,22 @@ void run(const char * const filePath)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void hmmrun(const char * const filePath)
|
||||||
|
{
|
||||||
|
ifstream ifile(filePath);
|
||||||
|
vector<string> res;
|
||||||
|
string line;
|
||||||
|
while(getline(ifile, line))
|
||||||
|
{
|
||||||
|
res.clear();
|
||||||
|
if(!line.empty())
|
||||||
|
{
|
||||||
|
hmmseg.cut(line, res);
|
||||||
|
cout<<line<<"\n"<<joinStr(res,"/")<<endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool dispose()
|
bool dispose()
|
||||||
{
|
{
|
||||||
if(!seg.dispose())
|
if(!seg.dispose())
|
||||||
@ -48,22 +70,21 @@ bool dispose()
|
|||||||
}
|
}
|
||||||
|
|
||||||
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
|
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
|
||||||
|
const char * const DEFAULT_MODELPATH = "../dicts/hmm_model.utf8";
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
int main(int argc, char ** argv)
|
||||||
{
|
{
|
||||||
/*map<string, string> mpss;
|
|
||||||
getArgvMap(argc, argv, mpss);
|
|
||||||
string enc = getMap<string, string>(mpss, "--encoding", "");
|
|
||||||
string dictPath = getMap<string, string>(mpss, "--dictpath", "../dicts/jieba.dict.gbk");
|
|
||||||
*/
|
|
||||||
if(argc < 2)
|
if(argc < 2)
|
||||||
{
|
{
|
||||||
cout<<"usage: \n\t"<<argv[0]<<"[options] <filename>\n"
|
cout<<"usage: \n\t"<<argv[0]<<"[options] <filename>\n"
|
||||||
<<"options:\n"
|
<<"options:\n"
|
||||||
<<"\t--dictpath\tIf is not specified, the default is ../dicts/jieba.dict.utf8\n"
|
<<"\t--algorithm\tSupported encoding methods are [cutDAG, cutHMM] for now. \n\t\t\tIf is not specified, the default is cutDAG\n"
|
||||||
|
<<"\t--dictpath\tIf is not specified, the default is "<<DEFAULT_DICTPATH<<'\n'
|
||||||
|
<<"\t--modelpath\tIf is not specified, the default is "<<DEFAULT_MODELPATH<<'\n'
|
||||||
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n"
|
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is utf8.\n"
|
||||||
<<"example:\n"
|
<<"example:\n"
|
||||||
<<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n"
|
<<"\t"<<argv[0]<<" testlines.utf8 --encoding utf-8 --dictpath ../dicts/jieba.dict.utf8\n"
|
||||||
|
<<"\t"<<argv[0]<<" testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM\n"
|
||||||
<<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n"
|
<<"\t"<<argv[0]<<" testlines.gbk --encoding gbk --dictpath ../dicts/jieba.dict.gbk\n"
|
||||||
<<endl;
|
<<endl;
|
||||||
|
|
||||||
@ -71,11 +92,17 @@ int main(int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
ArgvContext arg(argc, argv);
|
ArgvContext arg(argc, argv);
|
||||||
string dictPath = arg["--dictpath"];
|
string dictPath = arg["--dictpath"];
|
||||||
|
string modelPath = arg["--modelpath"];
|
||||||
string encoding = arg["--encoding"];
|
string encoding = arg["--encoding"];
|
||||||
if("" == dictPath)
|
string algorithm = arg["--algorithm"];
|
||||||
|
if(dictPath.empty())
|
||||||
{
|
{
|
||||||
dictPath = DEFAULT_DICTPATH;
|
dictPath = DEFAULT_DICTPATH;
|
||||||
}
|
}
|
||||||
|
if(modelPath.empty())
|
||||||
|
{
|
||||||
|
modelPath = DEFAULT_MODELPATH;
|
||||||
|
}
|
||||||
if("gbk" == encoding)
|
if("gbk" == encoding)
|
||||||
{
|
{
|
||||||
TransCode::setGbkEnc();
|
TransCode::setGbkEnc();
|
||||||
@ -85,8 +112,19 @@ int main(int argc, char ** argv)
|
|||||||
TransCode::setUtf8Enc();
|
TransCode::setUtf8Enc();
|
||||||
}
|
}
|
||||||
|
|
||||||
init(dictPath.c_str());
|
if(!init(dictPath.c_str(), modelPath.c_str()))
|
||||||
run(arg[1].c_str());
|
{
|
||||||
|
LogError("init failed.");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if("cutHMM" == algorithm)
|
||||||
|
{
|
||||||
|
hmmrun(arg[1].c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
run(arg[1].c_str());
|
||||||
|
}
|
||||||
dispose();
|
dispose();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -33,6 +33,7 @@ namespace CppJieba
|
|||||||
|
|
||||||
bool HMMSegment::loadModel(const char* const filePath)
|
bool HMMSegment::loadModel(const char* const filePath)
|
||||||
{
|
{
|
||||||
|
LogInfo(string_format("loadModel [%s] start ...", filePath));
|
||||||
ifstream ifile(filePath);
|
ifstream ifile(filePath);
|
||||||
string line;
|
string line;
|
||||||
vector<string> tmp;
|
vector<string> tmp;
|
||||||
@ -98,6 +99,8 @@ namespace CppJieba
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LogInfo(string_format("loadModel [%s] end.", filePath));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,6 +120,7 @@ namespace CppJieba
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
if(!viterbi(unico, status))
|
if(!viterbi(unico, status))
|
||||||
{
|
{
|
||||||
LogError("viterbi failed.");
|
LogError("viterbi failed.");
|
||||||
@ -183,6 +187,7 @@ namespace CppJieba
|
|||||||
weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], unico[0], MIN_DOUBLE);
|
weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], unico[0], MIN_DOUBLE);
|
||||||
path[0 + y * X] = -1;
|
path[0 + y * X] = -1;
|
||||||
}
|
}
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
|
|
||||||
//process
|
//process
|
||||||
for(uint x = 1; x < X; x++)
|
for(uint x = 1; x < X; x++)
|
||||||
@ -191,6 +196,7 @@ namespace CppJieba
|
|||||||
{
|
{
|
||||||
now = x + y*X;
|
now = x + y*X;
|
||||||
weight[now] = MIN_DOUBLE;
|
weight[now] = MIN_DOUBLE;
|
||||||
|
path[now] = E;
|
||||||
for(uint preY = 0; preY < Y; preY++)
|
for(uint preY = 0; preY < Y; preY++)
|
||||||
{
|
{
|
||||||
old = x - 1 + preY * X;
|
old = x - 1 + preY * X;
|
||||||
@ -209,8 +215,6 @@ namespace CppJieba
|
|||||||
//cout<<x<<":"<<y<<":"<<weight[now]<<endl;
|
//cout<<x<<":"<<y<<":"<<weight[now]<<endl;
|
||||||
//getchar();
|
//getchar();
|
||||||
}
|
}
|
||||||
//cout<<_getEmitProb(_emitProbB, unico[x], MIN_DOUBLE)<<endl;
|
|
||||||
//getchar();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
endE = weight[X-1+E*X];
|
endE = weight[X-1+E*X];
|
||||||
@ -224,14 +228,19 @@ namespace CppJieba
|
|||||||
{
|
{
|
||||||
stat = S;
|
stat = S;
|
||||||
}
|
}
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
|
|
||||||
status.assign(X, 0);
|
status.assign(X, 0);
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
for(int x = X -1 ; x >= 0; x--)
|
for(int x = X -1 ; x >= 0; x--)
|
||||||
{
|
{
|
||||||
status[x] = stat;
|
status[x] = stat;
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
|
cout<<stat<<endl;
|
||||||
stat = path[x + stat*X];
|
stat = path[x + stat*X];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cout<<__FILE__<<__LINE__<<endl;
|
||||||
delete [] path;
|
delete [] path;
|
||||||
delete [] weight;
|
delete [] weight;
|
||||||
return true;
|
return true;
|
||||||
@ -330,7 +339,7 @@ int main()
|
|||||||
HMMSegment hmm;
|
HMMSegment hmm;
|
||||||
hmm.loadModel("../dicts/hmm_model.utf8");
|
hmm.loadModel("../dicts/hmm_model.utf8");
|
||||||
vector<string> res;
|
vector<string> res;
|
||||||
hmm.cut("小明硕士毕业于北邮网络研究院", res);
|
hmm.cut("小明硕士毕业于北邮网络研究院,然", res);
|
||||||
cout<<joinStr(res, "/")<<endl;
|
cout<<joinStr(res, "/")<<endl;
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user