mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
将 HMMSegment 里面关于模型文件的数据独立成 HMMModel
This commit is contained in:
parent
d3b34b73c6
commit
b99d0698f0
142
src/HMMModel.hpp
Normal file
142
src/HMMModel.hpp
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
#ifndef CPPJIEBA_HMMMODEL_H
|
||||||
|
#define CPPJIEBA_HMMMODEL_H
|
||||||
|
|
||||||
|
#include "Limonp/StringUtil.hpp"
|
||||||
|
|
||||||
|
namespace CppJieba {
|
||||||
|
|
||||||
|
using namespace Limonp;
|
||||||
|
typedef unordered_map<uint16_t, double> EmitProbMap;
|
||||||
|
|
||||||
|
struct HMMModel {
|
||||||
|
/*
|
||||||
|
* STATUS:
|
||||||
|
* 0: HMMModel::B, 1: HMMModel::E, 2: HMMModel::M, 3:HMMModel::S
|
||||||
|
* */
|
||||||
|
enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
|
||||||
|
|
||||||
|
HMMModel(const string& modelPath) {
|
||||||
|
memset(startProb, 0, sizeof(startProb));
|
||||||
|
memset(transProb, 0, sizeof(transProb));
|
||||||
|
statMap[0] = 'B';
|
||||||
|
statMap[1] = 'E';
|
||||||
|
statMap[2] = 'M';
|
||||||
|
statMap[3] = 'S';
|
||||||
|
emitProbVec.push_back(&emitProbB);
|
||||||
|
emitProbVec.push_back(&emitProbE);
|
||||||
|
emitProbVec.push_back(&emitProbM);
|
||||||
|
emitProbVec.push_back(&emitProbS);
|
||||||
|
loadModel(modelPath);
|
||||||
|
}
|
||||||
|
~HMMModel() {
|
||||||
|
}
|
||||||
|
void loadModel(const string& filePath) {
|
||||||
|
ifstream ifile(filePath.c_str());
|
||||||
|
if(!ifile.is_open()) {
|
||||||
|
LogFatal("open %s failed.", filePath.c_str());
|
||||||
|
}
|
||||||
|
string line;
|
||||||
|
vector<string> tmp;
|
||||||
|
vector<string> tmp2;
|
||||||
|
//load startProb
|
||||||
|
if(!getLine(ifile, line)) {
|
||||||
|
LogFatal("load startProb");
|
||||||
|
}
|
||||||
|
split(line, tmp, " ");
|
||||||
|
if(tmp.size() != STATUS_SUM) {
|
||||||
|
LogFatal("start_p illegal");
|
||||||
|
}
|
||||||
|
for(size_t j = 0; j< tmp.size(); j++) {
|
||||||
|
startProb[j] = atof(tmp[j].c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
//load transProb
|
||||||
|
for(size_t i = 0; i < STATUS_SUM; i++) {
|
||||||
|
if(!getLine(ifile, line)) {
|
||||||
|
LogFatal("load transProb failed.");
|
||||||
|
}
|
||||||
|
split(line, tmp, " ");
|
||||||
|
if(tmp.size() != STATUS_SUM) {
|
||||||
|
LogFatal("trans_p illegal");
|
||||||
|
}
|
||||||
|
for(size_t j =0; j < STATUS_SUM; j++) {
|
||||||
|
transProb[i][j] = atof(tmp[j].c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//load emitProbB
|
||||||
|
if(!getLine(ifile, line) || !loadEmitProb(line, emitProbB)) {
|
||||||
|
LogFatal("load emitProbB failed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
//load emitProbE
|
||||||
|
if(!getLine(ifile, line) || !loadEmitProb(line, emitProbE)) {
|
||||||
|
LogFatal("load emitProbE failed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
//load emitProbM
|
||||||
|
if(!getLine(ifile, line) || !loadEmitProb(line, emitProbM)) {
|
||||||
|
LogFatal("load emitProbM failed.");
|
||||||
|
}
|
||||||
|
|
||||||
|
//load emitProbS
|
||||||
|
if(!getLine(ifile, line) || !loadEmitProb(line, emitProbS)) {
|
||||||
|
LogFatal("load emitProbS failed.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
double getEmitProb(const EmitProbMap* ptMp, uint16_t key,
|
||||||
|
double defVal)const {
|
||||||
|
EmitProbMap::const_iterator cit = ptMp->find(key);
|
||||||
|
if(cit == ptMp->end()) {
|
||||||
|
return defVal;
|
||||||
|
}
|
||||||
|
return cit->second;
|
||||||
|
}
|
||||||
|
bool getLine(ifstream& ifile, string& line) {
|
||||||
|
while(getline(ifile, line)) {
|
||||||
|
trim(line);
|
||||||
|
if(line.empty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(startsWith(line, "#")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
bool loadEmitProb(const string& line, EmitProbMap& mp) {
|
||||||
|
if(line.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
vector<string> tmp, tmp2;
|
||||||
|
Unicode unicode;
|
||||||
|
split(line, tmp, ",");
|
||||||
|
for(size_t i = 0; i < tmp.size(); i++) {
|
||||||
|
split(tmp[i], tmp2, ":");
|
||||||
|
if(2 != tmp2.size()) {
|
||||||
|
LogError("emitProb illegal.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
|
||||||
|
LogError("TransCode failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
mp[unicode[0]] = atof(tmp2[1].c_str());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
char statMap[STATUS_SUM];
|
||||||
|
double startProb[STATUS_SUM];
|
||||||
|
double transProb[STATUS_SUM][STATUS_SUM];
|
||||||
|
EmitProbMap emitProbB;
|
||||||
|
EmitProbMap emitProbE;
|
||||||
|
EmitProbMap emitProbM;
|
||||||
|
EmitProbMap emitProbS;
|
||||||
|
vector<EmitProbMap* > emitProbVec;
|
||||||
|
}; // struct HMMModel
|
||||||
|
|
||||||
|
} // namespace CppJieba
|
||||||
|
|
||||||
|
#endif
|
@ -5,47 +5,18 @@
|
|||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include "Limonp/StringUtil.hpp"
|
#include "HMMModel.hpp"
|
||||||
#include "TransCode.hpp"
|
|
||||||
#include "ISegment.hpp"
|
|
||||||
#include "SegmentBase.hpp"
|
#include "SegmentBase.hpp"
|
||||||
#include "DictTrie.hpp"
|
|
||||||
|
|
||||||
namespace CppJieba {
|
namespace CppJieba {
|
||||||
using namespace Limonp;
|
|
||||||
typedef unordered_map<uint16_t, double> EmitProbMap;
|
|
||||||
class HMMSegment: public SegmentBase {
|
class HMMSegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
/*
|
explicit HMMSegment(const string& filePath): model_(filePath) {
|
||||||
* STATUS:
|
|
||||||
* 0:B, 1:E, 2:M, 3:S
|
|
||||||
* */
|
|
||||||
enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
|
|
||||||
|
|
||||||
public:
|
|
||||||
HMMSegment() {}
|
|
||||||
explicit HMMSegment(const string& filePath) {
|
|
||||||
init(filePath);
|
|
||||||
}
|
}
|
||||||
virtual ~HMMSegment() {}
|
virtual ~HMMSegment() {}
|
||||||
public:
|
|
||||||
void init(const string& filePath) {
|
|
||||||
memset(startProb_, 0, sizeof(startProb_));
|
|
||||||
memset(transProb_, 0, sizeof(transProb_));
|
|
||||||
statMap_[0] = 'B';
|
|
||||||
statMap_[1] = 'E';
|
|
||||||
statMap_[2] = 'M';
|
|
||||||
statMap_[3] = 'S';
|
|
||||||
emitProbVec_.push_back(&emitProbB_);
|
|
||||||
emitProbVec_.push_back(&emitProbE_);
|
|
||||||
emitProbVec_.push_back(&emitProbM_);
|
|
||||||
emitProbVec_.push_back(&emitProbS_);
|
|
||||||
loadModel_(filePath.c_str());
|
|
||||||
LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
|
|
||||||
}
|
|
||||||
public:
|
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
public:
|
|
||||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
|
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
|
||||||
Unicode::const_iterator left = begin;
|
Unicode::const_iterator left = begin;
|
||||||
Unicode::const_iterator right = begin;
|
Unicode::const_iterator right = begin;
|
||||||
@ -77,7 +48,6 @@ class HMMSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
public:
|
|
||||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const {
|
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const {
|
||||||
if(begin == end) {
|
if(begin == end) {
|
||||||
return false;
|
return false;
|
||||||
@ -141,7 +111,7 @@ class HMMSegment: public SegmentBase {
|
|||||||
Unicode::const_iterator left = begin;
|
Unicode::const_iterator left = begin;
|
||||||
Unicode::const_iterator right;
|
Unicode::const_iterator right;
|
||||||
for(size_t i = 0; i < status.size(); i++) {
|
for(size_t i = 0; i < status.size(); i++) {
|
||||||
if(status[i] % 2) { //if(E == status[i] || S == status[i])
|
if(status[i] % 2) { //if(HMMModel::E == status[i] || HMMModel::S == status[i])
|
||||||
right = begin + i + 1;
|
right = begin + i + 1;
|
||||||
res.push_back(Unicode(left, right));
|
res.push_back(Unicode(left, right));
|
||||||
left = right;
|
left = right;
|
||||||
@ -150,12 +120,13 @@ class HMMSegment: public SegmentBase {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<size_t>& status)const {
|
bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end,
|
||||||
|
vector<size_t>& status) const {
|
||||||
if(begin == end) {
|
if(begin == end) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t Y = STATUS_SUM;
|
size_t Y = HMMModel::STATUS_SUM;
|
||||||
size_t X = end - begin;
|
size_t X = end - begin;
|
||||||
|
|
||||||
size_t XYSize = X * Y;
|
size_t XYSize = X * Y;
|
||||||
@ -167,22 +138,21 @@ class HMMSegment: public SegmentBase {
|
|||||||
|
|
||||||
//start
|
//start
|
||||||
for(size_t y = 0; y < Y; y++) {
|
for(size_t y = 0; y < Y; y++) {
|
||||||
weight[0 + y * X] = startProb_[y] + getEmitProb_(emitProbVec_[y], *begin, MIN_DOUBLE);
|
weight[0 + y * X] = model_.startProb[y] + model_.getEmitProb(model_.emitProbVec[y], *begin, MIN_DOUBLE);
|
||||||
path[0 + y * X] = -1;
|
path[0 + y * X] = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
double emitProb;
|
double emitProb;
|
||||||
|
|
||||||
for(size_t x = 1; x < X; x++) {
|
for(size_t x = 1; x < X; x++) {
|
||||||
for(size_t y = 0; y < Y; y++) {
|
for(size_t y = 0; y < Y; y++) {
|
||||||
now = x + y*X;
|
now = x + y*X;
|
||||||
weight[now] = MIN_DOUBLE;
|
weight[now] = MIN_DOUBLE;
|
||||||
path[now] = E; // warning
|
path[now] = HMMModel::E; // warning
|
||||||
emitProb = getEmitProb_(emitProbVec_[y], *(begin+x), MIN_DOUBLE);
|
emitProb = model_.getEmitProb(model_.emitProbVec[y], *(begin+x), MIN_DOUBLE);
|
||||||
for(size_t preY = 0; preY < Y; preY++) {
|
for(size_t preY = 0; preY < Y; preY++) {
|
||||||
old = x - 1 + preY * X;
|
old = x - 1 + preY * X;
|
||||||
tmp = weight[old] + transProb_[preY][y] + emitProb;
|
tmp = weight[old] + model_.transProb[preY][y] + emitProb;
|
||||||
if(tmp > weight[now]) {
|
if(tmp > weight[now]) {
|
||||||
weight[now] = tmp;
|
weight[now] = tmp;
|
||||||
path[now] = preY;
|
path[now] = preY;
|
||||||
@ -191,13 +161,13 @@ class HMMSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
endE = weight[X-1+E*X];
|
endE = weight[X-1+HMMModel::E*X];
|
||||||
endS = weight[X-1+S*X];
|
endS = weight[X-1+HMMModel::S*X];
|
||||||
stat = 0;
|
stat = 0;
|
||||||
if(endE >= endS) {
|
if(endE >= endS) {
|
||||||
stat = E;
|
stat = HMMModel::E;
|
||||||
} else {
|
} else {
|
||||||
stat = S;
|
stat = HMMModel::S;
|
||||||
}
|
}
|
||||||
|
|
||||||
status.resize(X);
|
status.resize(X);
|
||||||
@ -208,114 +178,10 @@ class HMMSegment: public SegmentBase {
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
void loadModel_(const char* const filePath) {
|
|
||||||
ifstream ifile(filePath);
|
|
||||||
if(!ifile.is_open()) {
|
|
||||||
LogFatal("open %s failed.", filePath);
|
|
||||||
}
|
|
||||||
string line;
|
|
||||||
vector<string> tmp;
|
|
||||||
vector<string> tmp2;
|
|
||||||
//load startProb_
|
|
||||||
if(!getLine_(ifile, line)) {
|
|
||||||
LogFatal("load startProb_");
|
|
||||||
}
|
|
||||||
split(line, tmp, " ");
|
|
||||||
if(tmp.size() != STATUS_SUM) {
|
|
||||||
LogFatal("start_p illegal");
|
|
||||||
}
|
|
||||||
for(size_t j = 0; j< tmp.size(); j++) {
|
|
||||||
startProb_[j] = atof(tmp[j].c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
//load transProb_
|
HMMModel model_;
|
||||||
for(size_t i = 0; i < STATUS_SUM; i++) {
|
}; // class HMMSegment
|
||||||
if(!getLine_(ifile, line)) {
|
|
||||||
LogFatal("load transProb_ failed.");
|
|
||||||
}
|
|
||||||
split(line, tmp, " ");
|
|
||||||
if(tmp.size() != STATUS_SUM) {
|
|
||||||
LogFatal("trans_p illegal");
|
|
||||||
}
|
|
||||||
for(size_t j =0; j < STATUS_SUM; j++) {
|
|
||||||
transProb_[i][j] = atof(tmp[j].c_str());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//load emitProbB_
|
} // namespace CppJieba
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) {
|
|
||||||
LogFatal("load emitProbB_ failed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
//load emitProbE_
|
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) {
|
|
||||||
LogFatal("load emitProbE_ failed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
//load emitProbM_
|
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) {
|
|
||||||
LogFatal("load emitProbM_ failed.");
|
|
||||||
}
|
|
||||||
|
|
||||||
//load emitProbS_
|
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) {
|
|
||||||
LogFatal("load emitProbS_ failed.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
bool getLine_(ifstream& ifile, string& line) {
|
|
||||||
while(getline(ifile, line)) {
|
|
||||||
trim(line);
|
|
||||||
if(line.empty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if(startsWith(line, "#")) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool loadEmitProb_(const string& line, EmitProbMap& mp) {
|
|
||||||
if(line.empty()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
vector<string> tmp, tmp2;
|
|
||||||
Unicode unicode;
|
|
||||||
split(line, tmp, ",");
|
|
||||||
for(size_t i = 0; i < tmp.size(); i++) {
|
|
||||||
split(tmp[i], tmp2, ":");
|
|
||||||
if(2 != tmp2.size()) {
|
|
||||||
LogError("emitProb_ illegal.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
|
|
||||||
LogError("TransCode failed.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
mp[unicode[0]] = atof(tmp2[1].c_str());
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
double getEmitProb_(const EmitProbMap* ptMp, uint16_t key, double defVal)const {
|
|
||||||
EmitProbMap::const_iterator cit = ptMp->find(key);
|
|
||||||
if(cit == ptMp->end()) {
|
|
||||||
return defVal;
|
|
||||||
}
|
|
||||||
return cit->second;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
char statMap_[STATUS_SUM];
|
|
||||||
double startProb_[STATUS_SUM];
|
|
||||||
double transProb_[STATUS_SUM][STATUS_SUM];
|
|
||||||
EmitProbMap emitProbB_;
|
|
||||||
EmitProbMap emitProbE_;
|
|
||||||
EmitProbMap emitProbM_;
|
|
||||||
EmitProbMap emitProbS_;
|
|
||||||
vector<EmitProbMap* > emitProbVec_;
|
|
||||||
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -11,17 +11,17 @@ using namespace Limonp;
|
|||||||
/*utf8*/
|
/*utf8*/
|
||||||
class KeywordExtractor {
|
class KeywordExtractor {
|
||||||
public:
|
public:
|
||||||
KeywordExtractor() {};
|
KeywordExtractor(const string& dictPath,
|
||||||
KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
|
const string& hmmFilePath,
|
||||||
init(dictPath, hmmFilePath, idfPath, stopWordPath, userDict);
|
const string& idfPath,
|
||||||
};
|
const string& stopWordPath,
|
||||||
~KeywordExtractor() {};
|
const string& userDict = "")
|
||||||
|
: segment_(dictPath, hmmFilePath, userDict) {
|
||||||
void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
|
|
||||||
loadIdfDict_(idfPath);
|
loadIdfDict_(idfPath);
|
||||||
loadStopWordDict_(stopWordPath);
|
loadStopWordDict_(stopWordPath);
|
||||||
segment_.init(dictPath, hmmFilePath, userDict);
|
}
|
||||||
};
|
~KeywordExtractor() {
|
||||||
|
}
|
||||||
|
|
||||||
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
|
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
|
||||||
vector<pair<string, double> > topWords;
|
vector<pair<string, double> > topWords;
|
||||||
|
@ -9,18 +9,14 @@
|
|||||||
namespace CppJieba {
|
namespace CppJieba {
|
||||||
class MixSegment: public SegmentBase {
|
class MixSegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
MixSegment() {
|
MixSegment(const string& mpSegDict, const string& hmmSegDict,
|
||||||
}
|
const string& userDict = "")
|
||||||
MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
: mpSeg_(mpSegDict, userDict),
|
||||||
init(mpSegDict, hmmSegDict, userDict);
|
hmmSeg_(hmmSegDict) {
|
||||||
|
LogInfo("MixSegment init %s, %s", mpSegDict.c_str(), hmmSegDict.c_str());
|
||||||
}
|
}
|
||||||
virtual ~MixSegment() {
|
virtual ~MixSegment() {
|
||||||
}
|
}
|
||||||
void init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
|
||||||
mpSeg_.init(mpSegDict, userDict);
|
|
||||||
hmmSeg_.init(hmmSegDict);
|
|
||||||
LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str());
|
|
||||||
}
|
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||||
vector<Unicode> words;
|
vector<Unicode> words;
|
||||||
|
@ -14,27 +14,15 @@ static const char* const POS_X = "x";
|
|||||||
|
|
||||||
class PosTagger {
|
class PosTagger {
|
||||||
public:
|
public:
|
||||||
PosTagger() {
|
PosTagger(const string& dictPath,
|
||||||
}
|
|
||||||
PosTagger(
|
|
||||||
const string& dictPath,
|
|
||||||
const string& hmmFilePath,
|
const string& hmmFilePath,
|
||||||
const string& userDictPath = ""
|
const string& userDictPath = "")
|
||||||
) {
|
: segment_(dictPath, hmmFilePath, userDictPath) {
|
||||||
init(dictPath, hmmFilePath, userDictPath);
|
dictTrie_ = segment_.getDictTrie();
|
||||||
|
LIMONP_CHECK(dictTrie_);
|
||||||
}
|
}
|
||||||
~PosTagger() {
|
~PosTagger() {
|
||||||
}
|
}
|
||||||
void init(
|
|
||||||
const string& dictPath,
|
|
||||||
const string& hmmFilePath,
|
|
||||||
const string& userDictPath = ""
|
|
||||||
) {
|
|
||||||
segment_.init(dictPath, hmmFilePath, userDictPath);
|
|
||||||
dictTrie_ = segment_.getDictTrie();
|
|
||||||
LIMONP_CHECK(dictTrie_);
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
bool tag(const string& src, vector<pair<string, string> >& res) const {
|
bool tag(const string& src, vector<pair<string, string> >& res) const {
|
||||||
vector<string> cutRes;
|
vector<string> cutRes;
|
||||||
|
@ -16,17 +16,14 @@
|
|||||||
namespace CppJieba {
|
namespace CppJieba {
|
||||||
class QuerySegment: public SegmentBase {
|
class QuerySegment: public SegmentBase {
|
||||||
public:
|
public:
|
||||||
QuerySegment() {};
|
QuerySegment(const string& dict, const string& model, size_t maxWordLen = 4,
|
||||||
QuerySegment(const string& dict, const string& model, size_t maxWordLen = 4, const string& userDict = "") {
|
const string& userDict = "")
|
||||||
init(dict, model, maxWordLen, userDict);
|
: mixSeg_(dict, model, userDict),
|
||||||
};
|
fullSeg_(mixSeg_.getDictTrie()) {
|
||||||
virtual ~QuerySegment() {};
|
|
||||||
void init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") {
|
|
||||||
mixSeg_.init(dict, model, userDict);
|
|
||||||
fullSeg_.init(mixSeg_.getDictTrie());
|
|
||||||
assert(maxWordLen);
|
assert(maxWordLen);
|
||||||
maxWordLen_ = maxWordLen;
|
maxWordLen_ = maxWordLen;
|
||||||
}
|
};
|
||||||
|
virtual ~QuerySegment() {};
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||||
//use mix cut first
|
//use mix cut first
|
||||||
|
Loading…
x
Reference in New Issue
Block a user