mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
增加更详细的错误日志,在初始化过程中合理使用LogFatal。
This commit is contained in:
parent
31400cee17
commit
c04b2dd0d4
@ -1,10 +1,11 @@
|
|||||||
# CppJieba ChangeLog
|
# CppJieba ChangeLog
|
||||||
|
|
||||||
## v2.4.5 (untagged)
|
## v2.5.0 (untagged)
|
||||||
|
|
||||||
1. 使得 QuerySegment 支持自定义词典(可选参数)。
|
1. 使得 QuerySegment 支持自定义词典(可选参数)。
|
||||||
2. 使得 KeywordExtractor 支持自定义词典(可选参数)。
|
2. 使得 KeywordExtractor 支持自定义词典(可选参数)。
|
||||||
3. 修改 Code Style ,参照 google code style 。
|
3. 修改 Code Style ,参照 google code style 。
|
||||||
|
4. 增加更详细的错误日志,在初始化过程中合理使用 LogFatal 。
|
||||||
|
|
||||||
## v2.4.4
|
## v2.4.4
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ class DictTrie {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool init(const string& dictPath, const string& userDictPath = "") {
|
void init(const string& dictPath, const string& userDictPath = "") {
|
||||||
if(trie_ != NULL) {
|
if(trie_ != NULL) {
|
||||||
LogFatal("trie already initted");
|
LogFatal("trie already initted");
|
||||||
}
|
}
|
||||||
@ -54,7 +54,6 @@ class DictTrie {
|
|||||||
shrink_(nodeInfos_);
|
shrink_(nodeInfos_);
|
||||||
trie_ = createTrie_(nodeInfos_);
|
trie_ = createTrie_(nodeInfos_);
|
||||||
assert(trie_);
|
assert(trie_);
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||||
|
@ -6,7 +6,6 @@
|
|||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include "Limonp/StringUtil.hpp"
|
#include "Limonp/StringUtil.hpp"
|
||||||
#include "Limonp/Logger.hpp"
|
|
||||||
#include "TransCode.hpp"
|
#include "TransCode.hpp"
|
||||||
#include "ISegment.hpp"
|
#include "ISegment.hpp"
|
||||||
#include "SegmentBase.hpp"
|
#include "SegmentBase.hpp"
|
||||||
@ -26,11 +25,11 @@ class HMMSegment: public SegmentBase {
|
|||||||
public:
|
public:
|
||||||
HMMSegment() {}
|
HMMSegment() {}
|
||||||
explicit HMMSegment(const string& filePath) {
|
explicit HMMSegment(const string& filePath) {
|
||||||
LIMONP_CHECK(init(filePath));
|
init(filePath);
|
||||||
}
|
}
|
||||||
virtual ~HMMSegment() {}
|
virtual ~HMMSegment() {}
|
||||||
public:
|
public:
|
||||||
bool init(const string& filePath) {
|
void init(const string& filePath) {
|
||||||
memset(startProb_, 0, sizeof(startProb_));
|
memset(startProb_, 0, sizeof(startProb_));
|
||||||
memset(transProb_, 0, sizeof(transProb_));
|
memset(transProb_, 0, sizeof(transProb_));
|
||||||
statMap_[0] = 'B';
|
statMap_[0] = 'B';
|
||||||
@ -41,9 +40,8 @@ class HMMSegment: public SegmentBase {
|
|||||||
emitProbVec_.push_back(&emitProbE_);
|
emitProbVec_.push_back(&emitProbE_);
|
||||||
emitProbVec_.push_back(&emitProbM_);
|
emitProbVec_.push_back(&emitProbM_);
|
||||||
emitProbVec_.push_back(&emitProbS_);
|
emitProbVec_.push_back(&emitProbS_);
|
||||||
LIMONP_CHECK(loadModel_(filePath.c_str()));
|
loadModel_(filePath.c_str());
|
||||||
LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
|
LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
@ -212,19 +210,21 @@ class HMMSegment: public SegmentBase {
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
bool loadModel_(const char* const filePath) {
|
void loadModel_(const char* const filePath) {
|
||||||
ifstream ifile(filePath);
|
ifstream ifile(filePath);
|
||||||
|
if(!ifile.is_open()) {
|
||||||
|
LogFatal("open %s failed.", filePath);
|
||||||
|
}
|
||||||
string line;
|
string line;
|
||||||
vector<string> tmp;
|
vector<string> tmp;
|
||||||
vector<string> tmp2;
|
vector<string> tmp2;
|
||||||
//load startProb_
|
//load startProb_
|
||||||
if(!getLine_(ifile, line)) {
|
if(!getLine_(ifile, line)) {
|
||||||
return false;
|
LogFatal("load startProb_");
|
||||||
}
|
}
|
||||||
split(line, tmp, " ");
|
split(line, tmp, " ");
|
||||||
if(tmp.size() != STATUS_SUM) {
|
if(tmp.size() != STATUS_SUM) {
|
||||||
LogError("start_p illegal");
|
LogFatal("start_p illegal");
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
for(size_t j = 0; j< tmp.size(); j++) {
|
for(size_t j = 0; j< tmp.size(); j++) {
|
||||||
startProb_[j] = atof(tmp[j].c_str());
|
startProb_[j] = atof(tmp[j].c_str());
|
||||||
@ -233,12 +233,11 @@ class HMMSegment: public SegmentBase {
|
|||||||
//load transProb_
|
//load transProb_
|
||||||
for(size_t i = 0; i < STATUS_SUM; i++) {
|
for(size_t i = 0; i < STATUS_SUM; i++) {
|
||||||
if(!getLine_(ifile, line)) {
|
if(!getLine_(ifile, line)) {
|
||||||
return false;
|
LogFatal("load transProb_ failed.");
|
||||||
}
|
}
|
||||||
split(line, tmp, " ");
|
split(line, tmp, " ");
|
||||||
if(tmp.size() != STATUS_SUM) {
|
if(tmp.size() != STATUS_SUM) {
|
||||||
LogError("trans_p illegal");
|
LogFatal("trans_p illegal");
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
for(size_t j =0; j < STATUS_SUM; j++) {
|
for(size_t j =0; j < STATUS_SUM; j++) {
|
||||||
transProb_[i][j] = atof(tmp[j].c_str());
|
transProb_[i][j] = atof(tmp[j].c_str());
|
||||||
@ -247,25 +246,23 @@ class HMMSegment: public SegmentBase {
|
|||||||
|
|
||||||
//load emitProbB_
|
//load emitProbB_
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) {
|
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) {
|
||||||
return false;
|
LogFatal("load emitProbB_ failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
//load emitProbE_
|
//load emitProbE_
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) {
|
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) {
|
||||||
return false;
|
LogFatal("load emitProbE_ failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
//load emitProbM_
|
//load emitProbM_
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) {
|
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) {
|
||||||
return false;
|
LogFatal("load emitProbM_ failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
//load emitProbS_
|
//load emitProbS_
|
||||||
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) {
|
if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) {
|
||||||
return false;
|
LogFatal("load emitProbS_ failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
bool getLine_(ifstream& ifile, string& line) {
|
bool getLine_(ifstream& ifile, string& line) {
|
||||||
while(getline(ifile, line)) {
|
while(getline(ifile, line)) {
|
||||||
|
@ -20,7 +20,7 @@ class KeywordExtractor {
|
|||||||
void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
|
void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
|
||||||
loadIdfDict_(idfPath);
|
loadIdfDict_(idfPath);
|
||||||
loadStopWordDict_(stopWordPath);
|
loadStopWordDict_(stopWordPath);
|
||||||
LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDict));
|
segment_.init(dictPath, hmmFilePath, userDict);
|
||||||
};
|
};
|
||||||
|
|
||||||
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
|
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
|
||||||
|
@ -16,14 +16,13 @@ class MPSegment: public SegmentBase {
|
|||||||
public:
|
public:
|
||||||
MPSegment() {};
|
MPSegment() {};
|
||||||
MPSegment(const string& dictPath, const string& userDictPath = "") {
|
MPSegment(const string& dictPath, const string& userDictPath = "") {
|
||||||
LIMONP_CHECK(init(dictPath, userDictPath));
|
init(dictPath, userDictPath);
|
||||||
};
|
};
|
||||||
virtual ~MPSegment() {};
|
virtual ~MPSegment() {};
|
||||||
|
|
||||||
bool init(const string& dictPath, const string& userDictPath = "") {
|
void init(const string& dictPath, const string& userDictPath = "") {
|
||||||
LIMONP_CHECK(dictTrie_.init(dictPath, userDictPath));
|
dictTrie_.init(dictPath, userDictPath);
|
||||||
LogInfo("MPSegment init(%s) ok", dictPath.c_str());
|
LogInfo("MPSegment init(%s) ok", dictPath.c_str());
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
bool isUserDictSingleChineseWord(const Unicode::value_type & value) const {
|
bool isUserDictSingleChineseWord(const Unicode::value_type & value) const {
|
||||||
return dictTrie_.isUserDictSingleChineseWord(value);
|
return dictTrie_.isUserDictSingleChineseWord(value);
|
||||||
|
@ -12,15 +12,14 @@ class MixSegment: public SegmentBase {
|
|||||||
MixSegment() {
|
MixSegment() {
|
||||||
}
|
}
|
||||||
MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
||||||
LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict));
|
init(mpSegDict, hmmSegDict, userDict);
|
||||||
}
|
}
|
||||||
virtual ~MixSegment() {
|
virtual ~MixSegment() {
|
||||||
}
|
}
|
||||||
bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
void init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
|
||||||
LIMONP_CHECK(mpSeg_.init(mpSegDict, userDict));
|
mpSeg_.init(mpSegDict, userDict);
|
||||||
LIMONP_CHECK(hmmSeg_.init(hmmSegDict));
|
hmmSeg_.init(hmmSegDict);
|
||||||
LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str());
|
LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str());
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||||
|
@ -30,7 +30,7 @@ class PosTagger {
|
|||||||
const string& hmmFilePath,
|
const string& hmmFilePath,
|
||||||
const string& userDictPath = ""
|
const string& userDictPath = ""
|
||||||
) {
|
) {
|
||||||
LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDictPath));
|
segment_.init(dictPath, hmmFilePath, userDictPath);
|
||||||
dictTrie_ = segment_.getDictTrie();
|
dictTrie_ = segment_.getDictTrie();
|
||||||
LIMONP_CHECK(dictTrie_);
|
LIMONP_CHECK(dictTrie_);
|
||||||
};
|
};
|
||||||
|
@ -21,12 +21,11 @@ class QuerySegment: public SegmentBase {
|
|||||||
init(dict, model, maxWordLen, userDict);
|
init(dict, model, maxWordLen, userDict);
|
||||||
};
|
};
|
||||||
virtual ~QuerySegment() {};
|
virtual ~QuerySegment() {};
|
||||||
bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") {
|
void init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") {
|
||||||
LIMONP_CHECK(mixSeg_.init(dict, model, userDict));
|
mixSeg_.init(dict, model, userDict);
|
||||||
LIMONP_CHECK(fullSeg_.init(mixSeg_.getDictTrie()));
|
fullSeg_.init(mixSeg_.getDictTrie());
|
||||||
assert(maxWordLen);
|
assert(maxWordLen);
|
||||||
maxWordLen_ = maxWordLen;
|
maxWordLen_ = maxWordLen;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
using SegmentBase::cut;
|
using SegmentBase::cut;
|
||||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||||
|
@ -18,7 +18,7 @@ TEST(DictTrieTest, Test1) {
|
|||||||
|
|
||||||
string s1, s2;
|
string s1, s2;
|
||||||
DictTrie trie;
|
DictTrie trie;
|
||||||
ASSERT_TRUE(trie.init(DICT_FILE));
|
trie.init(DICT_FILE);
|
||||||
ASSERT_LT(trie.getMinWeight() + 15.6479, 0.001);
|
ASSERT_LT(trie.getMinWeight() + 15.6479, 0.001);
|
||||||
string word("来到");
|
string word("来到");
|
||||||
Unicode uni;
|
Unicode uni;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user