Add more detailed error logging; use LogFatal appropriately during initialization.

yanyiwu 2015-05-07 20:03:19 +08:00
parent 31400cee17
commit c04b2dd0d4
9 changed files with 31 additions and 37 deletions
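The pattern this commit applies everywhere: `init()` members drop their `bool` return and call `LogFatal` at the failure site, so call sites shed their `LIMONP_CHECK(...)` wrappers. A minimal sketch of the idea, assuming only that `LogFatal` (from Limonp/Logger.hpp) prints its message and terminates the process; the macro below is a stand-in, not Limonp's real definition, and `Loader` is a made-up class:

```cpp
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <string>

// Stand-in for Limonp's LogFatal; the only property this commit relies on
// is that it logs the formatted message and never returns.
#define LogFatal(fmt, ...) \
  do { fprintf(stderr, "FATAL: " fmt "\n", ##__VA_ARGS__); exit(1); } while (0)

class Loader {
 public:
  // Before: bool init(...) returned false on error, so every caller had to
  // wrap the call in LIMONP_CHECK(...). After: void init(...) fails fast.
  void init(const std::string& filePath) {
    std::ifstream ifile(filePath.c_str());
    if (!ifile.is_open()) {
      LogFatal("open %s failed.", filePath.c_str());  // abort instead of returning false
    }
    // ... parse the file ...
  }
};

int main() {
  Loader loader;
  loader.init("dict.utf8");  // hypothetical path; a missing file now dies loudly here
  return 0;
}
```

The trade-off: initialization errors can no longer be handled by the caller, which fits a library that treats a missing dictionary or model file as unrecoverable.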

@@ -1,10 +1,11 @@
 # CppJieba ChangeLog
-## v2.4.5 (untagged)
+## v2.5.0 (untagged)
 1. Make QuerySegment support a user-defined dictionary (optional parameter).
 2. Make KeywordExtractor support a user-defined dictionary (optional parameter).
 3. Revise the code style, following the Google code style.
+4. Add more detailed error logging; use LogFatal appropriately during initialization.
 ## v2.4.4

@@ -39,7 +39,7 @@ class DictTrie {
     }
   }
-  bool init(const string& dictPath, const string& userDictPath = "") {
+  void init(const string& dictPath, const string& userDictPath = "") {
     if(trie_ != NULL) {
       LogFatal("trie already initted");
     }
@@ -54,7 +54,6 @@ class DictTrie {
     shrink_(nodeInfos_);
     trie_ = createTrie_(nodeInfos_);
     assert(trie_);
-    return true;
   }
   const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
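Note the first hunk above: DictTrie treats double initialization as fatal (that guard predates this commit; it appears as context). A usage sketch under that contract, with a placeholder dictionary path:

```cpp
DictTrie trie;
trie.init("jieba.dict.utf8");    // loads once; aborts via LogFatal if the file is missing
// trie.init("jieba.dict.utf8"); // a second call would LogFatal("trie already initted")
```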

@@ -6,7 +6,6 @@
 #include <memory.h>
 #include <cassert>
 #include "Limonp/StringUtil.hpp"
-#include "Limonp/Logger.hpp"
 #include "TransCode.hpp"
 #include "ISegment.hpp"
 #include "SegmentBase.hpp"
@@ -26,11 +25,11 @@ class HMMSegment: public SegmentBase {
  public:
   HMMSegment() {}
   explicit HMMSegment(const string& filePath) {
-    LIMONP_CHECK(init(filePath));
+    init(filePath);
   }
   virtual ~HMMSegment() {}
  public:
-  bool init(const string& filePath) {
+  void init(const string& filePath) {
     memset(startProb_, 0, sizeof(startProb_));
     memset(transProb_, 0, sizeof(transProb_));
     statMap_[0] = 'B';
@@ -41,9 +40,8 @@ class HMMSegment: public SegmentBase {
     emitProbVec_.push_back(&emitProbE_);
     emitProbVec_.push_back(&emitProbM_);
     emitProbVec_.push_back(&emitProbS_);
-    LIMONP_CHECK(loadModel_(filePath.c_str()));
+    loadModel_(filePath.c_str());
     LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
-    return true;
   }
  public:
   using SegmentBase::cut;
@@ -212,19 +210,21 @@ class HMMSegment: public SegmentBase {
     return true;
   }
-  bool loadModel_(const char* const filePath) {
+  void loadModel_(const char* const filePath) {
     ifstream ifile(filePath);
+    if(!ifile.is_open()) {
+      LogFatal("open %s failed.", filePath);
+    }
     string line;
     vector<string> tmp;
     vector<string> tmp2;
     //load startProb_
     if(!getLine_(ifile, line)) {
-      return false;
+      LogFatal("load startProb_");
     }
     split(line, tmp, " ");
     if(tmp.size() != STATUS_SUM) {
-      LogError("start_p illegal");
-      return false;
+      LogFatal("start_p illegal");
     }
     for(size_t j = 0; j< tmp.size(); j++) {
       startProb_[j] = atof(tmp[j].c_str());
@@ -233,12 +233,11 @@ class HMMSegment: public SegmentBase {
     //load transProb_
     for(size_t i = 0; i < STATUS_SUM; i++) {
       if(!getLine_(ifile, line)) {
-        return false;
+        LogFatal("load transProb_ failed.");
       }
       split(line, tmp, " ");
       if(tmp.size() != STATUS_SUM) {
-        LogError("trans_p illegal");
-        return false;
+        LogFatal("trans_p illegal");
       }
       for(size_t j =0; j < STATUS_SUM; j++) {
         transProb_[i][j] = atof(tmp[j].c_str());
@@ -247,25 +246,23 @@ class HMMSegment: public SegmentBase {
     //load emitProbB_
     if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) {
-      return false;
+      LogFatal("load emitProbB_ failed.");
     }
     //load emitProbE_
     if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) {
-      return false;
+      LogFatal("load emitProbE_ failed.");
     }
     //load emitProbM_
     if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) {
-      return false;
+      LogFatal("load emitProbM_ failed.");
     }
     //load emitProbS_
     if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) {
-      return false;
+      LogFatal("load emitProbS_ failed.");
     }
-    return true;
   }
   bool getLine_(ifstream& ifile, string& line) {
     while(getline(ifile, line)) {

@@ -20,7 +20,7 @@ class KeywordExtractor {
   void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
     loadIdfDict_(idfPath);
     loadStopWordDict_(stopWordPath);
-    LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDict));
+    segment_.init(dictPath, hmmFilePath, userDict);
   };
   bool extract(const string& str, vector<string>& keywords, size_t topN) const {

@@ -16,14 +16,13 @@ class MPSegment: public SegmentBase {
  public:
   MPSegment() {};
   MPSegment(const string& dictPath, const string& userDictPath = "") {
-    LIMONP_CHECK(init(dictPath, userDictPath));
+    init(dictPath, userDictPath);
   };
   virtual ~MPSegment() {};
-  bool init(const string& dictPath, const string& userDictPath = "") {
-    LIMONP_CHECK(dictTrie_.init(dictPath, userDictPath));
+  void init(const string& dictPath, const string& userDictPath = "") {
+    dictTrie_.init(dictPath, userDictPath);
     LogInfo("MPSegment init(%s) ok", dictPath.c_str());
-    return true;
   }
   bool isUserDictSingleChineseWord(const Unicode::value_type & value) const {
     return dictTrie_.isUserDictSingleChineseWord(value);

@@ -12,15 +12,14 @@ class MixSegment: public SegmentBase {
   MixSegment() {
   }
   MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
-    LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict));
+    init(mpSegDict, hmmSegDict, userDict);
   }
   virtual ~MixSegment() {
   }
-  bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
-    LIMONP_CHECK(mpSeg_.init(mpSegDict, userDict));
-    LIMONP_CHECK(hmmSeg_.init(hmmSegDict));
+  void init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") {
+    mpSeg_.init(mpSegDict, userDict);
+    hmmSeg_.init(hmmSegDict);
     LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str());
-    return true;
   }
   using SegmentBase::cut;
   virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {

@@ -30,7 +30,7 @@ class PosTagger {
       const string& hmmFilePath,
       const string& userDictPath = ""
       ) {
-    LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDictPath));
+    segment_.init(dictPath, hmmFilePath, userDictPath);
     dictTrie_ = segment_.getDictTrie();
     LIMONP_CHECK(dictTrie_);
   };

@@ -21,12 +21,11 @@ class QuerySegment: public SegmentBase {
     init(dict, model, maxWordLen, userDict);
   };
   virtual ~QuerySegment() {};
-  bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") {
-    LIMONP_CHECK(mixSeg_.init(dict, model, userDict));
-    LIMONP_CHECK(fullSeg_.init(mixSeg_.getDictTrie()));
+  void init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") {
+    mixSeg_.init(dict, model, userDict);
+    fullSeg_.init(mixSeg_.getDictTrie());
     assert(maxWordLen);
     maxWordLen_ = maxWordLen;
-    return true;
   }
   using SegmentBase::cut;
   bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {

@@ -18,7 +18,7 @@ TEST(DictTrieTest, Test1) {
   string s1, s2;
   DictTrie trie;
-  ASSERT_TRUE(trie.init(DICT_FILE));
+  trie.init(DICT_FILE);
   ASSERT_LT(trie.getMinWeight() + 15.6479, 0.001);
   string word("来到");
   Unicode uni;