From c04b2dd0d4d52cd7149721892750ed574a3f67e4 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Thu, 7 May 2015 20:03:19 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9B=B4=E8=AF=A6=E7=BB=86?= =?UTF-8?q?=E7=9A=84=E9=94=99=E8=AF=AF=E6=97=A5=E5=BF=97=EF=BC=8C=E5=9C=A8?= =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E8=BF=87=E7=A8=8B=E4=B8=AD=E5=90=88?= =?UTF-8?q?=E7=90=86=E4=BD=BF=E7=94=A8LogFatal=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 3 ++- src/DictTrie.hpp | 3 +-- src/HMMSegment.hpp | 33 +++++++++++++++------------------ src/KeywordExtractor.hpp | 2 +- src/MPSegment.hpp | 7 +++---- src/MixSegment.hpp | 9 ++++----- src/PosTagger.hpp | 2 +- src/QuerySegment.hpp | 7 +++---- test/unittest/TTrie.cpp | 2 +- 9 files changed, 31 insertions(+), 37 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 7738ee0..e56d0f6 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,10 +1,11 @@ # CppJieba ChangeLog -## v2.4.5 (untagged) +## v2.5.0 (untagged) 1. 使得 QuerySegment 支持自定义词典(可选参数)。 2. 使得 KeywordExtractor 支持自定义词典(可选参数)。 3. 修改 Code Style ,参照 google code style 。 +4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。 ## v2.4.4 diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index 92a4645..d5e8163 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -39,7 +39,7 @@ class DictTrie { } } - bool init(const string& dictPath, const string& userDictPath = "") { + void init(const string& dictPath, const string& userDictPath = "") { if(trie_ != NULL) { LogFatal("trie already initted"); } @@ -54,7 +54,6 @@ class DictTrie { shrink_(nodeInfos_); trie_ = createTrie_(nodeInfos_); assert(trie_); - return true; } const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index 9d624f2..f384eb2 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -6,7 +6,6 @@ #include #include #include "Limonp/StringUtil.hpp" -#include "Limonp/Logger.hpp" #include "TransCode.hpp" #include "ISegment.hpp" #include "SegmentBase.hpp" @@ -26,11 +25,11 @@ class HMMSegment: public SegmentBase { public: HMMSegment() {} explicit HMMSegment(const string& filePath) { - LIMONP_CHECK(init(filePath)); + init(filePath); } virtual ~HMMSegment() {} public: - bool init(const string& filePath) { + void init(const string& filePath) { memset(startProb_, 0, sizeof(startProb_)); memset(transProb_, 0, sizeof(transProb_)); statMap_[0] = 'B'; @@ -41,9 +40,8 @@ class HMMSegment: public SegmentBase { emitProbVec_.push_back(&emitProbE_); emitProbVec_.push_back(&emitProbM_); emitProbVec_.push_back(&emitProbS_); - LIMONP_CHECK(loadModel_(filePath.c_str())); + loadModel_(filePath.c_str()); LogInfo("HMMSegment init(%s) ok.", filePath.c_str()); - return true; } public: using SegmentBase::cut; @@ -212,19 +210,21 @@ class HMMSegment: public SegmentBase { return true; } - bool loadModel_(const char* const filePath) { + void loadModel_(const char* const filePath) { ifstream ifile(filePath); + if(!ifile.is_open()) { + LogFatal("open %s failed.", filePath); + } string line; vector tmp; vector tmp2; //load startProb_ if(!getLine_(ifile, line)) { - return false; + LogFatal("load startProb_"); } split(line, tmp, " "); if(tmp.size() != STATUS_SUM) { - LogError("start_p illegal"); - return false; + LogFatal("start_p illegal"); } for(size_t j = 0; j< tmp.size(); j++) { startProb_[j] = atof(tmp[j].c_str()); @@ -233,12 +233,11 @@ class HMMSegment: public SegmentBase { //load transProb_ for(size_t i = 0; i < STATUS_SUM; i++) { if(!getLine_(ifile, line)) { - return false; + LogFatal("load transProb_ failed."); } split(line, tmp, " "); if(tmp.size() != STATUS_SUM) { - LogError("trans_p illegal"); - return false; + LogFatal("trans_p illegal"); } for(size_t j =0; j < STATUS_SUM; j++) { transProb_[i][j] = atof(tmp[j].c_str()); @@ -247,25 +246,23 @@ class HMMSegment: public SegmentBase { //load emitProbB_ if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) { - return false; + LogFatal("load emitProbB_ failed."); } //load emitProbE_ if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) { - return false; + LogFatal("load emitProbE_ failed."); } //load emitProbM_ if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) { - return false; + LogFatal("load emitProbM_ failed."); } //load emitProbS_ if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) { - return false; + LogFatal("load emitProbS_ failed."); } - - return true; } bool getLine_(ifstream& ifile, string& line) { while(getline(ifile, line)) { diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp index 8114556..05280bd 100644 --- a/src/KeywordExtractor.hpp +++ b/src/KeywordExtractor.hpp @@ -20,7 +20,7 @@ class KeywordExtractor { void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") { loadIdfDict_(idfPath); loadStopWordDict_(stopWordPath); - LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDict)); + segment_.init(dictPath, hmmFilePath, userDict); }; bool extract(const string& str, vector& keywords, size_t topN) const { diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 398df5a..1dbb5ae 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -16,14 +16,13 @@ class MPSegment: public SegmentBase { public: MPSegment() {}; MPSegment(const string& dictPath, const string& userDictPath = "") { - LIMONP_CHECK(init(dictPath, userDictPath)); + init(dictPath, userDictPath); }; virtual ~MPSegment() {}; - bool init(const string& dictPath, const string& userDictPath = "") { - LIMONP_CHECK(dictTrie_.init(dictPath, userDictPath)); + void init(const string& dictPath, const string& userDictPath = "") { + dictTrie_.init(dictPath, userDictPath); LogInfo("MPSegment init(%s) ok", dictPath.c_str()); - return true; } bool isUserDictSingleChineseWord(const Unicode::value_type & value) const { return dictTrie_.isUserDictSingleChineseWord(value); diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index 6d47e89..18b0b84 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -12,15 +12,14 @@ class MixSegment: public SegmentBase { MixSegment() { } MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { - LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict)); + init(mpSegDict, hmmSegDict, userDict); } virtual ~MixSegment() { } - bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { - LIMONP_CHECK(mpSeg_.init(mpSegDict, userDict)); - LIMONP_CHECK(hmmSeg_.init(hmmSegDict)); + void init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { + mpSeg_.init(mpSegDict, userDict); + hmmSeg_.init(hmmSegDict); LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str()); - return true; } using SegmentBase::cut; virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp index 908e024..dfeebf1 100644 --- a/src/PosTagger.hpp +++ b/src/PosTagger.hpp @@ -30,7 +30,7 @@ class PosTagger { const string& hmmFilePath, const string& userDictPath = "" ) { - LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDictPath)); + segment_.init(dictPath, hmmFilePath, userDictPath); dictTrie_ = segment_.getDictTrie(); LIMONP_CHECK(dictTrie_); }; diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index 317ce7c..4655d30 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -21,12 +21,11 @@ class QuerySegment: public SegmentBase { init(dict, model, maxWordLen, userDict); }; virtual ~QuerySegment() {}; - bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { - LIMONP_CHECK(mixSeg_.init(dict, model, userDict)); - LIMONP_CHECK(fullSeg_.init(mixSeg_.getDictTrie())); + void init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { + mixSeg_.init(dict, model, userDict); + fullSeg_.init(mixSeg_.getDictTrie()); assert(maxWordLen); maxWordLen_ = maxWordLen; - return true; } using SegmentBase::cut; bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index 7449b8a..e49aceb 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -18,7 +18,7 @@ TEST(DictTrieTest, Test1) { string s1, s2; DictTrie trie; - ASSERT_TRUE(trie.init(DICT_FILE)); + trie.init(DICT_FILE); ASSERT_LT(trie.getMinWeight() + 15.6479, 0.001); string word("来到"); Unicode uni;