diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index 17af029..5a7390c 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -10,7 +10,6 @@ #include #include "Limonp/StringUtil.hpp" #include "Limonp/Logger.hpp" -#include "Limonp/InitOnOff.hpp" #include "TransCode.hpp" #include "Trie.hpp" @@ -41,7 +40,7 @@ namespace CppJieba typedef map DagType; - class DictTrie: public InitOnOff + class DictTrie { public: typedef Trie, vector > TrieType; @@ -65,12 +64,11 @@ namespace CppJieba { _trie = NULL; _minWeight = MAX_DOUBLE; - _setInitFlag(false); } DictTrie(const string& dictPath, const string& userDictPath = "") { new (this) DictTrie(); - _setInitFlag(init(dictPath, userDictPath)); + init(dictPath, userDictPath); } ~DictTrie() { @@ -83,7 +81,7 @@ namespace CppJieba public: bool init(const string& dictPath, const string& userDictPath = "") { - assert(!_getInitFlag()); + assert(!_trie); _loadDict(dictPath, _nodeInfos); _calculateWeight(_nodeInfos); _minWeight = _findMinWeight(_nodeInfos); @@ -96,7 +94,7 @@ namespace CppJieba _shrink(_nodeInfos); _trie = _creatTrie(_nodeInfos); assert(_trie); - return _setInitFlag(true); + return true; } public: diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index 6afe185..6935958 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -35,20 +35,15 @@ namespace CppJieba vector _emitProbVec; public: - HMMSegment(){_setInitFlag(false);} + HMMSegment(){} explicit HMMSegment(const string& filePath) { - _setInitFlag(init(filePath)); + LIMONP_CHECK(init(filePath)); } virtual ~HMMSegment(){} public: bool init(const string& filePath) { - if(_getInitFlag()) - { - LogError("inited already."); - return false; - } memset(_startProb, 0, sizeof(_startProb)); memset(_transProb, 0, sizeof(_transProb)); _statMap[0] = 'B'; @@ -59,11 +54,7 @@ namespace CppJieba _emitProbVec.push_back(&_emitProbE); _emitProbVec.push_back(&_emitProbM); _emitProbVec.push_back(&_emitProbS); - if(!_setInitFlag(_loadModel(filePath.c_str()))) - { - LogError("_loadModel(%s) failed.", filePath.c_str()); - return false; - } + LIMONP_CHECK(_loadModel(filePath.c_str())); LogInfo("HMMSegment init(%s) ok.", filePath.c_str()); return true; } @@ -104,7 +95,6 @@ namespace CppJieba private: bool _cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_getInitFlag()); vector status; if(!_viterbi(begin, end, status)) { @@ -128,7 +118,6 @@ namespace CppJieba public: virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - assert(_getInitFlag()); if(begin == end) { return false; diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp index 90be022..36c7da0 100644 --- a/src/KeywordExtractor.hpp +++ b/src/KeywordExtractor.hpp @@ -10,7 +10,7 @@ namespace CppJieba using namespace Limonp; /*utf8*/ - class KeywordExtractor: public InitOnOff + class KeywordExtractor { private: MixSegment _segment; @@ -20,10 +20,10 @@ namespace CppJieba unordered_set _stopWords; public: - KeywordExtractor(){_setInitFlag(false);}; + KeywordExtractor(){}; KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath) { - _setInitFlag(init(dictPath, hmmFilePath, idfPath, stopWordPath)); + LIMONP_CHECK(init(dictPath, hmmFilePath, idfPath, stopWordPath)); }; ~KeywordExtractor(){}; @@ -32,13 +32,13 @@ namespace CppJieba { _loadIdfDict(idfPath); _loadStopWordDict(stopWordPath); - return _setInitFlag(_segment.init(dictPath, hmmFilePath)); + LIMONP_CHECK(_segment.init(dictPath, hmmFilePath)); + return true; }; public: bool extract(const string& str, vector& keywords, size_t topN) const { - assert(_getInitFlag()); vector > topWords; if(!extract(str, topWords, topN)) { diff --git a/src/Limonp/HandyMacro.hpp b/src/Limonp/HandyMacro.hpp index 6ab71c7..1468690 100644 --- a/src/Limonp/HandyMacro.hpp +++ b/src/Limonp/HandyMacro.hpp @@ -4,7 +4,7 @@ #include #define LIMONP_CHECK(exp) \ - if(exp){fprintf(stderr, "File:%s, Line:%d Exp:[" #exp "] is true, abort.\n", __FILE__, __LINE__); abort();} + if(!(exp)){fprintf(stderr, "File:%s, Line:%d Exp:[" #exp "] is true, abort.\n", __FILE__, __LINE__); abort();} #define print(x) cout<< #x": " << x <& res)const { - assert(_getInitFlag()); if(begin == end) { return false; @@ -92,7 +85,6 @@ namespace CppJieba { return false; } - assert(_getInitFlag()); vector segmentChars(end - begin); //calc DAG diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index f8a4369..ff06f47 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -14,36 +14,25 @@ namespace CppJieba MPSegment _mpSeg; HMMSegment _hmmSeg; public: - MixSegment(){_setInitFlag(false);}; + MixSegment(){}; MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { - _setInitFlag(init(mpSegDict, hmmSegDict, userDict)); - assert(_getInitFlag()); + LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict)); } virtual ~MixSegment(){} public: bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { - assert(!_getInitFlag()); - if(!_mpSeg.init(mpSegDict, userDict)) - { - LogError("_mpSeg init"); - return false; - } - if(!_hmmSeg.init(hmmSegDict)) - { - LogError("_hmmSeg init"); - return false; - } + LIMONP_CHECK(_mpSeg.init(mpSegDict, userDict)); + LIMONP_CHECK(_hmmSeg.init(hmmSegDict)); LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str()); - return _setInitFlag(true); + return true; } public: using SegmentBase::cut; public: virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_getInitFlag()); vector words; words.reserve(end - begin); if(!_mpSeg.cut(begin, end, words)) @@ -98,7 +87,6 @@ namespace CppJieba virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - assert(_getInitFlag()); if(begin == end) { return false; diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp index 325e765..63f13c0 100644 --- a/src/PosTagger.hpp +++ b/src/PosTagger.hpp @@ -9,32 +9,29 @@ namespace CppJieba { using namespace Limonp; - class PosTagger: public InitOnOff + class PosTagger { private: MixSegment _segment; DictTrie _dictTrie; public: - PosTagger(){_setInitFlag(false);}; + PosTagger(){}; PosTagger(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb) { - _setInitFlag(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb)); + LIMONP_CHECK(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb)); }; ~PosTagger(){}; public: bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb) { - - assert(!_getInitFlag()); - _dictTrie.init(dictPath); - assert(_dictTrie); - return _setInitFlag(_segment.init(dictPath, hmmFilePath)); + LIMONP_CHECK(_dictTrie.init(dictPath)); + LIMONP_CHECK(_segment.init(dictPath, hmmFilePath)); + return true; }; bool tag(const string& src, vector >& res) { - assert(_getInitFlag()); vector cutRes; if (!_segment.cut(src, cutRes)) { diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index 8f284eb..e942e6c 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -23,32 +23,20 @@ namespace CppJieba size_t _maxWordLen; public: - QuerySegment(){_setInitFlag(false);}; + QuerySegment(){}; QuerySegment(const string& dict, const string& model, size_t maxWordLen) { - _setInitFlag(init(dict, model, maxWordLen)); + init(dict, model, maxWordLen); }; virtual ~QuerySegment(){}; public: bool init(const string& dict, const string& model, size_t maxWordLen) { - if (_getInitFlag()) - { - LogError("inited already."); - return false; - } - if (!_mixSeg.init(dict, model)) - { - LogError("_mixSeg init"); - return false; - } - if (!_fullSeg.init(_mixSeg.getDictTrie())) - { - LogError("_fullSeg init"); - return false; - } + LIMONP_CHECK(_mixSeg.init(dict, model)); + LIMONP_CHECK(_fullSeg.init(_mixSeg.getDictTrie())); + assert(maxWordLen); _maxWordLen = maxWordLen; - return _setInitFlag(true); + return true; } public: @@ -57,7 +45,6 @@ namespace CppJieba public: bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_getInitFlag()); if (begin >= end) { LogError("begin >= end"); @@ -102,7 +89,6 @@ namespace CppJieba bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_getInitFlag()); if (begin >= end) { LogError("begin >= end"); diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 6b3ca93..25d384f 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -3,8 +3,8 @@ #include "TransCode.hpp" #include "Limonp/Logger.hpp" -#include "Limonp/InitOnOff.hpp" #include "Limonp/NonCopyable.hpp" +#include "Limonp/HandyMacro.hpp" #include "ISegment.hpp" #include @@ -20,7 +20,7 @@ namespace CppJieba const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u}; #endif - class SegmentBase: public ISegment, public InitOnOff, public NonCopyable + class SegmentBase: public ISegment, public NonCopyable { public: SegmentBase(){_loadSpecialSymbols();}; diff --git a/test/load_test.cpp b/test/load_test.cpp index f2a5cea..2d9543d 100644 --- a/test/load_test.cpp +++ b/test/load_test.cpp @@ -11,7 +11,6 @@ using namespace CppJieba; void cut(size_t times = 20) { MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); - assert(seg); vector res; string doc; ifstream ifs("../test/testdata/weicheng.utf8"); @@ -32,7 +31,6 @@ void cut(size_t times = 20) void extract(size_t times = 400) { KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); - assert(extractor); vector words; string doc; ifstream ifs("../test/testdata/review.100"); diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp index ac2ad79..41c9d65 100644 --- a/test/unittest/TSegments.cpp +++ b/test/unittest/TSegments.cpp @@ -17,8 +17,6 @@ TEST(MixSegmentTest, Test1) const char* str2 = "B超 T恤"; const char* res2[] = {"B超"," ", "T恤"}; vector words; - ASSERT_TRUE(segment); - ASSERT_TRUE(segment.cut(str, words)); ASSERT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); ASSERT_TRUE(segment.cut(str2, words)); @@ -29,7 +27,6 @@ TEST(MixSegmentTest, Test1) TEST(MixSegmentTest, NoUserDict) { MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8"); - ASSERT_TRUE(segment); const char* str = "令狐冲是云计算方面的专家"; vector words; ASSERT_TRUE(segment.cut(str, words)); @@ -40,7 +37,6 @@ TEST(MixSegmentTest, NoUserDict) TEST(MixSegmentTest, UserDict) { MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8"); - ASSERT_TRUE(segment); const char* str = "令狐冲是云计算方面的专家"; vector words; ASSERT_TRUE(segment.cut(str, words)); @@ -55,7 +51,6 @@ TEST(MPSegmentTest, Test1) const char* str = "我来自北京邮电大学。"; const char* res[] = {"我", "来自", "北京邮电大学", "。"}; vector words; - ASSERT_TRUE(segment); ASSERT_TRUE(segment.cut(str, words)); ASSERT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); @@ -105,7 +100,6 @@ TEST(HMMSegmentTest, Test1) const char* str = "我来自北京邮电大学。。。学号123456"; const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", "学号", "123456"}; vector words; - ASSERT_TRUE(segment); ASSERT_TRUE(segment.cut(str, words)); ASSERT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); } diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index cb65e50..2791884 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -55,7 +55,6 @@ TEST(DictTrieTest, Test1) TEST(DictTrieTest, UserDict) { DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8"); - ASSERT_TRUE(trie); string word = "云计算"; Unicode unicode; ASSERT_TRUE(TransCode::decode(word, unicode));