diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index 5c997e4..654f6af 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -9,14 +9,13 @@ #include "ISegment.hpp" #include "SegmentBase.hpp" #include "TransCode.hpp" -#include "TrieManager.hpp" namespace CppJieba { class FullSegment: public SegmentBase { private: - Trie* _trie; + Trie _trie; public: FullSegment(){_setInitFlag(false);}; @@ -30,12 +29,8 @@ namespace CppJieba LogError("already inited before now."); return false; } - _trie = TrieManager::getInstance().getTrie(dictPath.c_str()); - if (NULL == _trie) - { - LogError("get NULL pointor from getTrie(\"%s\")", dictPath.c_str()); - return false; - } + _trie.init(dictPath.c_str()); + assert(_trie); return _setInitFlag(true); } @@ -66,7 +61,7 @@ namespace CppJieba for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) { //find word start from uItr - if (_trie->find(uItr, end, tRes)) + if (_trie.find(uItr, end, tRes)) { for (vector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) { diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 5176c36..1d3ca1d 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -10,7 +10,7 @@ #include #include "Limonp/logger.hpp" #include "Trie.hpp" -#include "TrieManager.hpp" +#include "Trie.hpp" #include "ISegment.hpp" #include "SegmentBase.hpp" @@ -25,15 +25,14 @@ namespace CppJieba double weight; SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0) - { - } + {} }; typedef vector SegmentContext; class MPSegment: public SegmentBase { protected: - Trie* _trie; + Trie _trie; public: MPSegment(){_setInitFlag(false);}; @@ -50,12 +49,8 @@ namespace CppJieba LogError("already inited before now."); return false; } - _trie = TrieManager::getInstance().getTrie(dictPath.c_str()); - if (_trie == NULL) - { - LogError("get a NULL pointor form getTrie(\"%s\").", dictPath.c_str()); - return false; - } + _trie.init(dictPath); + assert(_trie); LogInfo("MPSegment init(%s) ok", dictPath.c_str()); return _setInitFlag(true); } @@ -129,7 +124,7 @@ namespace CppJieba { SegmentChar schar(*it); uint i = it - begin; - _trie->find(it, end, i, schar.dag); + _trie.find(it, end, i, schar.dag); //DagType::iterator dagIter; if(schar.dag.end() == schar.dag.find(i)) { @@ -167,7 +162,7 @@ namespace CppJieba } else { - val += _trie->getMinLogFreq(); + val += _trie.getMinLogFreq(); } if(val > segContext[i].weight) { @@ -195,7 +190,7 @@ namespace CppJieba TrieNodeInfo nodeInfo; nodeInfo.word.push_back(segContext[i].uniCh); nodeInfo.freq = 0; - nodeInfo.logFreq = _trie->getMinLogFreq(); + nodeInfo.logFreq = _trie.getMinLogFreq(); res.push_back(nodeInfo); i++; } diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index 9218056..11c5dc7 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -18,16 +18,13 @@ namespace CppJieba explicit MixSegment(const string& mpSegDict, const string& hmmSegDict) { _setInitFlag(init(mpSegDict, hmmSegDict)); + assert(_getInitFlag()); } virtual ~MixSegment(){} public: bool init(const string& mpSegDict, const string& hmmSegDict) { - if(_getInitFlag()) - { - LogError("inited."); - return false; - } + assert(!_getInitFlag()); if(!_mpSeg.init(mpSegDict)) { LogError("_mpSeg init"); diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp index 5c0f506..79bacd9 100644 --- a/src/PosTagger.hpp +++ b/src/PosTagger.hpp @@ -4,7 +4,6 @@ #include "MixSegment.hpp" #include "Limonp/str_functs.hpp" #include "Trie.hpp" -#include "TrieManager.hpp" namespace CppJieba { @@ -14,7 +13,7 @@ namespace CppJieba { private: MixSegment _segment; - Trie* _trie; + Trie _trie; public: PosTagger(){_setInitFlag(false);}; @@ -26,17 +25,10 @@ namespace CppJieba public: bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb) { - if (_getInitFlag()) - { - LogError("already inited before."); - return false; - } - _trie = TrieManager::getInstance().getTrie(dictPath.c_str()); - if (NULL == _trie) - { - LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str()); - return false; - } + + assert(!_getInitFlag()); + _trie.init(dictPath); + assert(_trie); return _setInitFlag(_segment.init(dictPath, hmmFilePath)); }; @@ -59,7 +51,7 @@ namespace CppJieba LogError("decode failed."); return false; } - tmp = _trie->find(unico.begin(), unico.end()); + tmp = _trie.find(unico.begin(), unico.end()); res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag)); } tmp = NULL; diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index 3d8cd5b..93a207a 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -11,7 +11,7 @@ #include "FullSegment.hpp" #include "MixSegment.hpp" #include "TransCode.hpp" -#include "TrieManager.hpp" +#include "Trie.hpp" namespace CppJieba { diff --git a/src/Trie.hpp b/src/Trie.hpp index c88a4ef..17812e9 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -14,6 +14,7 @@ #include #include "Limonp/str_functs.hpp" #include "Limonp/logger.hpp" +#include "Limonp/InitOnOff.hpp" #include "TransCode.hpp" @@ -64,14 +65,13 @@ namespace CppJieba typedef map DagType; - class Trie + class Trie: public InitOnOff { private: TrieNode* _root; vector _nodeInfoVec; - bool _initFlag; int64_t _freqSum; double _minLogFreq; @@ -81,57 +81,28 @@ namespace CppJieba _root = NULL; _freqSum = 0; _minLogFreq = MAX_DOUBLE; - _initFlag = false; + _setInitFlag(false); + } + Trie(const string& filePath): Trie() + { + _setInitFlag(init(filePath)); } ~Trie() - { - dispose(); - } - bool init() - { - if(_getInitFlag()) - { - LogError("already initted!"); - return false; - } - - try - { - _root = new TrieNode; - } - catch(const bad_alloc& e) - { - return false; - } - if(NULL == _root) - { - return false; - } - _setInitFlag(true); - return true; - } - bool dispose() { if(!_getInitFlag()) { - return false; + return; } - bool ret = _deleteNode(_root); - if(!ret) - { - LogFatal("_deleteNode failed!"); - return false; - } - _root = NULL; - _nodeInfoVec.clear(); - - _setInitFlag(false); - return ret; + _deleteNode(_root); } - bool loadDict(const char * const filePath) + public: + bool init(const string& filePath) { - assert(_getInitFlag()); - if(!_trieInsert(filePath)) + assert(!_getInitFlag()); + + _root = new TrieNode; + assert(_root); + if(!_trieInsert(filePath.c_str())) { LogError("_trieInsert failed."); return false; @@ -141,13 +112,9 @@ namespace CppJieba LogError("_countWeight failed."); return false; } - return true; + return _setInitFlag(true); } - private: - void _setInitFlag(bool on){_initFlag = on;}; - bool _getInitFlag()const{return _initFlag;}; - public: const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const { @@ -271,12 +238,6 @@ namespace CppJieba private: bool _insert(const TrieNodeInfo& nodeInfo) { - if(!_getInitFlag()) - { - LogFatal("not initted!"); - return false; - } - const Unicode& uintVec = nodeInfo.word; TrieNode* p = _root; @@ -358,10 +319,9 @@ namespace CppJieba nodeInfo.tag = vecBuf[2]; } - //_insert node if(!_insert(nodeInfo)) { - LogError("_insert node failed!"); + assert(false); } } return true; @@ -405,7 +365,7 @@ namespace CppJieba return true; } - bool _deleteNode(TrieNode* node) + void _deleteNode(TrieNode* node) { for(TrieNodeMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++) { @@ -414,7 +374,6 @@ namespace CppJieba } delete node; - return true; } }; diff --git a/src/TrieManager.hpp b/src/TrieManager.hpp deleted file mode 100644 index aabdb05..0000000 --- a/src/TrieManager.hpp +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef CPPJIEBA_TRIEMANAGER_H -#define CPPJIEBA_TRIEMANAGER_H - -#include "Trie.hpp" -#include "Limonp/md5.hpp" -#include "Limonp/logger.hpp" - -namespace CppJieba -{ - using namespace Limonp; - class TrieManager - { - private: - unordered_map _tries; - TrieManager(){}; - TrieManager(TrieManager& tm){}; - public: - Trie* getTrie(const char* dictpath) - { - string md5; - if (!md5File(dictpath, md5)) - { - LogError("error when getting md5 for file '%s'", dictpath); - return NULL; - } - - if (_tries.find(md5) != _tries.end()) - { - return _tries[md5.c_str()]; - } - - //LogDebug("create a new trie for md5: '%s'", md5.c_str()); - Trie* trie = NULL; - try - { - trie = new Trie(); - } - catch (const bad_alloc& e) - { - LogError("error when new a trie for file '%s'", dictpath); - return NULL; - } - if (NULL == trie) - { - LogError("get NULL from new trie for file '%s'", dictpath); - return NULL; - } - - if (!trie->init()) - { - LogError("trie init error for file '%s'", dictpath); - return NULL; - } - - if (!trie->loadDict(dictpath)) - { - LogError("trie->loadDict(%s) failed...", dictpath); - return NULL; - } - - _tries[md5.c_str()] = trie; - LogDebug("trie->loadDict(%s)", dictpath); - return trie; - } - - static TrieManager& getInstance() - { - static TrieManager _this; - return _this; - } - }; -} -#endif diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 6eafa8f..ac3b930 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -6,7 +6,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0) ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) -ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TMd5.cpp TQuerySegment.cpp TTrieManager TFullSegment.cpp TMd5.cpp TQuerySegment.cpp TTrieManager.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp) +ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp) TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread) diff --git a/test/unittest/TMd5.cpp b/test/unittest/TMd5.cpp deleted file mode 100644 index 455b125..0000000 --- a/test/unittest/TMd5.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "src/TrieManager.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -const char* const DICT_FILE[] = { - "../test/testdata/jieba.dict.0.utf8", - "../test/testdata/jieba.dict.0.1.utf8", - "../test/testdata/jieba.dict.1.utf8", - "../test/testdata/jieba.dict.2.utf8"}; - -const char* const DICT_FILE_MD5[] = { - "5aef74a56b363d994095c407c4809d84", - "5aef74a56b363d994095c407c4809d84", - "55f1116c05c8051ab53171f0b7455197", - "b123553a2418c4bda51abc64d705d5d4"}; - -TEST(Md5Test, Test1) -{ - ASSERT_EQ(sizeof(DICT_FILE)/sizeof(DICT_FILE[0]), sizeof(DICT_FILE_MD5)/sizeof(DICT_FILE_MD5[0])); - string tmp; - for (uint i = 0; i < sizeof(DICT_FILE)/sizeof(DICT_FILE[0]); i++) - { - md5File(DICT_FILE[i], tmp); - ASSERT_EQ(tmp, string(DICT_FILE_MD5[i])); - } -} - diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp index 4645d91..ebf9a32 100644 --- a/test/unittest/TTrie.cpp +++ b/test/unittest/TTrie.cpp @@ -8,8 +8,7 @@ static const char* const DICT_FILE = "../dict/jieba.dict.utf8"; TEST(TrieTest, Test1) { Trie trie; - ASSERT_TRUE(trie.init()); - ASSERT_TRUE(trie.loadDict(DICT_FILE)); + ASSERT_TRUE(trie.init(DICT_FILE)); ASSERT_LT(trie.getMinLogFreq() + 17.2184, 0.001); string word("来到"); Unicode uni; diff --git a/test/unittest/TTrieManager.cpp b/test/unittest/TTrieManager.cpp deleted file mode 100644 index a51341a..0000000 --- a/test/unittest/TTrieManager.cpp +++ /dev/null @@ -1,52 +0,0 @@ -#include "src/TrieManager.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -struct md5_ptr -{ - string md5; - Trie* ptr; -}; -typedef struct md5_ptr MD5_PTR; - -static const char* const DICT_FILE[] = { - "../test/testdata/jieba.dict.0.utf8", - "../test/testdata/jieba.dict.0.utf8", - "../test/testdata/jieba.dict.0.utf8", - "../test/testdata/jieba.dict.0.1.utf8", - "../test/testdata/jieba.dict.0.1.utf8", - "../test/testdata/jieba.dict.0.1.utf8", - "../test/testdata/jieba.dict.1.utf8", - "../test/testdata/jieba.dict.1.utf8", - "../test/testdata/jieba.dict.1.utf8", - "../test/testdata/jieba.dict.2.utf8", - "../test/testdata/jieba.dict.2.utf8", - "../test/testdata/jieba.dict.2.utf8", - "../test/testdata/jieba.dict.2.utf8"}; - -TEST(TrieManagerTest, Test1) -{ - vector tries(sizeof(DICT_FILE)/sizeof(DICT_FILE[0])); - for (uint i = 0; i < tries.size(); i++) - { - tries[i].ptr = TrieManager::getInstance().getTrie(DICT_FILE[i]); - ASSERT_TRUE(md5File(DICT_FILE[i], tries[i].md5)); - } - - for (uint i = 0; i < tries.size(); i++) - { - for (uint j = i + 1; j < tries.size(); j++) - { - if (tries[i].md5 == tries[j].md5) - { - ASSERT_EQ(tries[i].ptr, tries[j].ptr); - } - else - { - ASSERT_NE(tries[i].ptr, tries[j].ptr); - } - } - } -} -