diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index f1182fc..d4bfa31 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -9,13 +9,14 @@ #include "ISegment.hpp" #include "SegmentBase.hpp" #include "TransCode.hpp" +#include "TrieManager.hpp" namespace CppJieba { class FullSegment: public SegmentBase { private: - Trie _trie; + Trie* _trie; const string _dictPath; public: @@ -29,18 +30,12 @@ namespace CppJieba LogError("already inited before now."); return false; } - if(!_trie.init()) + _trie = TrieManager::getInstance().getTrie(_dictPath.c_str()); + if (NULL == _trie) { - LogError("_trie.init failed."); + LogError("get NULL pointor from getTrie(\"%s\")", _dictPath.c_str()); return false; } - LogInfo("_trie.loadDict(%s) start...", _dictPath.c_str()); - if(!_trie.loadDict(_dictPath.c_str())) - { - LogError("_trie.loadDict faield."); - return false; - } - LogInfo("_trie.loadDict end."); return _setInitFlag(true); } bool dispose() @@ -49,7 +44,6 @@ namespace CppJieba { return true; } - _trie.dispose(); _setInitFlag(false); return true; } @@ -81,7 +75,7 @@ namespace CppJieba for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) { //find word start from uItr - if (_trie.find(uItr, end, tRes)) + if (_trie->find(uItr, end, tRes)) { for (vector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) { diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 14b8f21..b0a7bed 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -10,6 +10,7 @@ #include #include "Limonp/logger.hpp" #include "Trie.hpp" +#include "TrieManager.hpp" #include "ISegment.hpp" #include "SegmentBase.hpp" @@ -32,7 +33,7 @@ namespace CppJieba class MPSegment: public SegmentBase { private: - Trie _trie; + Trie* _trie; private: const string _dictPath; @@ -47,18 +48,12 @@ namespace CppJieba LogError("already inited before now."); return false; } - if(!_trie.init()) + _trie = TrieManager::getInstance().getTrie(_dictPath.c_str()); + if (_trie == NULL) { - LogError("_trie.init failed."); + LogError("get a NULL pointor form getTrie(\"%s\").", _dictPath.c_str()); return false; } - LogInfo("_trie.loadDict(%s) start...", _dictPath.c_str()); - if(!_trie.loadDict(_dictPath.c_str())) - { - LogError("_trie.loadDict faield."); - return false; - } - LogInfo("_trie.loadDict end."); return _setInitFlag(true); } virtual bool dispose() @@ -67,18 +62,12 @@ namespace CppJieba { return true; } - _trie.dispose(); _setInitFlag(false); return true; } public: virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - //if(!_getInitFlag()) - //{ - // LogError("not inited."); - // return false; - //} assert(_getInitFlag()); vector segWordInfos; @@ -145,7 +134,7 @@ namespace CppJieba { SegmentChar schar(*it); uint i = it - begin; - _trie.find(it, end, i, schar.dag); + _trie->find(it, end, i, schar.dag); //DagType::iterator dagIter; if(schar.dag.end() == schar.dag.find(i)) { @@ -183,7 +172,7 @@ namespace CppJieba } else { - val += _trie.getMinLogFreq(); + val += _trie->getMinLogFreq(); } if(val > segContext[i].weight) { @@ -211,7 +200,7 @@ namespace CppJieba TrieNodeInfo nodeInfo; nodeInfo.word.push_back(segContext[i].uniCh); nodeInfo.freq = 0; - nodeInfo.logFreq = _trie.getMinLogFreq(); + nodeInfo.logFreq = _trie->getMinLogFreq(); res.push_back(nodeInfo); i++; } diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index ae5987e..a57f710 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -8,21 +8,22 @@ #include "Trie.hpp" #include "ISegment.hpp" #include "SegmentBase.hpp" -#include "HMMSegment.hpp" #include "FullSegment.hpp" +#include "MixSegment.hpp" #include "TransCode.hpp" +#include "TrieManager.hpp" namespace CppJieba { class QuerySegment: public SegmentBase { private: - HMMSegment _hmmSeg; + MixSegment _mixSeg; FullSegment _fullSeg; int _maxWordLen; public: - QuerySegment(const char* fullSegDict, const char* hmmSegDict, int maxWordLen): _hmmSeg(hmmSegDict), _fullSeg(fullSegDict), _maxWordLen(maxWordLen){}; + QuerySegment(const char* dict, const char* model, int maxWordLen): _mixSeg(dict, model), _fullSeg(dict), _maxWordLen(maxWordLen){}; virtual ~QuerySegment(){dispose();}; public: bool init() @@ -31,9 +32,9 @@ namespace CppJieba { LogError("inited."); } - if (!_hmmSeg.init()) + if (!_mixSeg.init()) { - LogError("_hmmSeg init"); + LogError("_mixSeg init"); return false; } if (!_fullSeg.init()) @@ -50,7 +51,7 @@ namespace CppJieba return true; } _fullSeg.dispose(); - _hmmSeg.dispose(); + _mixSeg.dispose(); _setInitFlag(false); return true; } @@ -68,22 +69,22 @@ namespace CppJieba return false; } - //use hmm cut first - vector hmmRes; - if (!_hmmSeg.cut(begin, end, hmmRes)) + //use mix cut first + vector mixRes; + if (!_mixSeg.cut(begin, end, mixRes)) { - LogError("_hmmSeg cut failed."); + LogError("_mixSeg cut failed."); return false; } vector fullRes; - for (vector::const_iterator hmmResItr = hmmRes.begin(); hmmResItr != hmmRes.end(); hmmResItr++) + for (vector::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { // if it's too long, cut with _fullSeg, put fullRes in res - if (hmmResItr->size() > _maxWordLen) + if (mixResItr->size() > _maxWordLen) { - if (_fullSeg.cut(hmmResItr->begin(), hmmResItr->end(), fullRes)) + if (_fullSeg.cut(mixResItr->begin(), mixResItr->end(), fullRes)) { for (vector::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { @@ -91,9 +92,9 @@ namespace CppJieba } } } - else // just use the hmm result + else // just use the mix result { - res.push_back(*hmmResItr); + res.push_back(*mixResItr); } }