diff --git a/src/KeyWordExt.cpp b/src/KeyWordExt.cpp index ea512c8..548ccd2 100644 --- a/src/KeyWordExt.cpp +++ b/src/KeyWordExt.cpp @@ -24,8 +24,7 @@ namespace CppJieba LogError(string_format("cann't find file[%s].",filePath)); return false; } - bool retFlag = _segment.init(filePath); - LogInfo(string_format("init(%s) end", filePath)); + bool retFlag = _segment.init(); return retFlag; } diff --git a/src/Segment.cpp b/src/Segment.cpp index eb86d97..e2344dc 100644 --- a/src/Segment.cpp +++ b/src/Segment.cpp @@ -18,14 +18,22 @@ namespace CppJieba { } - bool Segment::init(const string& dictFilePath) + bool Segment::init() { bool retFlag; - LogInfo(string_format("_trie.init(%s) start...", dictFilePath)); - retFlag = _trie.init(dictFilePath); - LogInfo("_trie.init end."); + retFlag = _trie.init(); return retFlag; } + + bool Segment::loadSegDict(const string& filePath) + { + bool retFlag; + retFlag = _trie.loadDict(filePath); + LogInfo(string_format("_trie.loadDict(%s) start...", filePath.c_str())); + LogInfo("_trie.loadDict end."); + return retFlag; + } + bool Segment::destroy() { @@ -36,7 +44,7 @@ namespace CppJieba { bool retFlag; res.clear(); - string uniStr = _utf8ToUni(str; + string uniStr = _utf8ToUni(str); if(uniStr.empty()) { LogError("_utf8ToUni failed."); @@ -103,7 +111,7 @@ namespace CppJieba vec.push_back(i/2); for(uint j = i + 4; j <= uniStr.size(); j+=2) { - cout< res; diff --git a/src/Segment.h b/src/Segment.h index bab0254..ee9d4ae 100644 --- a/src/Segment.h +++ b/src/Segment.h @@ -22,7 +22,8 @@ namespace CppJieba Segment(); ~Segment(); public: - bool init(const string& dictFilePath); + bool init(); + bool loadSegDict(const string& filePath); bool destroy(); public: bool cutDAG(const string& chStr, vector& res); diff --git a/src/Trie.cpp b/src/Trie.cpp index d5bf04a..6b4af3e 100644 --- a/src/Trie.cpp +++ b/src/Trie.cpp @@ -30,6 +30,7 @@ namespace CppJieba _root = NULL; _totalCount = 0; _minWeight = numeric_limits::max(); + _initFlag = false; } Trie::~Trie() @@ -48,8 +49,47 @@ namespace CppJieba return true; } - bool Trie::init(const string& filePath) + bool Trie::_getInitFlag() { + return _initFlag; + } + void Trie::_setInitFlag() + { + _initFlag = true; + } + + bool Trie::init() + { + if(_getInitFlag()) + { + LogError("already initted!"); + return false; + } + + try + { + _root = new TrieNode; + } + catch(const bad_alloc& e) + { + return false; + } + if(NULL == _root) + { + return false; + } + _setInitFlag(); + return true; + } + + bool Trie::loadDict(const string& filePath) + { + if(!_getInitFlag()) + { + LogError("not initted."); + return false; + } + if(!checkFileExist(filePath.c_str())) { LogError(string_format("cann't find fiel[%s].",filePath.c_str())); @@ -70,15 +110,9 @@ namespace CppJieba } return true; } - + bool Trie::_buildTree(const string& filePath) { - if(NULL != _root) - { - LogError("already initted!"); - return false; - } - _root = new TrieNode; ifstream ifile(filePath.c_str()); string line; @@ -247,21 +281,6 @@ namespace CppJieba return NULL; } - /* - double Trie::getWeight(const ChUnicode* uniStr, size_t len) - { - const TrieNodeInfo* p = find(uniStr, len); - if(NULL != p) - { - return p->weight; - } - else - { - return getMinWeight(); - } - } - */ - double Trie::getWeight(const string& uniStr) { const TrieNodeInfo * p = _findUniStr(uniStr); @@ -431,7 +450,8 @@ using namespace CppJieba; int main() { Trie trie; - trie.init("../dicts/segdict.utf8.v2.1"); + trie.init(); + trie.loadDict("../dicts/segdict.utf8.v2.1"); //trie.init("dicts/jieba.dict.utf8"); //trie.init("dict.100"); //char utf[1024] = "我来到北京清华大学3D电视"; diff --git a/src/Trie.h b/src/Trie.h index 6942308..e05a59f 100644 --- a/src/Trie.h +++ b/src/Trie.h @@ -67,6 +67,7 @@ namespace CppJieba int64_t _totalCount; double _minWeight; + bool _initFlag; public: typedef vector::iterator iterator; @@ -78,11 +79,16 @@ namespace CppJieba public: Trie(); ~Trie(); - bool init(const string& filePath); + bool init(); + bool loadDict(const string& filePath); bool setEncoding(const string& enc); bool destroy(); void display(); + private: + void _setInitFlag(); + bool _getInitFlag(); + public: //const TrieNodeInfo* find(const string& uniStr); //const TrieNodeInfo* find(const ChUnicode* const chUniStr, size_t len); diff --git a/src/cppcommon/str_functs.cpp b/src/cppcommon/str_functs.cpp index 9299e7d..4394206 100644 --- a/src/cppcommon/str_functs.cpp +++ b/src/cppcommon/str_functs.cpp @@ -291,7 +291,6 @@ namespace CPPCOMMON string utf8ToUnicode(const string& utfStr) { - cout<<__FILE__<<__LINE__<