diff --git a/Segment.cpp b/Segment.cpp index 9e5efb3..f795da1 100644 --- a/Segment.cpp +++ b/Segment.cpp @@ -129,7 +129,8 @@ using namespace CppJieba; int main() { Segment segment; - segment.init("dict.utf8"); + segment.init("jieba.dict.utf8"); + vector res; string title = "我来到北京清华大学3D电视"; bool flag = segment.cutMM(title, res); diff --git a/Trie.cpp b/Trie.cpp index 2625547..2989dac 100644 --- a/Trie.cpp +++ b/Trie.cpp @@ -12,17 +12,34 @@ namespace CppJieba return Trie::iterator(NULL); } - Trie::Trie() + Trie::Trie():_root(NULL), _nodeInfoVec(), _totalWeight(0) { - _root = NULL; } Trie::~Trie() { destroy(); } + + bool Trie::init(const char* const filePath) + { + bool res = false; + res = _buildTree(filePath); + if(!res) + { + LogError("_buildTree failed."); + return false; + } + res = _countWeight(); + if(!res) + { + LogError("_countWeight failed."); + return false; + } + return true; + } - bool Trie::init(const char* const filepath) + bool Trie::_buildTree(const char* const filePath) { char msgBuf[bufSize]; if(NULL != _root) @@ -31,7 +48,7 @@ namespace CppJieba return false; } _root = new TrieNode; - ifstream ifile(filepath); + ifstream ifile(filePath); string line; vector vecBuf; while(getline(ifile, line)) @@ -281,6 +298,31 @@ namespace CppJieba } return true; } + + bool Trie::_countWeight() + { + if(_nodeInfoVec.empty() || 0 != _totalWeight) + { + LogError("_nodeInfoVec is empty or _totalWeight has been counted already."); + return false; + } + + //count total freq + for(size_t i = 0; i < _nodeInfoVec.size(); i++) + { + _totalWeight += _nodeInfoVec[i].count; + //cout<<_nodeInfoVec[i].word<<_nodeInfoVec[i].count< > res; + trie.init("jieba.dict.utf8"); + //char utf[1024] = "我来到北京清华大学3D电视"; trie.destroy(); return 0; } diff --git a/Trie.h b/Trie.h index e946310..83fc11b 100644 --- a/Trie.h +++ b/Trie.h @@ -5,6 +5,8 @@ //#include #include #include +#include +#include #include "cppcommon/str_functs.h" #include "cppcommon/vec_functs.h" #include "cppcommon/logger.h" @@ -25,7 +27,8 @@ namespace CppJieba string word; unsigned int count; string tag; - TrieNodeInfo():word(),count(0),tag() + double weight; + TrieNodeInfo():word(),count(0),tag(),weight(0.0) { } }; @@ -91,6 +94,8 @@ namespace CppJieba private: TrieNode* _root; vector _nodeInfoVec; + int64_t _totalWeight; + public: typedef TrieNodeIterator iterator; @@ -101,7 +106,7 @@ namespace CppJieba public: Trie(); ~Trie(); - bool init(const char* const filepath = DICT_FILE_PATH); + bool init(const char* const filePath); bool destroy(); void display(); @@ -117,9 +122,11 @@ namespace CppJieba //bool cutMa private: + bool _buildTree(const char* const filePath); bool _destroyNode(TrieNode* node); void _display(TrieNode* node, int level); bool _insert(const TrieNodeInfo& nodeInfo); + bool _countWeight(); private: enum {bufSize = 1024};