/************************************ * file enc : ASCII * author : wuyanyi09@gmail.com ************************************/ #ifndef CPPJIEBA_TRIE_H #define CPPJIEBA_TRIE_H #include #include #include #include #include #include #include #include "cppcommon/str_functs.h" #include "cppcommon/vec_functs.h" #include "cppcommon/file_functs.h" #include "cppcommon/logger.h" #include "TransCode.h" #include "globals.h" namespace CppJieba { using namespace CPPCOMMON; using namespace std; typedef map TrieNodeMap; struct TrieNodeInfo { string word; size_t wLen;// the word's len , not string.size(), size_t count; string tag; double weight; TrieNodeInfo() { word = ""; wLen = 0; count = 0; tag = ""; weight = 0.0; } }; struct TrieNode { TrieNodeMap hmap; bool isLeaf; uint nodeInfoVecPos; TrieNode() { isLeaf = false; nodeInfoVecPos = 0; } }; class Trie { private: TrieNode* _root; vector _nodeInfoVec; int64_t _totalCount; double _minWeight; bool _initFlag; public: typedef vector::iterator iterator; public: iterator begin(); iterator end(); public: Trie(); ~Trie(); bool init(); bool loadDict(const string& filePath); bool dispose(); private: void _setInitFlag(bool on); bool _getInitFlag(); public: const TrieNodeInfo* find(const string& str); const TrieNodeInfo* find(const VUINT16& unicode); const TrieNodeInfo* find(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); const TrieNodeInfo* findPrefix(const string& str); public: double getWeight(const string& str); double getWeight(const VUINT16& unicode); double getWeight(VUINT16_CONST_ITER begin, VUINT16_CONST_ITER end); double getMinWeight(); int64_t getTotalCount(); bool insert(const TrieNodeInfo& nodeInfo); private: bool _buildTree(const string& filePath); bool _countWeight(); bool _deleteNode(TrieNode* node); }; } #endif