From 29d3ee1bacf29c8b24314f15755493f96fbfaed0 Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 24 Jun 2013 13:30:58 +0800 Subject: [PATCH] finished trie's insert && display && init --- Trie.cpp | 102 +++++++++++++++++++++++++++++++++++++++++++++++++------ Trie.h | 21 +++++++++++- 2 files changed, 111 insertions(+), 12 deletions(-) diff --git a/Trie.cpp b/Trie.cpp index 1fcbb13..dacfbb8 100644 --- a/Trie.cpp +++ b/Trie.cpp @@ -4,14 +4,25 @@ namespace CppJieba { Trie::Trie() { + _root = NULL; } Trie::~Trie() { + destroy(); } bool Trie::init(const char* const filepath) { + char msgBuf[512]; + ChUnicode chUniBuf[512]; + if(NULL != _root) + { + LogError("already initted!"); + return false; + } + _root = new TrieNode; + _root->isLeaf = false; ifstream ifile(filepath); string line; vector vecBuf; @@ -19,21 +30,87 @@ namespace CppJieba { vecBuf.clear(); splitStr(line, vecBuf, " "); - PRINT_VECTOR(vecBuf); - getchar(); - uint16_t strbuf[1024]; - - size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf); - for(int i = 0; i < unilen; i++) + if(3 != vecBuf.size()) { - // printf("%x\n", strbuf[i]); + sprintf(msgBuf, "line[%s] illegal.", line.c_str()); + LogError(msgBuf); + return false; } - char utf8str[512]={0}; - unicodeToUtf8(strbuf, unilen, utf8str); + //PRINT_VECTOR(vecBuf); + //getchar(); + string chWord = vecBuf[0]; + size_t uniLen = utf8ToUnicode(chWord.c_str(), chWord.size(), chUniBuf); + _insert(chUniBuf, uniLen); + //for(int i = 0; i < unilen; i++) + //{ + // // printf("%x\n", strbuf[i]); + //} + //char utf8str[512]={0}; + //unicodeToUtf8(strbuf, unilen, utf8str); //cout<hmap.begin(); it != node->hmap.end(); it++) + { + char utfBuf[8]; + ChUnicode chBuf[1]; + for(int i = 0; i < level; i++) + { + cout<<" "; + } + chBuf[0]=it->first; + unicodeToUtf8(chBuf, 1, utfBuf); + cout<second, level + 1); + } + } + + bool Trie::_insert(const ChUnicode* chUniStr, size_t len) + { + if(0 == len) + { + LogError("input args illegal: len == 0"); + return false; + } + TrieNode* p = _root; + for(int i = 0; i < len; i++) + { + ChUnicode cu = chUniStr[i]; + if(p->hmap.end() == p->hmap.find(cu)) + { + TrieNode * next = new TrieNode; + next->isLeaf = false; + p->hmap[cu] = next; + p = next; + } + else + { + p = p->hmap[cu]; + } + } + p->isLeaf = true; + return true; } } @@ -43,7 +120,10 @@ using namespace CppJieba; int main() { Trie trie; - trie.init(); + trie.init("test/dict.txt"); + trie.display(); + //hash_map hmap; + //hmap[136]=1; return 0; } #endif diff --git a/Trie.h b/Trie.h index b03e4a7..2b7fe26 100644 --- a/Trie.h +++ b/Trie.h @@ -2,21 +2,40 @@ #define TRIE_H #include #include +#include #include "cppcommon/str_functs.h" #include "cppcommon/vec_functs.h" +#include "cppcommon/logger.h" #include "globals.h" + namespace CppJieba { using namespace CPPCOMMON; using namespace std; + using __gnu_cxx::hash_map; + typedef uint16_t ChUnicode; + typedef hash_map TrieNodeHashMap; + + struct TrieNode + { + TrieNodeHashMap hmap; + bool isLeaf; + }; + class Trie { + private: + TrieNode* _root; public: Trie(); ~Trie(); - public: bool init(const char* const filepath = DICT_FILE_PATH); + bool destroy(); + void display(); + void _display(TrieNode* node, int level); + private: + bool _insert(const ChUnicode* chUniBuf, size_t len); }; }