From a36782ca76121bbc6ed7efd6095127e3c04cf819 Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Fri, 5 Jul 2013 11:09:27 +0800 Subject: [PATCH] bak trie --- Trie.cpp | 42 +++++++++++++++++++++++++++--------------- Trie.h | 9 +++++++++ 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/Trie.cpp b/Trie.cpp index d6d4a40..8db90f6 100644 --- a/Trie.cpp +++ b/Trie.cpp @@ -94,10 +94,10 @@ namespace CppJieba bool Trie::cut(const ChUnicode* chUniStr, size_t len, vector< vector >& res) { res.clear(); - cout<()); vector& vec = res[i]; for(size_t j = i; j < len; j++) @@ -111,6 +111,28 @@ namespace CppJieba return true; } + bool Trie::cutUtf8(const string& str, vector< vector >& res) + { + ChUnicode buf[ChUniMaxLen]; + size_t len = utf8ToUnicode(str.c_str(), str.size(), buf); + if(0 == len) + { + return false; + } + return cut(buf, len, res); + /* + PRINT_MATRIX(res); + char buf[1024]; + FOR_VECTOR(res, i) + { + FOR_VECTOR(res[i], j) + { + unicodeToUtf8(chUniStr + i, res[i][j] - i + 1, buf); + cout<hmap.begin(); it != node->hmap.end(); it++) @@ -189,21 +211,11 @@ int main() //int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr); //cout< > res; - cout< hmap; //hmap[136]=1; diff --git a/Trie.h b/Trie.h index 6da99c5..005caf7 100644 --- a/Trie.h +++ b/Trie.h @@ -17,6 +17,7 @@ namespace CppJieba using namespace std; //using __gnu_cxx::hash_map; typedef uint16_t ChUnicode; + const size_t ChUniMaxLen = 1024; typedef map TrieNodeHashMap; struct TrieNode @@ -83,9 +84,11 @@ namespace CppJieba vector _nodeVec; public: typedef TrieNodeIterator iterator; + public: iterator begin(); iterator end(); + public: Trie(); ~Trie(); @@ -93,7 +96,13 @@ namespace CppJieba bool destroy(); void display(); bool find(const ChUnicode* chUniStr, size_t len); + + public: bool cut(const ChUnicode* chUniStr, size_t len, vector< vector >& res); + //bool cutUni(const vector& uniVec, ) + bool cutUtf8(const string& str, vector< vector >& res); + //bool cutMa + private: bool _destroyNode(TrieNode* node); void _display(TrieNode* node, int level);