Finished Trie::cut

This commit is contained in:
wyy 2013-06-24 15:35:12 +08:00
parent 812adcc20b
commit 5be8d48a18
2 changed files with 42 additions and 6 deletions

View File

@ -91,6 +91,26 @@ namespace CppJieba
return p->isLeaf; return p->isLeaf;
} }
bool Trie::cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res)
{
res.clear();
cout<<len<<endl;
for(size_t i = 0; i < len; i++)
{
cout<<__LINE__<<","<<chUniStr[i]<<endl;
res.push_back(vector<size_t>());
vector<size_t>& vec = res[i];
for(size_t j = i; j < len; j++)
{
if(find(chUniStr + i, j - i + 1))
{
vec.push_back(j);
}
}
}
return true;
}
bool Trie::_destroyNode(TrieNode* node) bool Trie::_destroyNode(TrieNode* node)
{ {
for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++) for(TrieNodeHashMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
@ -161,13 +181,27 @@ int main()
//trie.init("test/dict.txt"); //trie.init("test/dict.txt");
trie.init("dict.txt"); trie.init("dict.txt");
//trie.display(); //trie.display();
const char * utf = "B"; //const char * utf = "B";
ChUnicode chUniStr[16]; //ChUnicode chUniStr[16];
int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr); //int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr);
cout<<trie.find(chUniStr, uniLen)<<endl; //cout<<trie.find(chUniStr, uniLen)<<endl;
getchar(); char utf[1024] = "我来到北京清华大学";
char buf[1024];
ChUnicode chUniStr[1024];
//cout<<sizeof(utf)<<endl;
int uniLen = utf8ToUnicode(utf, strlen(utf), chUniStr);
vector< vector<size_t> > res;
cout<<trie.cut(chUniStr, uniLen, res)<<endl;
PRINT_MATRIX(res);
FOR_VECTOR(res, i)
{
FOR_VECTOR(res[i], j)
{
unicodeToUtf8(chUniStr + i, res[i][j] - i + 1, buf);
cout<<buf<<endl;
}
}
trie.destroy(); trie.destroy();
getchar();
//hash_map<ChUnicode, int> hmap; //hash_map<ChUnicode, int> hmap;
//hmap[136]=1; //hmap[136]=1;
return 0; return 0;

2
Trie.h
View File

@ -3,6 +3,7 @@
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <ext/hash_map> #include <ext/hash_map>
#include <cstring>
#include "cppcommon/str_functs.h" #include "cppcommon/str_functs.h"
#include "cppcommon/vec_functs.h" #include "cppcommon/vec_functs.h"
#include "cppcommon/logger.h" #include "cppcommon/logger.h"
@ -34,6 +35,7 @@ namespace CppJieba
bool destroy(); bool destroy();
void display(); void display();
bool find(const ChUnicode* chUniStr, size_t len); bool find(const ChUnicode* chUniStr, size_t len);
bool cut(const ChUnicode* chUniStr, size_t len, vector< vector<size_t> >& res);
private: private:
bool _destroyNode(TrieNode* node); bool _destroyNode(TrieNode* node);
void _display(TrieNode* node, int level); void _display(TrieNode* node, int level);