finished trie's insert && display && init

This commit is contained in:
wyy 2013-06-24 13:30:58 +08:00
parent 12ac1c9a6b
commit 29d3ee1bac
2 changed files with 111 additions and 12 deletions

102
Trie.cpp
View File

@ -4,14 +4,25 @@ namespace CppJieba
{ {
// Default constructor: the trie starts without a root node (_root is NULL).
// init() treats a non-NULL _root as "already initialised", so this NULL is
// what allows the first init() call to proceed.
Trie::Trie() Trie::Trie()
{ {
_root = NULL;
} }
// Destructor: delegates all cleanup to destroy(); its return value is ignored.
Trie::~Trie() Trie::~Trie()
{ {
destroy();
} }
bool Trie::init(const char* const filepath) bool Trie::init(const char* const filepath)
{ {
char msgBuf[512];
ChUnicode chUniBuf[512];
if(NULL != _root)
{
LogError("already initted!");
return false;
}
_root = new TrieNode;
_root->isLeaf = false;
ifstream ifile(filepath); ifstream ifile(filepath);
string line; string line;
vector<string> vecBuf; vector<string> vecBuf;
@ -19,21 +30,87 @@ namespace CppJieba
{ {
vecBuf.clear(); vecBuf.clear();
splitStr(line, vecBuf, " "); splitStr(line, vecBuf, " ");
PRINT_VECTOR(vecBuf); if(3 != vecBuf.size())
getchar();
uint16_t strbuf[1024];
size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
for(int i = 0; i < unilen; i++)
{ {
// printf("%x\n", strbuf[i]); sprintf(msgBuf, "line[%s] illegal.", line.c_str());
LogError(msgBuf);
return false;
} }
char utf8str[512]={0}; //PRINT_VECTOR(vecBuf);
unicodeToUtf8(strbuf, unilen, utf8str); //getchar();
string chWord = vecBuf[0];
size_t uniLen = utf8ToUnicode(chWord.c_str(), chWord.size(), chUniBuf);
_insert(chUniBuf, uniLen);
//for(int i = 0; i < unilen; i++)
//{
// // printf("%x\n", strbuf[i]);
//}
//char utf8str[512]={0};
//unicodeToUtf8(strbuf, unilen, utf8str);
//cout<<strlen(utf8str); //cout<<strlen(utf8str);
cout<<utf8str<<endl; //cout<<utf8str<<endl;
} }
return true;
}
bool Trie::destroy()
{
return true;
}
// Dumps the whole trie by handing the root to the recursive _display()
// helper, starting at indentation level zero.
void Trie::display()
{
    const int topLevel = 0;
    _display(_root, topLevel);
}
// Recursively prints the children of `node` to stdout, one character per
// line, each line prefixed with `level` spaces; grandchildren are printed
// directly under their parent with one more space of indentation.
//
// node  - subtree to print; a NULL node is reported via LogError and skipped.
// level - depth of `node` below the root, used as the indentation width.
void Trie::_display(TrieNode* node, int level)
{
    if(NULL == node)
    {
        LogError("failed! node is null.");
        return;
    }

    // The indentation depends only on `level`, so build it once per call
    // instead of once per child. A non-positive level yields no indentation.
    const std::string indent(level > 0 ? static_cast<size_t>(level) : 0, ' ');

    for(TrieNodeHashMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); ++it)
    {
        // Zero-filled so the buffer is a valid C string even if
        // unicodeToUtf8() does not append a terminator itself
        // (NOTE(review): its contract is not visible here - confirm).
        // One 16-bit code unit needs at most 3 UTF-8 bytes, so 8 is ample.
        char utfBuf[8] = {0};
        ChUnicode chBuf[1] = { it->first };
        unicodeToUtf8(chBuf, 1, utfBuf);
        cout<<indent<<utfBuf<<endl;
        _display(it->second, level + 1);
    }
}
bool Trie::_insert(const ChUnicode* chUniStr, size_t len)
{
if(0 == len)
{
LogError("input args illegal: len == 0");
return false;
}
TrieNode* p = _root;
for(int i = 0; i < len; i++)
{
ChUnicode cu = chUniStr[i];
if(p->hmap.end() == p->hmap.find(cu))
{
TrieNode * next = new TrieNode;
next->isLeaf = false;
p->hmap[cu] = next;
p = next;
}
else
{
p = p->hmap[cu];
}
}
p->isLeaf = true;
return true;
} }
} }
@ -43,7 +120,10 @@ using namespace CppJieba;
// Manual test driver: builds a Trie, loads "test/dict.txt" into it and prints
// the resulting tree. The return value of init() is not checked, so display()
// runs even when loading failed.
int main() int main()
{ {
Trie trie; Trie trie;
trie.init(); trie.init("test/dict.txt");
trie.display();
//hash_map<ChUnicode, int> hmap;
//hmap[136]=1;
return 0; return 0;
} }
#endif #endif

21
Trie.h
View File

@ -2,21 +2,40 @@
#define TRIE_H #define TRIE_H
#include <iostream> #include <iostream>
#include <fstream> #include <fstream>
#include <ext/hash_map>
#include "cppcommon/str_functs.h" #include "cppcommon/str_functs.h"
#include "cppcommon/vec_functs.h" #include "cppcommon/vec_functs.h"
#include "cppcommon/logger.h"
#include "globals.h" #include "globals.h"
namespace CppJieba namespace CppJieba
{ {
using namespace CPPCOMMON; using namespace CPPCOMMON;
using namespace std; using namespace std;
using __gnu_cxx::hash_map;
typedef uint16_t ChUnicode;
typedef hash_map<ChUnicode, struct TrieNode*> TrieNodeHashMap;
// One node of the trie. Each edge to a child is labelled with a single
// ChUnicode code unit.
struct TrieNode
{
    // Children of this node, keyed by the code unit on the connecting edge.
    TrieNodeHashMap hmap;
    // True when the path from the root to this node spells a complete
    // inserted key. Such a node may still have children (a key can be a
    // prefix of a longer key), so this does not mean "has no children".
    bool isLeaf;

    // Start every node as a non-terminal so isLeaf is never read while
    // indeterminate, even if the creator forgets to assign it.
    TrieNode() : isLeaf(false) {}
};
// Prefix tree over ChUnicode code units, filled from a dictionary file.
// NOTE(review): the class holds a raw TrieNode* and declares no copy
// constructor or assignment operator, so copies would share the same nodes -
// confirm that Trie objects are never copied.
class Trie class Trie
{ {
private:
// Root of the tree; NULL until init() allocates it.
TrieNode* _root;
public: public:
Trie(); Trie();
~Trie(); ~Trie();
public:
// Loads the dictionary at `filepath`, inserting the first space-separated
// field of every line. Returns false if the trie was already initialised or
// a line does not split into exactly three fields.
bool init(const char* const filepath = DICT_FILE_PATH); bool init(const char* const filepath = DICT_FILE_PATH);
// Cleanup hook invoked by the destructor; returns true.
bool destroy();
// Prints the whole tree to stdout, starting from the root.
void display();
// Recursive helper for display(): prints the children of `node`, indented by
// `level` spaces, then descends into each child with level + 1.
void _display(TrieNode* node, int level);
private:
// Inserts a key of `len` code units, creating nodes as needed and marking the
// last node with isLeaf = true. Returns false when len is 0.
bool _insert(const ChUnicode* chUniBuf, size_t len);
}; };
} }