Finish Trie insert, display, and init

This commit is contained in:
wyy 2013-06-24 13:30:58 +08:00
parent 12ac1c9a6b
commit 29d3ee1bac
2 changed files with 111 additions and 12 deletions

102
Trie.cpp
View File

@ -4,14 +4,25 @@ namespace CppJieba
{
// Construct an empty trie: there is no root node until init() allocates one.
Trie::Trie() : _root(NULL)
{
}
// Tear the trie down by delegating to destroy(); its bool result is ignored.
Trie::~Trie()
{
    this->destroy();
}
bool Trie::init(const char* const filepath)
{
char msgBuf[512];
ChUnicode chUniBuf[512];
if(NULL != _root)
{
LogError("already initted!");
return false;
}
_root = new TrieNode;
_root->isLeaf = false;
ifstream ifile(filepath);
string line;
vector<string> vecBuf;
@ -19,21 +30,87 @@ namespace CppJieba
{
vecBuf.clear();
splitStr(line, vecBuf, " ");
PRINT_VECTOR(vecBuf);
getchar();
uint16_t strbuf[1024];
size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
for(int i = 0; i < unilen; i++)
if(3 != vecBuf.size())
{
// printf("%x\n", strbuf[i]);
sprintf(msgBuf, "line[%s] illegal.", line.c_str());
LogError(msgBuf);
return false;
}
char utf8str[512]={0};
unicodeToUtf8(strbuf, unilen, utf8str);
//PRINT_VECTOR(vecBuf);
//getchar();
string chWord = vecBuf[0];
size_t uniLen = utf8ToUnicode(chWord.c_str(), chWord.size(), chUniBuf);
_insert(chUniBuf, uniLen);
//for(int i = 0; i < unilen; i++)
//{
// // printf("%x\n", strbuf[i]);
//}
//char utf8str[512]={0};
//unicodeToUtf8(strbuf, unilen, utf8str);
//cout<<strlen(utf8str);
cout<<utf8str<<endl;
//cout<<utf8str<<endl;
}
return true;
}
bool Trie::destroy()
{
return true;
}
// Print the whole trie via _display(), starting from the root at depth 0.
void Trie::display()
{
    const int rootLevel = 0;
    _display(_root, rootLevel);
}
// Recursively print the subtree hanging below `node` to cout.
//
// Each child character is written on its own line, preceded by `level`
// spaces, and is followed by that child's own subtree at `level + 1`.
// A null `node` is reported through LogError and nothing is printed.
void Trie::_display(TrieNode* node, int level)
{
    if(NULL == node)
    {
        LogError("failed! node is null.");
        return;
    }
    for(TrieNodeHashMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); ++it)
    {
        for(int i = 0; i < level; i++)
        {
            cout<<" ";
        }
        // Zero-filled so the text handed to cout is always terminated:
        // unicodeToUtf8 is defined elsewhere and may not append a '\0' itself.
        char utfBuf[8] = {0};
        ChUnicode chBuf[1];
        chBuf[0] = it->first;
        unicodeToUtf8(chBuf, 1, utfBuf);
        cout<<utfBuf<<endl;
        _display(it->second, level + 1);
    }
}
bool Trie::_insert(const ChUnicode* chUniStr, size_t len)
{
if(0 == len)
{
LogError("input args illegal: len == 0");
return false;
}
TrieNode* p = _root;
for(int i = 0; i < len; i++)
{
ChUnicode cu = chUniStr[i];
if(p->hmap.end() == p->hmap.find(cu))
{
TrieNode * next = new TrieNode;
next->isLeaf = false;
p->hmap[cu] = next;
p = next;
}
else
{
p = p->hmap[cu];
}
}
p->isLeaf = true;
return true;
}
}
@ -43,7 +120,10 @@ using namespace CppJieba;
// Test driver: builds a Trie from a dictionary file and prints it.
int main()
{
Trie trie;
// NOTE(review): this listing shows two init calls back to back. init()
// refuses to run once _root is set ("already initted!"), so if both lines
// are really present the second one fails -- confirm which call is meant
// to remain.
trie.init();
trie.init("test/dict.txt");
trie.display();
//hash_map<ChUnicode, int> hmap;
//hmap[136]=1;
return 0;
}
#endif

21
Trie.h
View File

@ -2,21 +2,40 @@
#define TRIE_H
#include <iostream>
#include <fstream>
#include <ext/hash_map>
#include "cppcommon/str_functs.h"
#include "cppcommon/vec_functs.h"
#include "cppcommon/logger.h"
#include "globals.h"
namespace CppJieba
{
// Make the helper-library and std names visible inside CppJieba.
using namespace CPPCOMMON;
using namespace std;
// hash_map here is the GNU extension container from <ext/hash_map>.
using __gnu_cxx::hash_map;
// One character as stored in the trie: a 16-bit unsigned value.
typedef uint16_t ChUnicode;
// Child table of a trie node: maps a character to the node reached through it.
// `struct TrieNode` is named here before its definition, which follows below.
typedef hash_map<ChUnicode, struct TrieNode*> TrieNodeHashMap;
// One node of the trie.
struct TrieNode
{
    // Children of this node, keyed by the character that leads to each child.
    TrieNodeHashMap hmap;
    // True when an inserted sequence ends at this node (set by Trie::_insert).
    bool isLeaf;

    // Start every node as a non-leaf so that `new TrieNode` never leaves
    // isLeaf holding an indeterminate value.
    TrieNode() : isLeaf(false) {}
};
// Prefix tree over ChUnicode sequences, filled from a dictionary file.
//
// NOTE(review): the class holds a raw TrieNode* and has a user-declared
// destructor but no copy constructor / copy assignment, so a copied Trie
// would share the same root pointer -- confirm that instances are never copied.
class Trie
{
    public:
        Trie();
        ~Trie();

        // Build the trie from the dictionary at `filepath`; returns false on
        // failure (for example when the trie has already been initialised).
        bool init(const char* const filepath = DICT_FILE_PATH);
        // Counterpart of init(); invoked by the destructor.
        bool destroy();
        // Print the whole trie, starting at the root.
        void display();
        // Print the subtree below `node`, indented by `level`.
        void _display(TrieNode* node, int level);

    private:
        // Insert the `len` characters starting at `chUniBuf` into the trie.
        bool _insert(const ChUnicode* chUniBuf, size_t len);

    private:
        // Root node; NULL until init() has run.
        TrieNode* _root;
};
}