mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
130 lines
2.9 KiB
C++
130 lines
2.9 KiB
C++
#include "Trie.h"
|
|
|
|
namespace CppJieba
|
|
{
|
|
Trie::Trie()
|
|
{
|
|
_root = NULL;
|
|
}
|
|
|
|
Trie::~Trie()
|
|
{
|
|
destroy();
|
|
}
|
|
|
|
bool Trie::init(const char* const filepath)
|
|
{
|
|
char msgBuf[512];
|
|
ChUnicode chUniBuf[512];
|
|
if(NULL != _root)
|
|
{
|
|
LogError("already initted!");
|
|
return false;
|
|
}
|
|
_root = new TrieNode;
|
|
_root->isLeaf = false;
|
|
ifstream ifile(filepath);
|
|
string line;
|
|
vector<string> vecBuf;
|
|
while(getline(ifile, line))
|
|
{
|
|
vecBuf.clear();
|
|
splitStr(line, vecBuf, " ");
|
|
if(3 != vecBuf.size())
|
|
{
|
|
sprintf(msgBuf, "line[%s] illegal.", line.c_str());
|
|
LogError(msgBuf);
|
|
return false;
|
|
}
|
|
//PRINT_VECTOR(vecBuf);
|
|
//getchar();
|
|
string chWord = vecBuf[0];
|
|
size_t uniLen = utf8ToUnicode(chWord.c_str(), chWord.size(), chUniBuf);
|
|
_insert(chUniBuf, uniLen);
|
|
//for(int i = 0; i < unilen; i++)
|
|
//{
|
|
// // printf("%x\n", strbuf[i]);
|
|
//}
|
|
//char utf8str[512]={0};
|
|
//unicodeToUtf8(strbuf, unilen, utf8str);
|
|
//cout<<strlen(utf8str);
|
|
//cout<<utf8str<<endl;
|
|
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Trie::destroy()
|
|
{
|
|
return true;
|
|
}
|
|
|
|
void Trie::display()
|
|
{
|
|
_display(_root, 0);
|
|
}
|
|
|
|
void Trie::_display(TrieNode* node, int level)
|
|
{
|
|
if(NULL == node)
|
|
{
|
|
LogError("failed! node is null.");
|
|
return;
|
|
}
|
|
for(TrieNodeHashMap::const_iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
|
|
{
|
|
char utfBuf[8];
|
|
ChUnicode chBuf[1];
|
|
for(int i = 0; i < level; i++)
|
|
{
|
|
cout<<" ";
|
|
}
|
|
chBuf[0]=it->first;
|
|
unicodeToUtf8(chBuf, 1, utfBuf);
|
|
cout<<utfBuf<<endl;
|
|
_display(it->second, level + 1);
|
|
}
|
|
}
|
|
|
|
bool Trie::_insert(const ChUnicode* chUniStr, size_t len)
|
|
{
|
|
if(0 == len)
|
|
{
|
|
LogError("input args illegal: len == 0");
|
|
return false;
|
|
}
|
|
TrieNode* p = _root;
|
|
for(int i = 0; i < len; i++)
|
|
{
|
|
ChUnicode cu = chUniStr[i];
|
|
if(p->hmap.end() == p->hmap.find(cu))
|
|
{
|
|
TrieNode * next = new TrieNode;
|
|
next->isLeaf = false;
|
|
p->hmap[cu] = next;
|
|
p = next;
|
|
}
|
|
else
|
|
{
|
|
p = p->hmap[cu];
|
|
}
|
|
}
|
|
p->isLeaf = true;
|
|
return true;
|
|
}
|
|
|
|
}
|
|
|
|
#ifdef TRIE_UT
|
|
using namespace CppJieba;
|
|
int main()
|
|
{
|
|
Trie trie;
|
|
trie.init("test/dict.txt");
|
|
trie.display();
|
|
//hash_map<ChUnicode, int> hmap;
|
|
//hmap[136]=1;
|
|
return 0;
|
|
}
|
|
#endif
|