Add _nodeInfoVec to trie.cpp/h

This commit is contained in:
gwdwyy 2013-07-05 18:19:23 +08:00
parent f2c5f571f2
commit 1974c49071
2 changed files with 72 additions and 34 deletions

View File

@ -24,8 +24,7 @@ namespace CppJieba
bool Trie::init(const char* const filepath) bool Trie::init(const char* const filepath)
{ {
char msgBuf[512]; char msgBuf[bufSize];
ChUnicode chUniBuf[512];
if(NULL != _root) if(NULL != _root)
{ {
LogError("already initted!"); LogError("already initted!");
@ -48,8 +47,21 @@ namespace CppJieba
string chWord = vecBuf[0]; string chWord = vecBuf[0];
unsigned int count = atoi(vecBuf[1].c_str()); unsigned int count = atoi(vecBuf[1].c_str());
const string& tag = vecBuf[2]; const string& tag = vecBuf[2];
size_t uniLen = utf8ToUnicode(chWord.c_str(), chWord.size(), chUniBuf);
_insert(chUniBuf, uniLen, count, tag); //insert node
TrieNodeInfo nodeInfo;
nodeInfo.word = chWord;
nodeInfo.count = count;
nodeInfo.tag = tag;
bool flag = _insert(nodeInfo);
if(!flag)
{
LogError("insert node failed!");
return false;
}
} }
return true; return true;
} }
@ -66,6 +78,7 @@ namespace CppJieba
_root = NULL; _root = NULL;
return ret; return ret;
} }
_nodeInfoVec.clear();
} }
void Trie::display() void Trie::display()
@ -214,20 +227,37 @@ namespace CppJieba
} }
} }
bool Trie::_insert(const ChUnicode* chUniStr, size_t len, unsigned int cnt, const string& tag) bool Trie::_insert(const TrieNodeInfo& nodeInfo)
{ {
if(0 == len) _nodeInfoVec.push_back(nodeInfo);
{ const string& word = nodeInfo.word;
LogError("input args illegal: len == 0"); ChUnicode chUniStr[bufSize];
return false; memset(chUniStr, 0, sizeof(chUniStr));
} size_t len = utf8ToUnicode(word.c_str(), word.size(), chUniStr);
if(0 == len)
{
return false;
}
TrieNode* p = _root; TrieNode* p = _root;
for(int i = 0; i < len; i++) for(int i = 0; i < len; i++)
{ {
ChUnicode cu = chUniStr[i]; ChUnicode cu = chUniStr[i];
if(NULL == p)
{
return false;
}
if(p->hmap.end() == p->hmap.find(cu)) if(p->hmap.end() == p->hmap.find(cu))
{ {
TrieNode * next = new TrieNode; TrieNode * next = NULL;
try
{
next = new TrieNode;
}
catch(const bad_alloc& e)
{
return false;
}
p->hmap[cu] = next; p->hmap[cu] = next;
p = next; p = next;
} }
@ -236,7 +266,19 @@ namespace CppJieba
p = p->hmap[cu]; p = p->hmap[cu];
} }
} }
if(NULL == p)
{
return false;
}
p->isLeaf = true; p->isLeaf = true;
if(!_nodeInfoVec.empty())
{
p->nodeInfoVecPos = _nodeInfoVec.size() - 1;
}
else
{
return false;
}
return true; return true;
} }
} }
@ -247,25 +289,9 @@ int main()
{ {
Trie trie; Trie trie;
trie.init("dict.utf8"); trie.init("dict.utf8");
//trie.init("test/dict.txt");
//cout<<trie.begin()->count<<endl;
//return 0;
//trie.init("dict.txt");
//trie.display();
//const char * utf = "B";
//ChUnicode chUniStr[16];
//int uniLen = utf8ToUnicode(utf, sizeof(utf), chUniStr);
//cout<<trie.find(chUniStr, uniLen)<<endl;
char utf[1024] = "我来到北京清华大学3D电视"; char utf[1024] = "我来到北京清华大学3D电视";
//ChUnicode chUniStr[1024];
//cout<<sizeof(utf)<<endl;
//int uniLen = utf8ToUnicode(utf, strlen(utf), chUniStr);
vector< vector<size_t> > res; vector< vector<size_t> > res;
//cout<<trie.cutUtf8(utf, res)<<endl;
trie.destroy(); trie.destroy();
//hash_map<ChUnicode, int> hmap;
//hmap[136]=1;
return 0; return 0;
} }
#endif #endif

24
Trie.h
View File

@ -20,15 +20,24 @@ namespace CppJieba
const size_t ChUniMaxLen = 1024; const size_t ChUniMaxLen = 1024;
typedef map<ChUnicode, struct TrieNode*> TrieNodeHashMap; typedef map<ChUnicode, struct TrieNode*> TrieNodeHashMap;
// Per-word dictionary record kept by the Trie in _nodeInfoVec.
// A leaf TrieNode refers to its record by index (nodeInfoVecPos)
// instead of carrying the count/tag fields itself.
struct TrieNodeInfo
{
    // The dictionary word; _insert decodes it with utf8ToUnicode.
    string word;
    // Numeric value parsed (via atoi) from the second column of the dictionary line.
    unsigned int count;
    // Third column of the dictionary line, stored verbatim.
    string tag;

    // Creates an empty record: both strings empty, count zero.
    // The string members default-construct on their own, so only
    // count needs an explicit initializer.
    TrieNodeInfo()
        : count(0)
    {
    }
};
struct TrieNode struct TrieNode
{ {
TrieNodeHashMap hmap; TrieNodeHashMap hmap;
bool isLeaf; bool isLeaf;
unsigned int nodeInfoVecPos;
unsigned int count;
string tag;
TrieNode() TrieNode()
:hmap(), isLeaf(false), count(0), tag() :hmap(), isLeaf(false), nodeInfoVecPos(0)
{ {
} }
}; };
@ -81,7 +90,7 @@ namespace CppJieba
{ {
private: private:
TrieNode* _root; TrieNode* _root;
vector<TrieNode> _nodeVec; vector<TrieNodeInfo> _nodeInfoVec;
public: public:
typedef TrieNodeIterator iterator; typedef TrieNodeIterator iterator;
@ -110,7 +119,10 @@ namespace CppJieba
private: private:
bool _destroyNode(TrieNode* node); bool _destroyNode(TrieNode* node);
void _display(TrieNode* node, int level); void _display(TrieNode* node, int level);
bool _insert(const ChUnicode* chUniBuf, size_t len, unsigned int cnt, const string& tag); bool _insert(const TrieNodeInfo& nodeInfo);
private:
enum {bufSize = 1024};
}; };
} }