add findPrefix into trie.cpp

This commit is contained in:
gwdwyy 2013-07-16 22:40:57 +08:00
parent 6cf4d5c8bc
commit 3007249099
3 changed files with 44 additions and 5 deletions

View File

@ -286,7 +286,7 @@ namespace CppJieba
for(vector<WordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end(); )
{
cout<<__FILE__<<__LINE__<<endl;
if(NULL != _priorPrefixTrie.findUtf8(it->word))
if(NULL != _priorPrefixTrie.findPrefix(it->word))
{
prior = *it;
it = wordInfos.erase(it);

View File

@ -112,8 +112,13 @@ namespace CppJieba
}
}
const TrieNodeInfo* Trie::findUtf8(const string& utf8Str)
const TrieNodeInfo* Trie::findPrefix(const string& utf8Str)
{
if(NULL == _root)
{
LogFatal("trie not initted!");
return NULL;
}
if(utf8Str.empty())
{
LogError("utf8Str is empty");
@ -122,10 +127,44 @@ namespace CppJieba
string uniStr = utf8ToUnicode(utf8Str);
if(uniStr.empty())
{
LogError("utf8ToUnicode return empty str");
LogError("utf8ToUnicode return empty star");
return NULL;
}
return find(uniStr);
if(uniStr.size() % 2)
{
LogError("utf8ToUnicode return uniStr illegal");
return NULL;
}
// Longest-prefix lookup: walk the trie along the UTF-16 code units of uniStr
// (two bytes per code unit; uniStr.size() was verified to be even above) and
// remember the info of the deepest word-terminating node seen on the path.
// Returns NULL when no dictionary word is a prefix of the input, or when a
// node carries an out-of-range index into _nodeInfoVec.
TrieNode* p = _root;
TrieNodeInfo * res = NULL;
for(uint i = 0; ; i += 2)
{
    // Inspect the current node BEFORE trying to descend, and also once more
    // after the last character has been consumed; otherwise a word that ends
    // exactly on the final character of the input would never be reported.
    if(p->isLeaf)
    {
        uint pos = p->nodeInfoVecPos;
        if(pos < _nodeInfoVec.size())
        {
            res = &(_nodeInfoVec[pos]);
        }
        else
        {
            LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
            return NULL;
        }
    }
    // Whole input consumed: the deepest match (if any) is already in res.
    if(i >= uniStr.size())
    {
        break;
    }
    // Fix: the high byte must come from position i, not from position 0.
    ChUnicode chUni = twocharToUint16(uniStr[i], uniStr[i+1]);
    // No child for this code unit: the prefix cannot be extended any further.
    if(p->hmap.find(chUni) == p->hmap.end())
    {
        break;
    }
    p = p->hmap[chUni];
}
return res;
}
const TrieNodeInfo* Trie::find(const string& uniStr)

View File

@ -70,9 +70,9 @@ namespace CppJieba
void display();
public:
const TrieNodeInfo* findUtf8(const string& utf8Str);
const TrieNodeInfo* find(const string& uniStr);
const TrieNodeInfo* find(const ChUnicode* const chUniStr, size_t len);
const TrieNodeInfo* findPrefix(const string& utf8Str);
public:
double getWeight(const ChUnicode* uniStr, size_t len);