Inline some functions in Trie.cpp/h and add find(const Unicode&, vector<pair<uint, TrieNodeInfo*> >& res)

This commit is contained in:
wyy 2013-09-13 13:12:32 +08:00
parent b9882f8297
commit dc41068d58
3 changed files with 80 additions and 59 deletions

View File

@ -34,10 +34,14 @@ namespace CppJieba
bool MixSegment::cut(const string& str, vector<string>& res)
{
if(str.empty())
{
return false;
}
vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(str, infos))
{
LogError("_mpSeg cutDAG failed.");
LogError("_mpSeg cutDAG [%s] failed.", str.c_str());
return false;
}
res.clear();

View File

@ -30,15 +30,6 @@ namespace CppJieba
dispose();
}
bool Trie::_getInitFlag()
{
return _initFlag;
}
void Trie::_setInitFlag(bool on)
{
_initFlag = on;
}
bool Trie::init()
{
if(_getInitFlag())
@ -256,54 +247,78 @@ namespace CppJieba
return NULL;
}
double Trie::getWeight(const string& str)
{
Unicode uintVec;
TransCode::decode(str, uintVec);
return getWeight(uintVec);
}
// NOTE(review): this span appears to interleave the body of the removed
// getWeight(const Unicode&) with the newly added find(const Unicode&, ...)
// overload (the +/- diff markers are not present) -- confirm against the
// repository before treating it as compilable code.
double Trie::getWeight(const Unicode& uintVec)
{
if(uintVec.empty())
// find(unico, res): walks the trie from _root along the elements of `unico`
// and, for every node on that path whose isLeaf flag is set, appends
// (index of the element just consumed, pointer to that node's TrieNodeInfo)
// to `res`. `res` is cleared first.
// Returns false when the trie is not initialised, when a leaf's
// nodeInfoVecPos lies outside _nodeInfoVec (res may then hold partial
// results), or when nothing was collected; true otherwise.
// The stored pointers refer to elements of _nodeInfoVec, so they are only
// usable while that vector is not reallocated.
bool Trie::find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res)
{
if(!_getInitFlag())
{
return getMinLogFreq();
LogFatal("trie not initted!");
return false;
}
const TrieNodeInfo * p = find(uintVec);
if(NULL != p)
res.clear();
TrieNode* p = _root;
//for(Unicode::const_iterator it = begin; it != end; it++)
for(uint i = 0; i < unico.size(); i++)
{
return p->logFreq;
// No child for this element: the walk stops, keeping what was found so far.
if(p->hmap.find(unico[i]) == p-> hmap.end())
{
break;
}
// NOTE(review): this looks the same key up a second time; the iterator
// returned by find() above could presumably be reused -- confirm hmap's type.
p = p->hmap[unico[i]];
if(p->isLeaf)
{
uint pos = p->nodeInfoVecPos;
// Bounds-check the stored position before taking an element's address.
if(pos < _nodeInfoVec.size())
{
res.push_back(make_pair(i, &_nodeInfoVec[pos]));
}
else
{
LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
return false;
}
}
}
else
{
return getMinLogFreq();
}
}
// Success means at least one (index, info) pair was collected.
return !res.empty();
}
// Weight lookup for the range [begin, end): returns the logFreq of the
// entry found by find(begin, end), or getMinLogFreq() when find yields NULL.
double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end)
{
    const TrieNodeInfo* info = find(begin, end);
    if(NULL == info)
    {
        return getMinLogFreq();
    }
    return info->logFreq;
}
//double Trie::getWeight(const string& str)
//{
double Trie::getMinLogFreq()
{
return _minLogFreq;
}
// Unicode uintVec;
// TransCode::decode(str, uintVec);
// return getWeight(uintVec);
//}
int64_t Trie::getTotalCount()
{
return _freqSum;
}
//double Trie::getWeight(const Unicode& uintVec)
//{
// if(uintVec.empty())
// {
// return getMinLogFreq();
// }
// const TrieNodeInfo * p = find(uintVec);
// if(NULL != p)
// {
// return p->logFreq;
// }
// else
// {
// return getMinLogFreq();
// }
//
//}
//double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end)
//{
// const TrieNodeInfo * p = find(begin, end);
// if(NULL != p)
// {
// return p->logFreq;
// }
// else
// {
// return getMinLogFreq();
// }
//}
bool Trie::_deleteNode(TrieNode* node)
{

View File

@ -43,9 +43,9 @@ namespace CppJieba
TrieNode* _root;
vector<TrieNodeInfo> _nodeInfoVec;
bool _initFlag;
int64_t _freqSum;
double _minLogFreq;
bool _initFlag;
public:
typedef vector<TrieNodeInfo>::iterator iterator;
@ -62,22 +62,24 @@ namespace CppJieba
bool dispose();
private:
void _setInitFlag(bool on);
bool _getInitFlag();
// Stores `on` into _initFlag (defined in-class so it can be inlined).
void _setInitFlag(bool on)
{
    _initFlag = on;
}
// Returns the current value of _initFlag (defined in-class so it can be inlined).
bool _getInitFlag()
{
    return _initFlag;
}
public:
TrieNodeInfo* find(const string& str);
TrieNodeInfo* find(const Unicode& uintVec);
TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end);
// Collects into `res` one (index into unico, TrieNodeInfo pointer) pair for
// each leaf node met while walking the trie along `unico`; `res` is cleared
// first. Returns true only if at least one pair was stored; false if the
// trie is not initialised or a node's stored position is out of range.
bool find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res);
const TrieNodeInfo* findPrefix(const string& str);
public:
double getWeight(const string& str);
double getWeight(const Unicode& uintVec);
double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end);
double getMinLogFreq();
//double getWeight(const string& str);
//double getWeight(const Unicode& uintVec);
//double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end);
// Returns the _minLogFreq member (defined in-class so it can be inlined).
double getMinLogFreq()
{
    return _minLogFreq;
}
int64_t getTotalCount();
// Returns the _freqSum member (defined in-class so it can be inlined).
int64_t getTotalCount()
{
    return _freqSum;
}
bool insert(const TrieNodeInfo& nodeInfo);