Inline some functions in Trie.cpp/h and add find(const Unicode&, vector<pair<uint, TrieNodeInfo*> >& res)

This commit is contained in:
wyy 2013-09-13 13:12:32 +08:00
parent b9882f8297
commit dc41068d58
3 changed files with 80 additions and 59 deletions

View File

@ -34,10 +34,14 @@ namespace CppJieba
bool MixSegment::cut(const string& str, vector<string>& res) bool MixSegment::cut(const string& str, vector<string>& res)
{ {
if(str.empty())
{
return false;
}
vector<TrieNodeInfo> infos; vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(str, infos)) if(!_mpSeg.cut(str, infos))
{ {
LogError("_mpSeg cutDAG failed."); LogError("_mpSeg cutDAG [%s] failed.", str.c_str());
return false; return false;
} }
res.clear(); res.clear();

View File

@ -30,15 +30,6 @@ namespace CppJieba
dispose(); dispose();
} }
bool Trie::_getInitFlag()
{
return _initFlag;
}
void Trie::_setInitFlag(bool on)
{
_initFlag = on;
}
bool Trie::init() bool Trie::init()
{ {
if(_getInitFlag()) if(_getInitFlag())
@ -256,54 +247,78 @@ namespace CppJieba
return NULL; return NULL;
} }
double Trie::getWeight(const string& str) bool Trie::find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res)
{ {
if(!_getInitFlag())
Unicode uintVec;
TransCode::decode(str, uintVec);
return getWeight(uintVec);
}
double Trie::getWeight(const Unicode& uintVec)
{
if(uintVec.empty())
{ {
return getMinLogFreq(); LogFatal("trie not initted!");
return false;
} }
const TrieNodeInfo * p = find(uintVec); res.clear();
if(NULL != p) TrieNode* p = _root;
//for(Unicode::const_iterator it = begin; it != end; it++)
for(uint i = 0; i < unico.size(); i++)
{ {
return p->logFreq; if(p->hmap.find(unico[i]) == p-> hmap.end())
{
break;
}
p = p->hmap[unico[i]];
if(p->isLeaf)
{
uint pos = p->nodeInfoVecPos;
if(pos < _nodeInfoVec.size())
{
res.push_back(make_pair(i, &_nodeInfoVec[pos]));
}
else
{
LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
return false;
}
}
} }
else return !res.empty();
{ }
return getMinLogFreq();
}
}
double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end) //double Trie::getWeight(const string& str)
{ //{
const TrieNodeInfo * p = find(begin, end);
if(NULL != p)
{
return p->logFreq;
}
else
{
return getMinLogFreq();
}
}
double Trie::getMinLogFreq() // Unicode uintVec;
{ // TransCode::decode(str, uintVec);
return _minLogFreq; // return getWeight(uintVec);
} //}
int64_t Trie::getTotalCount() //double Trie::getWeight(const Unicode& uintVec)
{ //{
return _freqSum; // if(uintVec.empty())
} // {
// return getMinLogFreq();
// }
// const TrieNodeInfo * p = find(uintVec);
// if(NULL != p)
// {
// return p->logFreq;
// }
// else
// {
// return getMinLogFreq();
// }
//
//}
//double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end)
//{
// const TrieNodeInfo * p = find(begin, end);
// if(NULL != p)
// {
// return p->logFreq;
// }
// else
// {
// return getMinLogFreq();
// }
//}
bool Trie::_deleteNode(TrieNode* node) bool Trie::_deleteNode(TrieNode* node)
{ {

View File

@ -43,9 +43,9 @@ namespace CppJieba
TrieNode* _root; TrieNode* _root;
vector<TrieNodeInfo> _nodeInfoVec; vector<TrieNodeInfo> _nodeInfoVec;
bool _initFlag;
int64_t _freqSum; int64_t _freqSum;
double _minLogFreq; double _minLogFreq;
bool _initFlag;
public: public:
typedef vector<TrieNodeInfo>::iterator iterator; typedef vector<TrieNodeInfo>::iterator iterator;
@ -62,22 +62,24 @@ namespace CppJieba
bool dispose(); bool dispose();
private: private:
void _setInitFlag(bool on); void _setInitFlag(bool on){_initFlag = on;};
bool _getInitFlag(); bool _getInitFlag(){return _initFlag;};
public: public:
TrieNodeInfo* find(const string& str); TrieNodeInfo* find(const string& str);
TrieNodeInfo* find(const Unicode& uintVec); TrieNodeInfo* find(const Unicode& uintVec);
TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end); TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end);
bool find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res);
const TrieNodeInfo* findPrefix(const string& str); const TrieNodeInfo* findPrefix(const string& str);
public: public:
double getWeight(const string& str); //double getWeight(const string& str);
double getWeight(const Unicode& uintVec); //double getWeight(const Unicode& uintVec);
double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end); //double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end);
double getMinLogFreq(); double getMinLogFreq(){return _minLogFreq;};
int64_t getTotalCount(); int64_t getTotalCount(){return _freqSum;};
bool insert(const TrieNodeInfo& nodeInfo); bool insert(const TrieNodeInfo& nodeInfo);