mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Inline some functions in Trie.cpp/h and add find(const Unicode&, vector<pair<uint, TrieNodeInfo*> >& res)
This commit is contained in:
parent
b9882f8297
commit
dc41068d58
@ -34,10 +34,14 @@ namespace CppJieba
|
||||
|
||||
bool MixSegment::cut(const string& str, vector<string>& res)
|
||||
{
|
||||
if(str.empty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
vector<TrieNodeInfo> infos;
|
||||
if(!_mpSeg.cut(str, infos))
|
||||
{
|
||||
LogError("_mpSeg cutDAG failed.");
|
||||
LogError("_mpSeg cutDAG [%s] failed.", str.c_str());
|
||||
return false;
|
||||
}
|
||||
res.clear();
|
||||
|
115
src/Trie.cpp
115
src/Trie.cpp
@ -30,15 +30,6 @@ namespace CppJieba
|
||||
dispose();
|
||||
}
|
||||
|
||||
bool Trie::_getInitFlag()
|
||||
{
|
||||
return _initFlag;
|
||||
}
|
||||
void Trie::_setInitFlag(bool on)
|
||||
{
|
||||
_initFlag = on;
|
||||
}
|
||||
|
||||
bool Trie::init()
|
||||
{
|
||||
if(_getInitFlag())
|
||||
@ -256,54 +247,78 @@ namespace CppJieba
|
||||
return NULL;
|
||||
}
|
||||
|
||||
double Trie::getWeight(const string& str)
|
||||
{
|
||||
|
||||
Unicode uintVec;
|
||||
TransCode::decode(str, uintVec);
|
||||
return getWeight(uintVec);
|
||||
}
|
||||
|
||||
double Trie::getWeight(const Unicode& uintVec)
|
||||
{
|
||||
if(uintVec.empty())
|
||||
bool Trie::find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res)
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return getMinLogFreq();
|
||||
LogFatal("trie not initted!");
|
||||
return false;
|
||||
}
|
||||
const TrieNodeInfo * p = find(uintVec);
|
||||
if(NULL != p)
|
||||
res.clear();
|
||||
TrieNode* p = _root;
|
||||
//for(Unicode::const_iterator it = begin; it != end; it++)
|
||||
for(uint i = 0; i < unico.size(); i++)
|
||||
{
|
||||
return p->logFreq;
|
||||
if(p->hmap.find(unico[i]) == p-> hmap.end())
|
||||
{
|
||||
break;
|
||||
}
|
||||
p = p->hmap[unico[i]];
|
||||
if(p->isLeaf)
|
||||
{
|
||||
uint pos = p->nodeInfoVecPos;
|
||||
if(pos < _nodeInfoVec.size())
|
||||
{
|
||||
res.push_back(make_pair(i, &_nodeInfoVec[pos]));
|
||||
}
|
||||
else
|
||||
{
|
||||
LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return getMinLogFreq();
|
||||
}
|
||||
|
||||
}
|
||||
return !res.empty();
|
||||
}
|
||||
|
||||
double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
{
|
||||
const TrieNodeInfo * p = find(begin, end);
|
||||
if(NULL != p)
|
||||
{
|
||||
return p->logFreq;
|
||||
}
|
||||
else
|
||||
{
|
||||
return getMinLogFreq();
|
||||
}
|
||||
}
|
||||
//double Trie::getWeight(const string& str)
|
||||
//{
|
||||
|
||||
double Trie::getMinLogFreq()
|
||||
{
|
||||
return _minLogFreq;
|
||||
}
|
||||
// Unicode uintVec;
|
||||
// TransCode::decode(str, uintVec);
|
||||
// return getWeight(uintVec);
|
||||
//}
|
||||
|
||||
int64_t Trie::getTotalCount()
|
||||
{
|
||||
return _freqSum;
|
||||
}
|
||||
//double Trie::getWeight(const Unicode& uintVec)
|
||||
//{
|
||||
// if(uintVec.empty())
|
||||
// {
|
||||
// return getMinLogFreq();
|
||||
// }
|
||||
// const TrieNodeInfo * p = find(uintVec);
|
||||
// if(NULL != p)
|
||||
// {
|
||||
// return p->logFreq;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return getMinLogFreq();
|
||||
// }
|
||||
//
|
||||
//}
|
||||
|
||||
//double Trie::getWeight(Unicode::const_iterator begin, Unicode::const_iterator end)
|
||||
//{
|
||||
// const TrieNodeInfo * p = find(begin, end);
|
||||
// if(NULL != p)
|
||||
// {
|
||||
// return p->logFreq;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// return getMinLogFreq();
|
||||
// }
|
||||
//}
|
||||
|
||||
bool Trie::_deleteNode(TrieNode* node)
|
||||
{
|
||||
|
18
src/Trie.h
18
src/Trie.h
@ -43,9 +43,9 @@ namespace CppJieba
|
||||
TrieNode* _root;
|
||||
vector<TrieNodeInfo> _nodeInfoVec;
|
||||
|
||||
bool _initFlag;
|
||||
int64_t _freqSum;
|
||||
double _minLogFreq;
|
||||
bool _initFlag;
|
||||
|
||||
public:
|
||||
typedef vector<TrieNodeInfo>::iterator iterator;
|
||||
@ -62,22 +62,24 @@ namespace CppJieba
|
||||
bool dispose();
|
||||
|
||||
private:
|
||||
void _setInitFlag(bool on);
|
||||
bool _getInitFlag();
|
||||
void _setInitFlag(bool on){_initFlag = on;};
|
||||
bool _getInitFlag(){return _initFlag;};
|
||||
|
||||
public:
|
||||
TrieNodeInfo* find(const string& str);
|
||||
TrieNodeInfo* find(const Unicode& uintVec);
|
||||
TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
bool find(const Unicode& unico, vector<pair<uint, TrieNodeInfo*> >& res);
|
||||
|
||||
const TrieNodeInfo* findPrefix(const string& str);
|
||||
|
||||
public:
|
||||
double getWeight(const string& str);
|
||||
double getWeight(const Unicode& uintVec);
|
||||
double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
double getMinLogFreq();
|
||||
//double getWeight(const string& str);
|
||||
//double getWeight(const Unicode& uintVec);
|
||||
//double getWeight(Unicode::const_iterator begin, Unicode::const_iterator end);
|
||||
double getMinLogFreq(){return _minLogFreq;};
|
||||
|
||||
int64_t getTotalCount();
|
||||
int64_t getTotalCount(){return _freqSum;};
|
||||
|
||||
bool insert(const TrieNodeInfo& nodeInfo);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user