mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
update fullsegment for reducing memory cost
This commit is contained in:
parent
f6762e07ae
commit
5bfd3d0c49
@ -15,23 +15,47 @@ namespace CppJieba
|
|||||||
class FullSegment: public SegmentBase
|
class FullSegment: public SegmentBase
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
DictTrie _dictTrie;
|
const DictTrie* _dictTrie;
|
||||||
|
bool _isBorrowed;
|
||||||
public:
|
public:
|
||||||
FullSegment(){_setInitFlag(false);};
|
FullSegment()
|
||||||
explicit FullSegment(const string& dictPath){_setInitFlag(init(dictPath));}
|
{
|
||||||
virtual ~FullSegment(){};
|
_dictTrie = NULL;
|
||||||
|
_isBorrowed = false;
|
||||||
|
}
|
||||||
|
explicit FullSegment(const string& dictPath)
|
||||||
|
{
|
||||||
|
_dictTrie = NULL;
|
||||||
|
init(dictPath);
|
||||||
|
}
|
||||||
|
explicit FullSegment(const DictTrie* dictTrie)
|
||||||
|
{
|
||||||
|
_dictTrie = NULL;
|
||||||
|
init(dictTrie);
|
||||||
|
}
|
||||||
|
virtual ~FullSegment()
|
||||||
|
{
|
||||||
|
if(_dictTrie && ! _isBorrowed)
|
||||||
|
{
|
||||||
|
delete _dictTrie;
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
public:
|
public:
|
||||||
bool init(const string& dictPath)
|
bool init(const string& dictPath)
|
||||||
{
|
{
|
||||||
if(_getInitFlag())
|
assert(_dictTrie == NULL);
|
||||||
{
|
_dictTrie = new DictTrie(dictPath);
|
||||||
LogError("already inited before now.");
|
_isBorrowed = false;
|
||||||
return false;
|
return true;
|
||||||
}
|
}
|
||||||
_dictTrie.init(dictPath.c_str());
|
bool init(const DictTrie* dictTrie)
|
||||||
assert(_dictTrie);
|
{
|
||||||
return _setInitFlag(true);
|
assert(_dictTrie == NULL);
|
||||||
|
assert(dictTrie);
|
||||||
|
_dictTrie = dictTrie;
|
||||||
|
_isBorrowed = true;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -40,7 +64,7 @@ namespace CppJieba
|
|||||||
public:
|
public:
|
||||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||||
{
|
{
|
||||||
assert(_getInitFlag());
|
assert(_dictTrie);
|
||||||
if (begin >= end)
|
if (begin >= end)
|
||||||
{
|
{
|
||||||
LogError("begin >= end");
|
LogError("begin >= end");
|
||||||
@ -61,7 +85,7 @@ namespace CppJieba
|
|||||||
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
|
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
|
||||||
{
|
{
|
||||||
//find word start from uItr
|
//find word start from uItr
|
||||||
if (_dictTrie.find(uItr, end, tRes, 0))
|
if (_dictTrie->find(uItr, end, tRes, 0))
|
||||||
{
|
{
|
||||||
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||||
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||||
@ -94,7 +118,7 @@ namespace CppJieba
|
|||||||
|
|
||||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
||||||
{
|
{
|
||||||
assert(_getInitFlag());
|
assert(_dictTrie);
|
||||||
if (begin >= end)
|
if (begin >= end)
|
||||||
{
|
{
|
||||||
LogError("begin >= end");
|
LogError("begin >= end");
|
||||||
|
@ -13,7 +13,7 @@ namespace Limonp
|
|||||||
bool _getInitFlag()const{return _isInited;};
|
bool _getInitFlag()const{return _isInited;};
|
||||||
bool _setInitFlag(bool flag){return _isInited = flag;};
|
bool _setInitFlag(bool flag){return _isInited = flag;};
|
||||||
public:
|
public:
|
||||||
operator bool(){return _getInitFlag();};
|
operator bool() const {return _getInitFlag();};
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -114,6 +114,10 @@ namespace CppJieba
|
|||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
const DictTrie* getDictTrie() const
|
||||||
|
{
|
||||||
|
return &_dictTrie;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void _calcDP(vector<SegmentChar>& SegmentChars) const
|
void _calcDP(vector<SegmentChar>& SegmentChars) const
|
||||||
|
@ -122,6 +122,11 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const DictTrie* getDictTrie() const
|
||||||
|
{
|
||||||
|
return _mpSeg.getDictTrie();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ namespace CppJieba
|
|||||||
LogError("_mixSeg init");
|
LogError("_mixSeg init");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!_fullSeg.init(dict))
|
if (!_fullSeg.init(_mixSeg.getDictTrie()))
|
||||||
{
|
{
|
||||||
LogError("_fullSeg init");
|
LogError("_fullSeg init");
|
||||||
return false;
|
return false;
|
||||||
|
@ -42,7 +42,6 @@ namespace CppJieba
|
|||||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0;
|
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0;
|
||||||
virtual bool cut(const string& str, vector<string>& res) const
|
virtual bool cut(const string& str, vector<string>& res) const
|
||||||
{
|
{
|
||||||
assert(_getInitFlag());
|
|
||||||
res.clear();
|
res.clear();
|
||||||
|
|
||||||
Unicode unicode;
|
Unicode unicode;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user