Update FullSegment to borrow an existing DictTrie, reducing memory cost

This commit is contained in:
wyy 2014-08-11 23:34:29 +08:00
parent f6762e07ae
commit 5bfd3d0c49
6 changed files with 51 additions and 19 deletions

View File

@ -15,23 +15,47 @@ namespace CppJieba
class FullSegment: public SegmentBase
{
private:
DictTrie _dictTrie;
const DictTrie* _dictTrie;
bool _isBorrowed;
public:
// NOTE(review): these three one-line definitions appear to be the versions replaced by
// this commit (the listing is a diff without +/- markers) — confirm against the real file.
// Default constructor: records "not initialised" via _setInitFlag(false).
FullSegment(){_setInitFlag(false);};
// Path constructor: stores the result of init(dictPath) as the init flag.
explicit FullSegment(const string& dictPath){_setInitFlag(init(dictPath));}
// Destructor with an empty body.
virtual ~FullSegment(){};
// Default constructor: starts with no dictionary attached (_dictTrie is NULL) and
// _isBorrowed cleared. The cut() overloads in this listing assert on _dictTrie, so an
// init() overload has to be called before cutting.
FullSegment()
{
_dictTrie = NULL;
_isBorrowed = false;
}
// Constructs a segmenter from a dictionary file path by delegating to
// init(const string&). The bool returned by init() is not checked here.
explicit FullSegment(const string& dictPath)
{
// Cleared first because init() asserts that no trie is attached yet.
_dictTrie = NULL;
init(dictPath);
}
// Constructs a segmenter on top of an already existing trie by delegating to
// init(const DictTrie*). The bool returned by init() is not checked here.
explicit FullSegment(const DictTrie* dictTrie)
{
// Cleared first because init() asserts that no trie is attached yet.
_dictTrie = NULL;
init(dictTrie);
}
// Deletes the trie only when one is attached and it is not flagged as borrowed;
// a trie attached through init(const DictTrie*) sets _isBorrowed and is left untouched.
// NOTE(review): no copy constructor / copy assignment is visible in this hunk. If the
// class is copyable, two copies of an owning instance would both delete the same
// trie — confirm and disable or define copying if so.
virtual ~FullSegment()
{
if(_dictTrie && ! _isBorrowed)
{
delete _dictTrie;
}
};
public:
// Loads the dictionary at dictPath for use by this segmenter.
// NOTE(review): this listing is a diff view without +/- markers, so two versions of the
// body appear back to back. Presumably everything through `return _setInitFlag(true);`
// is the replaced version and the statements after it are the new one — confirm
// against the real file before relying on either half.
bool init(const string& dictPath)
{
// First version: refuse a repeated init, then initialise the trie member in place.
if(_getInitFlag())
{
LogError("already inited before now.");
return false;
}
_dictTrie.init(dictPath.c_str());
assert(_dictTrie);
return _setInitFlag(true);
// Second version: allocate a trie with new and clear _isBorrowed, which is the
// condition under which the destructor deletes it. The "not yet initialised"
// precondition is checked with assert only.
assert(_dictTrie == NULL);
_dictTrie = new DictTrie(dictPath);
_isBorrowed = false;
return true;
}
// Attaches an existing dictionary trie instead of loading a private copy.
// The pointer is stored as-is and flagged as borrowed, so the destructor will not
// delete it; the trie therefore has to stay valid for as long as this segmenter
// is used.
// Returns true on success. Returns false, leaving the object unchanged, when a
// trie is already attached or when dictTrie is NULL.
bool init(const DictTrie* dictTrie)
{
// Checked at run time rather than with assert(): assert() compiles away under
// NDEBUG, which would let a repeated init overwrite a pointer the destructor may
// be responsible for deleting, and would make the return value meaningless to
// callers that test it.
if(_dictTrie != NULL)
{
return false;
}
if(dictTrie == NULL)
{
return false;
}
_dictTrie = dictTrie;
_isBorrowed = true;
return true;
}
public:
@ -40,7 +64,7 @@ namespace CppJieba
public:
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{
assert(_getInitFlag());
assert(_dictTrie);
if (begin >= end)
{
LogError("begin >= end");
@ -61,7 +85,7 @@ namespace CppJieba
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
{
//find word start from uItr
if (_dictTrie.find(uItr, end, tRes, 0))
if (_dictTrie->find(uItr, end, tRes, 0))
{
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
@ -94,7 +118,7 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{
assert(_getInitFlag());
assert(_dictTrie);
if (begin >= end)
{
LogError("begin >= end");

View File

@ -13,7 +13,7 @@ namespace Limonp
// Returns the current value of the _isInited flag.
bool _getInitFlag()const{return _isInited;};
// Stores flag in _isInited and returns the stored value, so callers can write
// `return _setInitFlag(...)`.
bool _setInitFlag(bool flag){return _isInited = flag;};
public:
// Conversion to bool that yields the init flag.
// NOTE(review): the listing shows a non-const and a const form one after the other;
// presumably the const form replaces the non-const one in this commit — confirm.
operator bool(){return _getInitFlag();};
operator bool() const {return _getInitFlag();};
};
}

View File

@ -114,6 +114,10 @@ namespace CppJieba
return true;
}
// Returns a read-only pointer to _dictTrie by taking its address; nothing is copied.
// NOTE(review): _dictTrie is presumably a member of the enclosing class, in which case
// the pointer is only valid while this object exists — confirm.
const DictTrie* getDictTrie() const
{
return &_dictTrie;
}
private:
void _calcDP(vector<SegmentChar>& SegmentChars) const

View File

@ -122,6 +122,11 @@ namespace CppJieba
}
return true;
}
// Forwards to _mpSeg.getDictTrie() and returns the pointer it yields unchanged.
const DictTrie* getDictTrie() const
{
return _mpSeg.getDictTrie();
}
};
}

View File

@ -42,7 +42,7 @@ namespace CppJieba
LogError("_mixSeg init");
return false;
}
if (!_fullSeg.init(dict))
if (!_fullSeg.init(_mixSeg.getDictTrie()))
{
LogError("_fullSeg init");
return false;

View File

@ -42,7 +42,6 @@ namespace CppJieba
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0;
virtual bool cut(const string& str, vector<string>& res) const
{
assert(_getInitFlag());
res.clear();
Unicode unicode;