Update FullSegment to reduce memory cost

This commit is contained in:
wyy 2014-08-11 23:34:29 +08:00
parent f6762e07ae
commit 5bfd3d0c49
6 changed files with 51 additions and 19 deletions

View File

@ -15,23 +15,47 @@ namespace CppJieba
class FullSegment: public SegmentBase class FullSegment: public SegmentBase
{ {
private: private:
DictTrie _dictTrie; const DictTrie* _dictTrie;
bool _isBorrowed;
public: public:
FullSegment(){_setInitFlag(false);}; FullSegment()
explicit FullSegment(const string& dictPath){_setInitFlag(init(dictPath));} {
virtual ~FullSegment(){}; _dictTrie = NULL;
_isBorrowed = false;
}
explicit FullSegment(const string& dictPath)
{
_dictTrie = NULL;
init(dictPath);
}
explicit FullSegment(const DictTrie* dictTrie)
{
_dictTrie = NULL;
init(dictTrie);
}
virtual ~FullSegment()
{
if(_dictTrie && ! _isBorrowed)
{
delete _dictTrie;
}
};
public: public:
bool init(const string& dictPath) bool init(const string& dictPath)
{ {
if(_getInitFlag()) assert(_dictTrie == NULL);
{ _dictTrie = new DictTrie(dictPath);
LogError("already inited before now."); _isBorrowed = false;
return false; return true;
} }
_dictTrie.init(dictPath.c_str()); bool init(const DictTrie* dictTrie)
assert(_dictTrie); {
return _setInitFlag(true); assert(_dictTrie == NULL);
assert(dictTrie);
_dictTrie = dictTrie;
_isBorrowed = true;
return true;
} }
public: public:
@ -40,7 +64,7 @@ namespace CppJieba
public: public:
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
{ {
assert(_getInitFlag()); assert(_dictTrie);
if (begin >= end) if (begin >= end)
{ {
LogError("begin >= end"); LogError("begin >= end");
@ -61,7 +85,7 @@ namespace CppJieba
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
{ {
//find word start from uItr //find word start from uItr
if (_dictTrie.find(uItr, end, tRes, 0)) if (_dictTrie->find(uItr, end, tRes, 0))
{ {
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) //for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
@ -94,7 +118,7 @@ namespace CppJieba
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
{ {
assert(_getInitFlag()); assert(_dictTrie);
if (begin >= end) if (begin >= end)
{ {
LogError("begin >= end"); LogError("begin >= end");

View File

@ -13,7 +13,7 @@ namespace Limonp
bool _getInitFlag()const{return _isInited;}; bool _getInitFlag()const{return _isInited;};
bool _setInitFlag(bool flag){return _isInited = flag;}; bool _setInitFlag(bool flag){return _isInited = flag;};
public: public:
operator bool(){return _getInitFlag();}; operator bool() const {return _getInitFlag();};
}; };
} }

View File

@ -114,6 +114,10 @@ namespace CppJieba
return true; return true;
} }
const DictTrie* getDictTrie() const
{
return &_dictTrie;
}
private: private:
void _calcDP(vector<SegmentChar>& SegmentChars) const void _calcDP(vector<SegmentChar>& SegmentChars) const

View File

@ -122,6 +122,11 @@ namespace CppJieba
} }
return true; return true;
} }
const DictTrie* getDictTrie() const
{
return _mpSeg.getDictTrie();
}
}; };
} }

View File

@ -42,7 +42,7 @@ namespace CppJieba
LogError("_mixSeg init"); LogError("_mixSeg init");
return false; return false;
} }
if (!_fullSeg.init(dict)) if (!_fullSeg.init(_mixSeg.getDictTrie()))
{ {
LogError("_fullSeg init"); LogError("_fullSeg init");
return false; return false;

View File

@ -42,7 +42,6 @@ namespace CppJieba
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0; virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const = 0;
virtual bool cut(const string& str, vector<string>& res) const virtual bool cut(const string& str, vector<string>& res) const
{ {
assert(_getInitFlag());
res.clear(); res.clear();
Unicode unicode; Unicode unicode;