diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index adbd67e..92a4645 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -26,60 +26,60 @@ class DictTrie { public: DictTrie() { - _trie = NULL; - _minWeight = MAX_DOUBLE; + trie_ = NULL; + minWeight_ = MAX_DOUBLE; } DictTrie(const string& dictPath, const string& userDictPath = "") { new (this) DictTrie(); init(dictPath, userDictPath); } ~DictTrie() { - if(_trie) { - delete _trie; + if(trie_) { + delete trie_; } } bool init(const string& dictPath, const string& userDictPath = "") { - if(_trie != NULL) { + if(trie_ != NULL) { LogFatal("trie already initted"); } - _loadDict(dictPath); - _calculateWeight(_nodeInfos); - _minWeight = _findMinWeight(_nodeInfos); + loadDict_(dictPath); + calculateWeight_(nodeInfos_); + minWeight_ = findMinWeight_(nodeInfos_); if(userDictPath.size()) { - double maxWeight = _findMaxWeight(_nodeInfos); - _loadUserDict(userDictPath, maxWeight, UNKNOWN_TAG); + double maxWeight = findMaxWeight_(nodeInfos_); + loadUserDict_(userDictPath, maxWeight, UNKNOWN_TAG); } - _shrink(_nodeInfos); - _trie = _createTrie(_nodeInfos); - assert(_trie); + shrink_(nodeInfos_); + trie_ = createTrie_(nodeInfos_); + assert(trie_); return true; } const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { - return _trie->find(begin, end); + return trie_->find(begin, end); } bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const { - return _trie->find(begin, end, dag, offset); + return trie_->find(begin, end, dag, offset); } void find( Unicode::const_iterator begin, Unicode::const_iterator end, vector& res ) const { - _trie->find(begin, end, res); + trie_->find(begin, end, res); } bool isUserDictSingleChineseWord(const Unicode::value_type& word) const { - return isIn(_userDictSingleChineseWord, word); + return isIn(userDictSingleChineseWord_, word); } double getMinWeight() const { - return _minWeight; + return minWeight_; }; private: - Trie * _createTrie(const vector& dictUnits) { + Trie * createTrie_(const vector& dictUnits) { assert(dictUnits.size()); vector words; vector valuePointers; @@ -91,7 +91,7 @@ class DictTrie { Trie * trie = new Trie(words, valuePointers); return trie; } - void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag) { + void loadUserDict_(const string& filePath, double defaultWeight, const string& defaultTag) { ifstream ifs(filePath.c_str()); if(!ifs.is_open()) { LogFatal("file %s open failed.", filePath.c_str()); @@ -111,15 +111,15 @@ class DictTrie { continue; } if(nodeInfo.word.size() == 1) { - _userDictSingleChineseWord.insert(nodeInfo.word[0]); + userDictSingleChineseWord_.insert(nodeInfo.word[0]); } nodeInfo.weight = defaultWeight; nodeInfo.tag = (buf.size() == 2 ? buf[1] : defaultTag); - _nodeInfos.push_back(nodeInfo); + nodeInfos_.push_back(nodeInfo); } LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno); } - void _loadDict(const string& filePath) { + void loadDict_(const string& filePath) { ifstream ifs(filePath.c_str()); if(!ifs.is_open()) { LogFatal("file %s open failed.", filePath.c_str()); @@ -141,17 +141,17 @@ class DictTrie { nodeInfo.weight = atof(buf[1].c_str()); nodeInfo.tag = buf[2]; - _nodeInfos.push_back(nodeInfo); + nodeInfos_.push_back(nodeInfo); } } - double _findMinWeight(const vector& nodeInfos) const { + double findMinWeight_(const vector& nodeInfos) const { double ret = MAX_DOUBLE; for(size_t i = 0; i < nodeInfos.size(); i++) { ret = min(nodeInfos[i].weight, ret); } return ret; } - double _findMaxWeight(const vector& nodeInfos) const { + double findMaxWeight_(const vector& nodeInfos) const { double ret = MIN_DOUBLE; for(size_t i = 0; i < nodeInfos.size(); i++) { ret = max(nodeInfos[i].weight, ret); @@ -159,7 +159,7 @@ class DictTrie { return ret; } - void _calculateWeight(vector& nodeInfos) const { + void calculateWeight_(vector& nodeInfos) const { double sum = 0.0; for(size_t i = 0; i < nodeInfos.size(); i++) { sum += nodeInfos[i].weight; @@ -172,16 +172,16 @@ class DictTrie { } } - void _shrink(vector& units) const { + void shrink_(vector& units) const { vector(units.begin(), units.end()).swap(units); } private: - vector _nodeInfos; - Trie * _trie; + vector nodeInfos_; + Trie * trie_; - double _minWeight; - unordered_set _userDictSingleChineseWord; + double minWeight_; + unordered_set userDictSingleChineseWord_; }; } diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index a8b60a1..65e482a 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -14,40 +14,40 @@ namespace CppJieba { class FullSegment: public SegmentBase { public: FullSegment() { - _dictTrie = NULL; - _isBorrowed = false; + dictTrie_ = NULL; + isBorrowed_ = false; } explicit FullSegment(const string& dictPath) { - _dictTrie = NULL; + dictTrie_ = NULL; init(dictPath); } explicit FullSegment(const DictTrie* dictTrie) { - _dictTrie = NULL; + dictTrie_ = NULL; init(dictTrie); } virtual ~FullSegment() { - if(_dictTrie && ! _isBorrowed) { - delete _dictTrie; + if(dictTrie_ && ! isBorrowed_) { + delete dictTrie_; } }; bool init(const string& dictPath) { - assert(_dictTrie == NULL); - _dictTrie = new DictTrie(dictPath); - _isBorrowed = false; + assert(dictTrie_ == NULL); + dictTrie_ = new DictTrie(dictPath); + isBorrowed_ = false; return true; } bool init(const DictTrie* dictTrie) { - assert(_dictTrie == NULL); + assert(dictTrie_ == NULL); assert(dictTrie); - _dictTrie = dictTrie; - _isBorrowed = true; + dictTrie_ = dictTrie; + isBorrowed_ = true; return true; } using SegmentBase::cut; bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_dictTrie); + assert(dictTrie_); if (begin >= end) { LogError("begin >= end"); return false; @@ -66,7 +66,7 @@ class FullSegment: public SegmentBase { int wordLen = 0; for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) { //find word start from uItr - if (_dictTrie->find(uItr, end, tRes, 0)) { + if (dictTrie_->find(uItr, end, tRes, 0)) { for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) //for (vector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) { @@ -93,7 +93,7 @@ class FullSegment: public SegmentBase { } bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - assert(_dictTrie); + assert(dictTrie_); if (begin >= end) { LogError("begin >= end"); return false; @@ -117,8 +117,8 @@ class FullSegment: public SegmentBase { return true; } private: - const DictTrie* _dictTrie; - bool _isBorrowed; + const DictTrie* dictTrie_; + bool isBorrowed_; }; } diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index d000bce..9d624f2 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -31,17 +31,17 @@ class HMMSegment: public SegmentBase { virtual ~HMMSegment() {} public: bool init(const string& filePath) { - memset(_startProb, 0, sizeof(_startProb)); - memset(_transProb, 0, sizeof(_transProb)); - _statMap[0] = 'B'; - _statMap[1] = 'E'; - _statMap[2] = 'M'; - _statMap[3] = 'S'; - _emitProbVec.push_back(&_emitProbB); - _emitProbVec.push_back(&_emitProbE); - _emitProbVec.push_back(&_emitProbM); - _emitProbVec.push_back(&_emitProbS); - LIMONP_CHECK(_loadModel(filePath.c_str())); + memset(startProb_, 0, sizeof(startProb_)); + memset(transProb_, 0, sizeof(transProb_)); + statMap_[0] = 'B'; + statMap_[1] = 'E'; + statMap_[2] = 'M'; + statMap_[3] = 'S'; + emitProbVec_.push_back(&emitProbB_); + emitProbVec_.push_back(&emitProbE_); + emitProbVec_.push_back(&emitProbM_); + emitProbVec_.push_back(&emitProbS_); + LIMONP_CHECK(loadModel_(filePath.c_str())); LogInfo("HMMSegment init(%s) ok.", filePath.c_str()); return true; } @@ -53,16 +53,16 @@ class HMMSegment: public SegmentBase { Unicode::const_iterator right = begin; while(right != end) { if(*right < 0x80) { - if(left != right && !_cut(left, right, res)) { + if(left != right && !cut_(left, right, res)) { return false; } left = right; do { - right = _sequentialLetterRule(left, end); + right = sequentialLetterRule_(left, end); if(right != left) { break; } - right = _numbersRule(left, end); + right = numbersRule_(left, end); if(right != left) { break; } @@ -74,7 +74,7 @@ class HMMSegment: public SegmentBase { right++; } } - if(left != right && !_cut(left, right, res)) { + if(left != right && !cut_(left, right, res)) { return false; } return true; @@ -100,7 +100,7 @@ class HMMSegment: public SegmentBase { } private: // sequential letters rule - Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const { + Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { Unicode::value_type x = *begin; if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { begin ++; @@ -118,7 +118,7 @@ class HMMSegment: public SegmentBase { return begin; } // - Unicode::const_iterator _numbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const { + Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { Unicode::value_type x = *begin; if('0' <= x && x <= '9') { begin ++; @@ -135,10 +135,10 @@ class HMMSegment: public SegmentBase { } return begin; } - bool _cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { + bool cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector status; - if(!_viterbi(begin, end, status)) { - LogError("_viterbi failed."); + if(!viterbi_(begin, end, status)) { + LogError("viterbi_ failed."); return false; } @@ -154,7 +154,7 @@ class HMMSegment: public SegmentBase { return true; } - bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const { + bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const { if(begin == end) { return false; } @@ -171,7 +171,7 @@ class HMMSegment: public SegmentBase { //start for(size_t y = 0; y < Y; y++) { - weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], *begin, MIN_DOUBLE); + weight[0 + y * X] = startProb_[y] + getEmitProb_(emitProbVec_[y], *begin, MIN_DOUBLE); path[0 + y * X] = -1; } @@ -183,10 +183,10 @@ class HMMSegment: public SegmentBase { now = x + y*X; weight[now] = MIN_DOUBLE; path[now] = E; // warning - emitProb = _getEmitProb(_emitProbVec[y], *(begin+x), MIN_DOUBLE); + emitProb = getEmitProb_(emitProbVec_[y], *(begin+x), MIN_DOUBLE); for(size_t preY = 0; preY < Y; preY++) { old = x - 1 + preY * X; - tmp = weight[old] + _transProb[preY][y] + emitProb; + tmp = weight[old] + transProb_[preY][y] + emitProb; if(tmp > weight[now]) { weight[now] = tmp; path[now] = preY; @@ -212,13 +212,13 @@ class HMMSegment: public SegmentBase { return true; } - bool _loadModel(const char* const filePath) { + bool loadModel_(const char* const filePath) { ifstream ifile(filePath); string line; vector tmp; vector tmp2; - //load _startProb - if(!_getLine(ifile, line)) { + //load startProb_ + if(!getLine_(ifile, line)) { return false; } split(line, tmp, " "); @@ -227,12 +227,12 @@ class HMMSegment: public SegmentBase { return false; } for(size_t j = 0; j< tmp.size(); j++) { - _startProb[j] = atof(tmp[j].c_str()); + startProb_[j] = atof(tmp[j].c_str()); } - //load _transProb + //load transProb_ for(size_t i = 0; i < STATUS_SUM; i++) { - if(!_getLine(ifile, line)) { + if(!getLine_(ifile, line)) { return false; } split(line, tmp, " "); @@ -241,33 +241,33 @@ class HMMSegment: public SegmentBase { return false; } for(size_t j =0; j < STATUS_SUM; j++) { - _transProb[i][j] = atof(tmp[j].c_str()); + transProb_[i][j] = atof(tmp[j].c_str()); } } - //load _emitProbB - if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbB)) { + //load emitProbB_ + if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbB_)) { return false; } - //load _emitProbE - if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbE)) { + //load emitProbE_ + if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbE_)) { return false; } - //load _emitProbM - if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbM)) { + //load emitProbM_ + if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbM_)) { return false; } - //load _emitProbS - if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbS)) { + //load emitProbS_ + if(!getLine_(ifile, line) || !loadEmitProb_(line, emitProbS_)) { return false; } return true; } - bool _getLine(ifstream& ifile, string& line) { + bool getLine_(ifstream& ifile, string& line) { while(getline(ifile, line)) { trim(line); if(line.empty()) { @@ -280,7 +280,7 @@ class HMMSegment: public SegmentBase { } return false; } - bool _loadEmitProb(const string& line, EmitProbMap& mp) { + bool loadEmitProb_(const string& line, EmitProbMap& mp) { if(line.empty()) { return false; } @@ -290,7 +290,7 @@ class HMMSegment: public SegmentBase { for(size_t i = 0; i < tmp.size(); i++) { split(tmp[i], tmp2, ":"); if(2 != tmp2.size()) { - LogError("_emitProb illegal."); + LogError("emitProb_ illegal."); return false; } if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) { @@ -301,7 +301,7 @@ class HMMSegment: public SegmentBase { } return true; } - double _getEmitProb(const EmitProbMap* ptMp, uint16_t key, double defVal)const { + double getEmitProb_(const EmitProbMap* ptMp, uint16_t key, double defVal)const { EmitProbMap::const_iterator cit = ptMp->find(key); if(cit == ptMp->end()) { return defVal; @@ -311,14 +311,14 @@ class HMMSegment: public SegmentBase { } private: - char _statMap[STATUS_SUM]; - double _startProb[STATUS_SUM]; - double _transProb[STATUS_SUM][STATUS_SUM]; - EmitProbMap _emitProbB; - EmitProbMap _emitProbE; - EmitProbMap _emitProbM; - EmitProbMap _emitProbS; - vector _emitProbVec; + char statMap_[STATUS_SUM]; + double startProb_[STATUS_SUM]; + double transProb_[STATUS_SUM][STATUS_SUM]; + EmitProbMap emitProbB_; + EmitProbMap emitProbE_; + EmitProbMap emitProbM_; + EmitProbMap emitProbS_; + vector emitProbVec_; }; } diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp index 3a4f1c5..8114556 100644 --- a/src/KeywordExtractor.hpp +++ b/src/KeywordExtractor.hpp @@ -18,9 +18,9 @@ class KeywordExtractor { ~KeywordExtractor() {}; void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") { - _loadIdfDict(idfPath); - _loadStopWordDict(stopWordPath); - LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDict)); + loadIdfDict_(idfPath); + loadStopWordDict_(stopWordPath); + LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDict)); }; bool extract(const string& str, vector& keywords, size_t topN) const { @@ -36,30 +36,30 @@ class KeywordExtractor { bool extract(const string& str, vector >& keywords, size_t topN) const { vector words; - if(!_segment.cut(str, words)) { + if(!segment_.cut(str, words)) { LogError("segment cut(%s) failed.", str.c_str()); return false; } map wordmap; for(vector::iterator iter = words.begin(); iter != words.end(); iter++) { - if(_isSingleWord(*iter)) { + if(isSingleWord_(*iter)) { continue; } wordmap[*iter] += 1.0; } for(map::iterator itr = wordmap.begin(); itr != wordmap.end(); ) { - if(_stopWords.end() != _stopWords.find(itr->first)) { + if(stopWords_.end() != stopWords_.find(itr->first)) { wordmap.erase(itr++); continue; } - unordered_map::const_iterator cit = _idfMap.find(itr->first); - if(cit != _idfMap.end()) { + unordered_map::const_iterator cit = idfMap_.find(itr->first); + if(cit != idfMap_.end()) { itr->second *= cit->second; } else { - itr->second *= _idfAverage; + itr->second *= idfAverage_; } itr ++; } @@ -67,12 +67,12 @@ class KeywordExtractor { keywords.clear(); std::copy(wordmap.begin(), wordmap.end(), std::inserter(keywords, keywords.begin())); topN = min(topN, keywords.size()); - partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), _cmp); + partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), cmp_); keywords.resize(topN); return true; } private: - void _loadIdfDict(const string& idfPath) { + void loadIdfDict_(const string& idfPath) { ifstream ifs(idfPath.c_str()); if(!ifs.is_open()) { LogFatal("open %s failed.", idfPath.c_str()); @@ -93,28 +93,28 @@ class KeywordExtractor { continue; } idf = atof(buf[1].c_str()); - _idfMap[buf[0]] = idf; + idfMap_[buf[0]] = idf; idfSum += idf; } assert(lineno); - _idfAverage = idfSum / lineno; - assert(_idfAverage > 0.0); + idfAverage_ = idfSum / lineno; + assert(idfAverage_ > 0.0); } - void _loadStopWordDict(const string& filePath) { + void loadStopWordDict_(const string& filePath) { ifstream ifs(filePath.c_str()); if(!ifs.is_open()) { LogFatal("open %s failed.", filePath.c_str()); } string line ; while(getline(ifs, line)) { - _stopWords.insert(line); + stopWords_.insert(line); } - assert(_stopWords.size()); + assert(stopWords_.size()); } - bool _isSingleWord(const string& str) const { + bool isSingleWord_(const string& str) const { Unicode unicode; TransCode::decode(str, unicode); if(unicode.size() == 1) @@ -122,16 +122,16 @@ class KeywordExtractor { return false; } - static bool _cmp(const pair& lhs, const pair& rhs) { + static bool cmp_(const pair& lhs, const pair& rhs) { return lhs.second > rhs.second; } private: - MixSegment _segment; - unordered_map _idfMap; - double _idfAverage; + MixSegment segment_; + unordered_map idfMap_; + double idfAverage_; - unordered_set _stopWords; + unordered_set stopWords_; }; } diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 971da1a..398df5a 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -21,12 +21,12 @@ class MPSegment: public SegmentBase { virtual ~MPSegment() {}; bool init(const string& dictPath, const string& userDictPath = "") { - LIMONP_CHECK(_dictTrie.init(dictPath, userDictPath)); + LIMONP_CHECK(dictTrie_.init(dictPath, userDictPath)); LogInfo("MPSegment init(%s) ok", dictPath.c_str()); return true; } bool isUserDictSingleChineseWord(const Unicode::value_type & value) const { - return _dictTrie.isUserDictSingleChineseWord(value); + return dictTrie_.isUserDictSingleChineseWord(value); } using SegmentBase::cut; @@ -57,20 +57,20 @@ class MPSegment: public SegmentBase { } vector segmentChars; - _dictTrie.find(begin, end, segmentChars); + dictTrie_.find(begin, end, segmentChars); - _calcDP(segmentChars); + calcDP_(segmentChars); - _cut(segmentChars, res); + cut_(segmentChars, res); return true; } const DictTrie* getDictTrie() const { - return &_dictTrie; + return &dictTrie_; } private: - void _calcDP(vector& segmentChars) const { + void calcDP_(vector& segmentChars) const { size_t nextPos; const DictUnit* p; double val; @@ -90,7 +90,7 @@ class MPSegment: public SegmentBase { if(p) { val += p->weight; } else { - val += _dictTrie.getMinWeight(); + val += dictTrie_.getMinWeight(); } if(val > segmentChars[i].weight) { segmentChars[i].pInfo = p; @@ -99,7 +99,7 @@ class MPSegment: public SegmentBase { } } } - void _cut(const vector& segmentChars, vector& res) const { + void cut_(const vector& segmentChars, vector& res) const { size_t i = 0; while(i < segmentChars.size()) { const DictUnit* p = segmentChars[i].pInfo; @@ -114,7 +114,7 @@ class MPSegment: public SegmentBase { } private: - DictTrie _dictTrie; + DictTrie dictTrie_; }; } diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index 2cc5a53..6d47e89 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -17,8 +17,8 @@ class MixSegment: public SegmentBase { virtual ~MixSegment() { } bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { - LIMONP_CHECK(_mpSeg.init(mpSegDict, userDict)); - LIMONP_CHECK(_hmmSeg.init(hmmSegDict)); + LIMONP_CHECK(mpSeg_.init(mpSegDict, userDict)); + LIMONP_CHECK(hmmSeg_.init(hmmSegDict)); LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str()); return true; } @@ -26,7 +26,7 @@ class MixSegment: public SegmentBase { virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector words; words.reserve(end - begin); - if(!_mpSeg.cut(begin, end, words)) { + if(!mpSeg_.cut(begin, end, words)) { LogError("mpSeg cutDAG failed."); return false; } @@ -37,21 +37,21 @@ class MixSegment: public SegmentBase { piece.reserve(end - begin); for (size_t i = 0, j = 0; i < words.size(); i++) { //if mp get a word, it's ok, put it into result - if (1 != words[i].size() || (words[i].size() == 1 && _mpSeg.isUserDictSingleChineseWord(words[i][0]))) { + if (1 != words[i].size() || (words[i].size() == 1 && mpSeg_.isUserDictSingleChineseWord(words[i][0]))) { res.push_back(words[i]); continue; } // if mp get a single one and it is not in userdict, collect it in sequence j = i; - while (j < words.size() && 1 == words[j].size() && !_mpSeg.isUserDictSingleChineseWord(words[j][0])) { + while (j < words.size() && 1 == words[j].size() && !mpSeg_.isUserDictSingleChineseWord(words[j][0])) { piece.push_back(words[j][0]); j++; } // cut the sequence with hmm - if (!_hmmSeg.cut(piece.begin(), piece.end(), hmmRes)) { - LogError("_hmmSeg cut failed."); + if (!hmmSeg_.cut(piece.begin(), piece.end(), hmmRes)) { + LogError("hmmSeg_ cut failed."); return false; } @@ -92,11 +92,11 @@ class MixSegment: public SegmentBase { } const DictTrie* getDictTrie() const { - return _mpSeg.getDictTrie(); + return mpSeg_.getDictTrie(); } private: - MPSegment _mpSeg; - HMMSegment _hmmSeg; + MPSegment mpSeg_; + HMMSegment hmmSeg_; }; } diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp index e11f1df..908e024 100644 --- a/src/PosTagger.hpp +++ b/src/PosTagger.hpp @@ -30,16 +30,16 @@ class PosTagger { const string& hmmFilePath, const string& userDictPath = "" ) { - LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDictPath)); - _dictTrie = _segment.getDictTrie(); - LIMONP_CHECK(_dictTrie); + LIMONP_CHECK(segment_.init(dictPath, hmmFilePath, userDictPath)); + dictTrie_ = segment_.getDictTrie(); + LIMONP_CHECK(dictTrie_); }; bool tag(const string& src, vector >& res) const { vector cutRes; - if (!_segment.cut(src, cutRes)) { - LogError("_mixSegment cut failed"); + if (!segment_.cut(src, cutRes)) { + LogError("mixSegment_ cut failed"); return false; } @@ -50,9 +50,9 @@ class PosTagger { LogError("decode failed."); return false; } - tmp = _dictTrie->find(unico.begin(), unico.end()); + tmp = dictTrie_->find(unico.begin(), unico.end()); if(tmp == NULL || tmp->tag.empty()) { - res.push_back(make_pair(*itr, _specialRule(unico))); + res.push_back(make_pair(*itr, specialRule_(unico))); } else { res.push_back(make_pair(*itr, tmp->tag)); } @@ -60,7 +60,7 @@ class PosTagger { return !res.empty(); } private: - const char* _specialRule(const Unicode& unicode) const { + const char* specialRule_(const Unicode& unicode) const { size_t m = 0; size_t eng = 0; for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) { @@ -83,8 +83,8 @@ class PosTagger { return POS_ENG; } private: - MixSegment _segment; - const DictTrie * _dictTrie; + MixSegment segment_; + const DictTrie * dictTrie_; }; } diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index c787d24..317ce7c 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -22,10 +22,10 @@ class QuerySegment: public SegmentBase { }; virtual ~QuerySegment() {}; bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { - LIMONP_CHECK(_mixSeg.init(dict, model, userDict)); - LIMONP_CHECK(_fullSeg.init(_mixSeg.getDictTrie())); + LIMONP_CHECK(mixSeg_.init(dict, model, userDict)); + LIMONP_CHECK(fullSeg_.init(mixSeg_.getDictTrie())); assert(maxWordLen); - _maxWordLen = maxWordLen; + maxWordLen_ = maxWordLen; return true; } using SegmentBase::cut; @@ -37,17 +37,17 @@ class QuerySegment: public SegmentBase { //use mix cut first vector mixRes; - if (!_mixSeg.cut(begin, end, mixRes)) { - LogError("_mixSeg cut failed."); + if (!mixSeg_.cut(begin, end, mixRes)) { + LogError("mixSeg_ cut failed."); return false; } vector fullRes; for (vector::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { - // if it's too long, cut with _fullSeg, put fullRes in res - if (mixResItr->size() > _maxWordLen) { - if (_fullSeg.cut(mixResItr->begin(), mixResItr->end(), fullRes)) { + // if it's too long, cut with fullSeg_, put fullRes in res + if (mixResItr->size() > maxWordLen_) { + if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) { for (vector::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { res.push_back(*fullResItr); } @@ -88,9 +88,9 @@ class QuerySegment: public SegmentBase { return true; } private: - MixSegment _mixSeg; - FullSegment _fullSeg; - size_t _maxWordLen; + MixSegment mixSeg_; + FullSegment fullSeg_; + size_t maxWordLen_; }; } diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 4288a31..9e6f4e4 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -22,7 +22,7 @@ const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u}; class SegmentBase: public ISegment, public NonCopyable { public: SegmentBase() { - _loadSpecialSymbols(); + loadSpecialSymbols_(); }; virtual ~SegmentBase() {}; public: @@ -39,7 +39,7 @@ class SegmentBase: public ISegment, public NonCopyable { Unicode::const_iterator right; for(right = unicode.begin(); right != unicode.end(); right++) { - if(isIn(_specialSymbols, *right)) { + if(isIn(specialSymbols_, *right)) { if(left != right) { cut(left, right, res); } @@ -55,15 +55,15 @@ class SegmentBase: public ISegment, public NonCopyable { return true; } private: - void _loadSpecialSymbols() { + void loadSpecialSymbols_() { size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL); for(size_t i = 0; i < size; i ++) { - _specialSymbols.insert(SPECIAL_SYMBOL[i]); + specialSymbols_.insert(SPECIAL_SYMBOL[i]); } - assert(_specialSymbols.size()); + assert(specialSymbols_.size()); } private: - unordered_set _specialSymbols; + unordered_set specialSymbols_; }; } diff --git a/src/Trie.hpp b/src/Trie.hpp index 1a35973..8cc1176 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -61,19 +61,19 @@ class TrieNode { class Trie { public: Trie(const vector& keys, const vector & valuePointers) { - _root = new TrieNode; - _createTrie(keys, valuePointers); - _build();// build automation + root_ = new TrieNode; + createTrie_(keys, valuePointers); + build_();// build automation } ~Trie() { - if(_root) { - _deleteNode(_root); + if(root_) { + deleteNode_(root_); } } public: const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { TrieNode::NextMap::const_iterator citer; - const TrieNode* ptNode = _root; + const TrieNode* ptNode = root_; for(Unicode::const_iterator it = begin; it != end; it++) { // build automation assert(ptNode); @@ -91,7 +91,7 @@ class Trie { vector& res ) const { res.resize(end - begin); - const TrieNode * now = _root; + const TrieNode * now = root_; const TrieNode* node; // compiler will complain warnings if only "i < end - begin" . for (size_t i = 0; i < size_t(end - begin); i++) { @@ -102,7 +102,7 @@ class Trie { bool flag = false; // rollback - while( now != _root ) { + while( now != root_ ) { node = now->findNext(ch); if (node != NULL) { flag = true; @@ -116,11 +116,11 @@ class Trie { node = now->findNext(ch); } if(node == NULL) { - now = _root; + now = root_; } else { now = node; const TrieNode * temp = now; - while(temp != _root) { + while(temp != root_) { if (temp->ptValue) { size_t pos = i - temp->ptValue->word.size() + 1; res[pos].dag.push_back(pair::size_type, const DictUnit* >(i, temp->ptValue)); @@ -139,7 +139,7 @@ class Trie { Unicode::const_iterator end, DagType & res, size_t offset = 0) const { - const TrieNode * ptNode = _root; + const TrieNode * ptNode = root_; TrieNode::NextMap::const_iterator citer; for(Unicode::const_iterator itr = begin; itr != end ; itr++) { assert(ptNode); @@ -158,13 +158,13 @@ class Trie { return !res.empty(); } private: - void _build() { + void build_() { queue que; - assert(_root->ptValue == NULL); - assert(_root->next); - _root->fail = NULL; - for(TrieNode::NextMap::iterator iter = _root->next->begin(); iter != _root->next->end(); iter++) { - iter->second->fail = _root; + assert(root_->ptValue == NULL); + assert(root_->next); + root_->fail = NULL; + for(TrieNode::NextMap::iterator iter = root_->next->begin(); iter != root_->next->end(); iter++) { + iter->second->fail = root_; que.push(iter->second); } TrieNode* back = NULL; @@ -185,24 +185,24 @@ class Trie { back = back->fail; } if(back == NULL) { - iter->second->fail = _root; + iter->second->fail = root_; } que.push(iter->second); } } } - void _createTrie(const vector& keys, const vector & valuePointers) { + void createTrie_(const vector& keys, const vector & valuePointers) { if(valuePointers.empty() || keys.empty()) { return; } assert(keys.size() == valuePointers.size()); for(size_t i = 0; i < keys.size(); i++) { - _insertNode(keys[i], valuePointers[i]); + insertNode_(keys[i], valuePointers[i]); } } - void _insertNode(const Unicode& key, const DictUnit* ptValue) { - TrieNode* ptNode = _root; + void insertNode_(const Unicode& key, const DictUnit* ptValue) { + TrieNode* ptNode = root_; TrieNode::NextMap::const_iterator kmIter; @@ -224,21 +224,21 @@ class Trie { } ptNode->ptValue = ptValue; } - void _deleteNode(TrieNode* node) { + void deleteNode_(TrieNode* node) { if(!node) { return; } if(node->next) { TrieNode::NextMap::iterator it; for(it = node->next->begin(); it != node->next->end(); it++) { - _deleteNode(it->second); + deleteNode_(it->second); } delete node->next; } delete node; } private: - TrieNode* _root; + TrieNode* root_; }; }