diff --git a/README.md b/README.md index 534bdca..b7127f8 100644 --- a/README.md +++ b/README.md @@ -92,10 +92,11 @@ curl -d "南京市长江大桥" "http://127.0.0.1:11200/" ["南京市", "长江大桥"] ``` -如果有需要**安装使用**的,可以按照如下操作: +因为 HTTP GET 请求有长度限制,如果需要请求长文的,请使用POST请求。 ### 安装服务 +如果有需要**安装使用**的,可以按照如下操作: ``` sudo make install ``` diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index e433bc0..15b04fb 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -25,22 +25,8 @@ namespace CppJieba class DictTrie { - private: - vector _nodeInfos; - Trie * _trie; + public: - double _minWeight; - private: - unordered_set _userDictSingleChineseWord; - public: - bool isUserDictSingleChineseWord(const Unicode::value_type& word) const - { - return isIn(_userDictSingleChineseWord, word); - } - public: - double getMinWeight() const {return _minWeight;}; - - public: DictTrie() { _trie = NULL; @@ -59,7 +45,6 @@ namespace CppJieba } } - public: bool init(const string& dictPath, const string& userDictPath = "") { assert(!_trie); @@ -78,7 +63,6 @@ namespace CppJieba return true; } - public: const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { return _trie->find(begin, end); @@ -95,6 +79,11 @@ namespace CppJieba { _trie->find(begin, end, res); } + bool isUserDictSingleChineseWord(const Unicode::value_type& word) const + { + return isIn(_userDictSingleChineseWord, word); + } + double getMinWeight() const {return _minWeight;}; private: @@ -204,7 +193,12 @@ namespace CppJieba vector(units.begin(), units.end()).swap(units); } + private: + vector _nodeInfos; + Trie * _trie; + double _minWeight; + unordered_set _userDictSingleChineseWord; }; } diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index b582f0d..0a3e747 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -14,9 +14,6 @@ namespace CppJieba { class FullSegment: public SegmentBase { - private: - const DictTrie* _dictTrie; - bool _isBorrowed; public: FullSegment() { @@ -41,7 +38,6 @@ namespace CppJieba } }; - public: bool init(const string& dictPath) { assert(_dictTrie == NULL); @@ -58,10 +54,7 @@ namespace CppJieba return true; } - public: using SegmentBase::cut; - - public: bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { assert(_dictTrie); @@ -147,6 +140,9 @@ namespace CppJieba return true; } + private: + const DictTrie* _dictTrie; + bool _isBorrowed; }; } diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index c832092..838eeef 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -24,15 +24,6 @@ namespace CppJieba * 0:B, 1:E, 2:M, 3:S * */ enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4}; - private: - char _statMap[STATUS_SUM]; - double _startProb[STATUS_SUM]; - double _transProb[STATUS_SUM][STATUS_SUM]; - EmitProbMap _emitProbB; - EmitProbMap _emitProbE; - EmitProbMap _emitProbM; - EmitProbMap _emitProbS; - vector _emitProbVec; public: HMMSegment(){} @@ -101,6 +92,30 @@ namespace CppJieba } return true; } + public: + virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const + { + if(begin == end) + { + return false; + } + vector words; + words.reserve(end - begin); + if(!cut(begin, end, words)) + { + return false; + } + size_t offset = res.size(); + res.resize(res.size() + words.size()); + for(size_t i = 0; i < words.size(); i++) + { + if(!TransCode::encode(words[i], res[offset + i])) + { + LogError("encode failed."); + } + } + return true; + } private: // sequential letters rule Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const @@ -168,32 +183,7 @@ namespace CppJieba } return true; } - public: - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const - { - if(begin == end) - { - return false; - } - vector words; - words.reserve(end - begin); - if(!cut(begin, end, words)) - { - return false; - } - size_t offset = res.size(); - res.resize(res.size() + words.size()); - for(size_t i = 0; i < words.size(); i++) - { - if(!TransCode::encode(words[i], res[offset + i])) - { - LogError("encode failed."); - } - } - return true; - } - private: bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const { if(begin == end) @@ -384,6 +374,15 @@ namespace CppJieba } + private: + char _statMap[STATUS_SUM]; + double _startProb[STATUS_SUM]; + double _transProb[STATUS_SUM][STATUS_SUM]; + EmitProbMap _emitProbB; + EmitProbMap _emitProbE; + EmitProbMap _emitProbM; + EmitProbMap _emitProbS; + vector _emitProbVec; }; } diff --git a/src/ISegment.hpp b/src/ISegment.hpp index 5099fa0..167e2f9 100644 --- a/src/ISegment.hpp +++ b/src/ISegment.hpp @@ -8,7 +8,6 @@ namespace CppJieba { public: virtual ~ISegment(){}; - public: virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const = 0; virtual bool cut(const string& str, vector& res) const = 0; }; diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp index e1c6bf4..1020be8 100644 --- a/src/KeywordExtractor.hpp +++ b/src/KeywordExtractor.hpp @@ -12,13 +12,6 @@ namespace CppJieba /*utf8*/ class KeywordExtractor { - private: - MixSegment _segment; - private: - unordered_map _idfMap; - double _idfAverage; - - unordered_set _stopWords; public: KeywordExtractor(){}; KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") @@ -27,14 +20,12 @@ namespace CppJieba }; ~KeywordExtractor(){}; - public: void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") { _loadIdfDict(idfPath); _loadStopWordDict(stopWordPath); LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDict)); }; - public: bool extract(const string& str, vector& keywords, size_t topN) const { @@ -148,7 +139,7 @@ namespace CppJieba } assert(_stopWords.size()); } - private: + bool _isSingleWord(const string& str) const { Unicode unicode; @@ -158,12 +149,17 @@ namespace CppJieba return false; } - private: static bool _cmp(const pair& lhs, const pair& rhs) { return lhs.second > rhs.second; } + private: + MixSegment _segment; + unordered_map _idfMap; + double _idfAverage; + + unordered_set _stopWords; }; } diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index f6976fd..36b756a 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -14,8 +14,6 @@ namespace CppJieba class MPSegment: public SegmentBase { - private: - DictTrie _dictTrie; public: MPSegment(){}; @@ -24,7 +22,7 @@ namespace CppJieba LIMONP_CHECK(init(dictPath, userDictPath)); }; virtual ~MPSegment(){}; - public: + bool init(const string& dictPath, const string& userDictPath = "") { LIMONP_CHECK(_dictTrie.init(dictPath, userDictPath)); @@ -35,7 +33,7 @@ namespace CppJieba { return _dictTrie.isUserDictSingleChineseWord(value); } - public: + using SegmentBase::cut; virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { @@ -141,6 +139,8 @@ namespace CppJieba } } + private: + DictTrie _dictTrie; }; } diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index ff06f47..80e6615 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -10,17 +10,17 @@ namespace CppJieba { class MixSegment: public SegmentBase { - private: - MPSegment _mpSeg; - HMMSegment _hmmSeg; public: - MixSegment(){}; + MixSegment() + { + } MixSegment(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { LIMONP_CHECK(init(mpSegDict, hmmSegDict, userDict)); } - virtual ~MixSegment(){} - public: + virtual ~MixSegment() + { + } bool init(const string& mpSegDict, const string& hmmSegDict, const string& userDict = "") { LIMONP_CHECK(_mpSeg.init(mpSegDict, userDict)); @@ -28,9 +28,7 @@ namespace CppJieba LogInfo("MixSegment init(%s, %s)", mpSegDict.c_str(), hmmSegDict.c_str()); return true; } - public: using SegmentBase::cut; - public: virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { vector words; @@ -115,6 +113,9 @@ namespace CppJieba { return _mpSeg.getDictTrie(); } + private: + MPSegment _mpSeg; + HMMSegment _hmmSeg; }; } diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp index a44956d..6d16695 100644 --- a/src/PosTagger.hpp +++ b/src/PosTagger.hpp @@ -15,13 +15,10 @@ namespace CppJieba class PosTagger { - private: - MixSegment _segment; - const DictTrie * _dictTrie; - public: PosTagger() - {} + { + } PosTagger( const string& dictPath, const string& hmmFilePath, @@ -29,9 +26,10 @@ namespace CppJieba ) { init(dictPath, hmmFilePath, userDictPath); - }; - ~PosTagger(){}; - public: + } + ~PosTagger() + { + } void init( const string& dictPath, const string& hmmFilePath, @@ -103,6 +101,9 @@ namespace CppJieba // the ascii chars contain english letter return POS_ENG; } + private: + MixSegment _segment; + const DictTrie * _dictTrie; }; } diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index dfd2072..76a6c0e 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -17,11 +17,6 @@ namespace CppJieba { class QuerySegment: public SegmentBase { - private: - MixSegment _mixSeg; - FullSegment _fullSeg; - size_t _maxWordLen; - public: QuerySegment(){}; QuerySegment(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") @@ -29,7 +24,6 @@ namespace CppJieba init(dict, model, maxWordLen, userDict); }; virtual ~QuerySegment(){}; - public: bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { LIMONP_CHECK(_mixSeg.init(dict, model, userDict)); @@ -38,11 +32,7 @@ namespace CppJieba _maxWordLen = maxWordLen; return true; } - - public: using SegmentBase::cut; - - public: bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { if (begin >= end) @@ -117,6 +107,11 @@ namespace CppJieba return true; } + private: + MixSegment _mixSeg; + FullSegment _fullSeg; + size_t _maxWordLen; + }; } diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 25d384f..55c881d 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -25,19 +25,6 @@ namespace CppJieba public: SegmentBase(){_loadSpecialSymbols();}; virtual ~SegmentBase(){}; - private: - unordered_set _specialSymbols; - private: - void _loadSpecialSymbols() - { - size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL); - for(size_t i = 0; i < size; i ++) - { - _specialSymbols.insert(SPECIAL_SYMBOL[i]); - } - assert(_specialSymbols.size()); - } - public: virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const = 0; virtual bool cut(const string& str, vector& res) const @@ -72,6 +59,19 @@ namespace CppJieba return true; } + private: + void _loadSpecialSymbols() + { + size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL); + for(size_t i = 0; i < size; i ++) + { + _specialSymbols.insert(SPECIAL_SYMBOL[i]); + } + assert(_specialSymbols.size()); + } + private: + unordered_set _specialSymbols; + }; } diff --git a/src/Trie.hpp b/src/Trie.hpp index 3105926..6297443 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -43,12 +43,6 @@ namespace CppJieba class TrieNode { - public: - typedef unordered_map NextMap; - public: - TrieNode * fail; - NextMap * next; - const DictUnit * ptValue; public: TrieNode(): fail(NULL), next(NULL), ptValue(NULL) {} @@ -65,12 +59,15 @@ namespace CppJieba } return iter->second; } + public: + typedef unordered_map NextMap; + TrieNode * fail; + NextMap * next; + const DictUnit * ptValue; }; class Trie { - private: - TrieNode* _root; public: Trie(const vector& keys, const vector & valuePointers) { @@ -230,7 +227,6 @@ namespace CppJieba } } } - private: void _createTrie(const vector& keys, const vector & valuePointers) { if(valuePointers.empty() || keys.empty()) @@ -244,7 +240,6 @@ namespace CppJieba _insertNode(keys[i], valuePointers[i]); } } - private: void _insertNode(const Unicode& key, const DictUnit* ptValue) { TrieNode* ptNode = _root; @@ -291,6 +286,8 @@ namespace CppJieba } delete node; } + private: + TrieNode* _root; }; }