From 55c64e9893d2dc728587b5110062aedc57ad1a7a Mon Sep 17 00:00:00 2001 From: wyy Date: Sat, 30 Nov 2013 12:34:57 +0800 Subject: [PATCH] merge HMMSegment.h/cpp into hpp --- src/CMakeLists.txt | 2 +- src/HMMSegment.h | 59 ------ src/{HMMSegment.cpp => HMMSegment.hpp} | 257 +++++++++++++------------ src/MixSegment.h | 2 +- src/segment.cpp | 2 +- src/server.cpp | 2 +- 6 files changed, 135 insertions(+), 189 deletions(-) delete mode 100644 src/HMMSegment.h rename src/{HMMSegment.cpp => HMMSegment.hpp} (76%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 66803c6..7d7a317 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,7 +1,7 @@ SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) -SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp) +SET(LIBCPPJIEBA_SRC MixSegment.cpp) ADD_LIBRARY(cppjieba STATIC ${LIBCPPJIEBA_SRC}) ADD_EXECUTABLE(cjsegment segment.cpp) ADD_EXECUTABLE(cjserver server.cpp) diff --git a/src/HMMSegment.h b/src/HMMSegment.h deleted file mode 100644 index a71081e..0000000 --- a/src/HMMSegment.h +++ /dev/null @@ -1,59 +0,0 @@ -#ifndef CPPJIBEA_HMMSEGMENT_H -#define CPPJIBEA_HMMSEGMENT_H - -#include -#include -#include -#include "Limonp/str_functs.hpp" -#include "Limonp/logger.hpp" -#include "globals.h" -#include "TransCode.hpp" -#include "ISegment.hpp" -#include "SegmentBase.hpp" - -namespace CppJieba -{ - using namespace Limonp; - class HMMSegment: public SegmentBase - { - public: - /* - * STATUS: - * 0:B, 1:E, 2:M, 3:S - * */ - enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4}; - private: - char _statMap[STATUS_SUM]; - double _startProb[STATUS_SUM]; - double _transProb[STATUS_SUM][STATUS_SUM]; - EmitProbMap _emitProbB; - EmitProbMap _emitProbE; - EmitProbMap _emitProbM; - EmitProbMap _emitProbS; - vector _emitProbVec; - - public: - HMMSegment(); - virtual ~HMMSegment(); - public: - bool init(const char* const modelPath); - bool dispose(); - public: - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const ; - bool cut(const string& str, vector& res)const; - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const; - //virtual bool cut(const string& str, vector& res)const; - - private: - bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const; - bool _loadModel(const char* const filePath); - bool _getLine(ifstream& ifile, string& line); - bool _loadEmitProb(const string& line, EmitProbMap& mp); - bool _decodeOne(const string& str, uint16_t& res); - double _getEmitProb(const EmitProbMap* ptMp, uint16_t key, double defVal)const ; - - - }; -} - -#endif diff --git a/src/HMMSegment.cpp b/src/HMMSegment.hpp similarity index 76% rename from src/HMMSegment.cpp rename to src/HMMSegment.hpp index 37f3f99..39696b6 100644 --- a/src/HMMSegment.cpp +++ b/src/HMMSegment.hpp @@ -1,9 +1,40 @@ -#include "HMMSegment.h" +#ifndef CPPJIBEA_HMMSEGMENT_H +#define CPPJIBEA_HMMSEGMENT_H + +#include +#include +#include +#include "Limonp/str_functs.hpp" +#include "Limonp/logger.hpp" +#include "globals.h" +#include "TransCode.hpp" +#include "ISegment.hpp" +#include "SegmentBase.hpp" namespace CppJieba { - HMMSegment::HMMSegment() + using namespace Limonp; + class HMMSegment: public SegmentBase { + public: + /* + * STATUS: + * 0:B, 1:E, 2:M, 3:S + * */ + enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4}; + private: + char _statMap[STATUS_SUM]; + double _startProb[STATUS_SUM]; + double _transProb[STATUS_SUM][STATUS_SUM]; + EmitProbMap _emitProbB; + EmitProbMap _emitProbE; + EmitProbMap _emitProbM; + EmitProbMap _emitProbS; + vector _emitProbVec; + + public: + HMMSegment() + { memset(_startProb, 0, sizeof(_startProb)); memset(_transProb, 0, sizeof(_transProb)); _statMap[0] = 'B'; @@ -15,98 +46,23 @@ namespace CppJieba _emitProbVec.push_back(&_emitProbM); _emitProbVec.push_back(&_emitProbS); } - - HMMSegment::~HMMSegment() - { + virtual ~HMMSegment() + { dispose(); } - - bool HMMSegment::init(const char* const modelPath) - { + public: + bool init(const char* const modelPath) + { return _setInitFlag(_loadModel(modelPath)); } - - bool HMMSegment::dispose() - { + bool dispose() + { _setInitFlag(false); return true; } - - bool HMMSegment::_loadModel(const char* const filePath) - { - LogInfo("loadModel [%s] start ...", filePath); - ifstream ifile(filePath); - string line; - vector tmp; - vector tmp2; - //load _startProb - if(!_getLine(ifile, line)) - { - return false; - } - splitStr(line, tmp, " "); - if(tmp.size() != STATUS_SUM) - { - LogError("start_p illegal"); - return false; - } - for(uint j = 0; j< tmp.size(); j++) - { - _startProb[j] = atof(tmp[j].c_str()); - //cout<<_startProb[j]<& res)const { - return false; - } - splitStr(line, tmp, " "); - if(tmp.size() != STATUS_SUM) - { - LogError("trans_p illegal"); - return false; - } - for(uint j =0; j < STATUS_SUM; j++) - { - _transProb[i][j] = atof(tmp[j].c_str()); - //cout<<_transProb[i][j]<& res)const - { if(!_getInitFlag()) { LogError("not inited."); @@ -132,14 +88,12 @@ namespace CppJieba } return true; } - - bool HMMSegment::cut(const string& str, vector& res)const - { + bool cut(const string& str, vector& res)const + { return SegmentBase::cut(str, res); } - - bool HMMSegment::cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const - { + bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const + { if(!_getInitFlag()) { LogError("not inited."); @@ -164,9 +118,11 @@ namespace CppJieba } return true; } + //virtual bool cut(const string& str, vector& res)const; - bool HMMSegment::_viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const - { + private: + bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector& status)const + { if(begin == end) { return false; @@ -247,9 +203,80 @@ namespace CppJieba delete [] weight; return true; } + bool _loadModel(const char* const filePath) + { + LogInfo("loadModel [%s] start ...", filePath); + ifstream ifile(filePath); + string line; + vector tmp; + vector tmp2; + //load _startProb + if(!_getLine(ifile, line)) + { + return false; + } + splitStr(line, tmp, " "); + if(tmp.size() != STATUS_SUM) + { + LogError("start_p illegal"); + return false; + } + for(uint j = 0; j< tmp.size(); j++) + { + _startProb[j] = atof(tmp[j].c_str()); + //cout<<_startProb[j]<find(key); if(cit == ptMp->end()) { @@ -314,28 +338,9 @@ namespace CppJieba return cit->second; } -} - -#ifdef HMMSEGMENT_UT -using namespace CppJieba; - - -size_t add(size_t a, size_t b) -{ - return a*b; -} -int main() -{ - TransCode::setUtf8Enc(); - HMMSegment hmm; - hmm.loadModel("../dicts/hmm_model.utf8"); - vector res; - hmm.cut("小明硕士毕业于北邮网络研究院。。.", res); - cout< #include "Limonp/ArgvContext.hpp" #include "MPSegment.hpp" -#include "HMMSegment.h" +#include "HMMSegment.hpp" #include "MixSegment.h" using namespace CppJieba; diff --git a/src/server.cpp b/src/server.cpp index ae56f9a..ef8f2b9 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -8,7 +8,7 @@ #include "Husky/Daemon.h" #include "Husky/ServerFrame.h" #include "MPSegment.hpp" -#include "HMMSegment.h" +#include "HMMSegment.hpp" #include "MixSegment.h" using namespace Husky;