From 3eb0470c2f581e7f23e2c461f09614a574300e62 Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 23 Dec 2013 23:58:54 -0800 Subject: [PATCH] update husky and limonp --- src/CMakeLists.txt | 2 + src/HMMSegment.hpp | 10 +- src/Husky/HttpReqInfo.hpp | 4 +- src/Husky/ServerFrame.hpp | 302 ------------------------------------ src/Husky/ThreadManager.hpp | 45 ++---- src/KeywordExtractor.hpp | 29 +++- src/Limonp/ArgvContext.hpp | 4 +- src/Limonp/Config.hpp | 19 ++- src/Limonp/logger.hpp | 14 +- src/Limonp/str_functs.hpp | 80 ++++------ src/MPSegment.hpp | 2 +- src/Trie.hpp | 30 ++-- src/server.cpp | 8 +- 13 files changed, 127 insertions(+), 422 deletions(-) delete mode 100644 src/Husky/ServerFrame.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1816f9c..9cc83df 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,8 @@ SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) +INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src) + ADD_EXECUTABLE(cjsegment segment.cpp) ADD_EXECUTABLE(cjserver server.cpp) TARGET_LINK_LIBRARIES(cjserver pthread) diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index c2782af..4ad039b 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -215,7 +215,7 @@ namespace CppJieba { return false; } - splitStr(line, tmp, " "); + split(line, tmp, " "); if(tmp.size() != STATUS_SUM) { LogError("start_p illegal"); @@ -234,7 +234,7 @@ namespace CppJieba { return false; } - splitStr(line, tmp, " "); + split(line, tmp, " "); if(tmp.size() != STATUS_SUM) { LogError("trans_p illegal"); @@ -284,7 +284,7 @@ namespace CppJieba { continue; } - if(strStartsWith(line, "#")) + if(startsWith(line, "#")) { continue; } @@ -300,10 +300,10 @@ namespace CppJieba } vector tmp, tmp2; uint16_t unico = 0; - splitStr(line, tmp, ","); + split(line, tmp, ","); for(uint i = 0; i < tmp.size(); i++) { - splitStr(tmp[i], tmp2, ":"); + split(tmp[i], tmp2, ":"); if(2 != tmp2.size()) { LogError("_emitProb illegal."); diff --git a/src/Husky/HttpReqInfo.hpp b/src/Husky/HttpReqInfo.hpp index c0fa92a..2d52b73 100644 --- a/src/Husky/HttpReqInfo.hpp +++ b/src/Husky/HttpReqInfo.hpp @@ -3,7 +3,7 @@ #include #include -#include "../Limonp/logger.hpp" +#include "Limonp/logger.hpp" namespace Husky { @@ -88,7 +88,7 @@ namespace Husky } string firstline(headerStr, lpos, rpos - lpos); trim(firstline); - if(!splitStr(firstline, buf, " ") || 3 != buf.size()) + if(!split(firstline, buf, " ") || 3 != buf.size()) { LogFatal("parse header first line failed."); return false; diff --git a/src/Husky/ServerFrame.hpp b/src/Husky/ServerFrame.hpp deleted file mode 100644 index 114c14c..0000000 --- a/src/Husky/ServerFrame.hpp +++ /dev/null @@ -1,302 +0,0 @@ -#ifndef HUSKY_SERVERFRAME_H -#define HUSKY_SERVERFRAME_H - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "ThreadManager.hpp" -#include "HttpReqInfo.hpp" - -#define INVALID_SOCKET -1 -#define SOCKET_ERROR -1 -#define closesocket close -#define RECV_BUFFER 10240 -#define LISEN_QUEUR_LEN 1024 - - -namespace Husky -{ - using namespace Limonp; - typedef int SOCKET; - const struct timeval SOCKET_TIMEOUT = {2, 0}; - const char* const RESPONSE_FORMAT = "HTTP/1.1 200 OK\r\nConnection: close\r\nServer: FrameServer/1.0.0\r\nContent-Type: text/json; charset=%s\r\nContent-Length: %d\r\n\r\n"; - const char* const RESPONSE_CHARSET_UTF8 = "UTF-8"; - const char* const RESPONSE_CHARSET_GB2312 = "GB2312"; - const char* const CLIENT_IP_K = "CLIENT_IP"; - typedef unsigned short u_short; - typedef unsigned int u_int; - - class IRequestHandler - { - public: - virtual ~IRequestHandler(){}; - public: - virtual bool do_GET(const HttpReqInfo& httpReq, string& res) = 0; - - }; - - struct SPara - { - SOCKET hSock; - IRequestHandler * pHandler; - pthread_mutex_t * ppmAccept; - bool * pShutdown; - }; - - class ServerFrame//: public IWorkHandler - { - private: - pthread_mutex_t m_pmAccept; - bool m_bShutdown; - public: - ServerFrame(unsigned nPort, unsigned nThreadCount, IRequestHandler* pHandler) - { - m_bShutdown = false; - m_nLsnPort = nPort; - m_nThreadCount = nThreadCount; - m_pHandler = pHandler; - assert(pHandler); - pthread_mutex_init(&m_pmAccept,NULL); - }; - virtual ~ServerFrame(){pthread_mutex_destroy(&m_pmAccept);}; - virtual bool init() - { - - if (!BindToLocalHost(m_lsnSock,m_nLsnPort)) - { - LogFatal("BindToLocalHost failed."); - return false; - } - LogInfo("init ok {port:%d, threadNum:%d}", m_nLsnPort, m_nThreadCount); - - return true; - } - virtual bool dispose() - { - m_bShutdown=true; - if (SOCKET_ERROR==closesocket(m_lsnSock)) - { - LogError("error [%s]", strerror(errno)); - return false; - } - - int sockfd; - struct sockaddr_in dest; - - if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) - { - LogError("error [%s]", strerror(errno)); - return false; - } - - bzero(&dest, sizeof(dest)); - dest.sin_family = AF_INET; - dest.sin_port = htons(m_nLsnPort); - if (inet_aton("127.0.0.1", (struct in_addr *) &dest.sin_addr.s_addr) == 0) - { - LogError("error [%s]", strerror(errno)); - return false; - } - - if (connect(sockfd, (struct sockaddr *) &dest, sizeof(dest)) < 0) - { - LogError("error [%s]", strerror(errno)); - } - close(sockfd); - return true; - } - virtual bool run() - { - if(SOCKET_ERROR==listen(m_lsnSock,LISEN_QUEUR_LEN)) - { - LogError("error [%s]", strerror(errno)); - return false; - } - ThreadManager thrMngr; - int i; - SPara para; - para.hSock=m_lsnSock; - para.pHandler=m_pHandler; - para.ppmAccept=&m_pmAccept; - para.pShutdown=&m_bShutdown; - for (i=0;ihSock; - IRequestHandler *pHandler=pPara->pHandler; - bool* pShutdown = pPara->pShutdown; - pthread_mutex_t* ppmAccept = pPara->ppmAccept; - - int nRetCode; - linger lng; - char chRecvBuf[RECV_BUFFER]; - - SOCKET hClientSock; - string strHttpResp; - - sockaddr_in clientaddr; - socklen_t nSize = sizeof(clientaddr); - while(!(*pShutdown)) - { - HttpReqInfo httpReq; - pthread_mutex_lock(ppmAccept); - hClientSock=accept(hSockLsn,(sockaddr *)&clientaddr, &nSize); - pthread_mutex_unlock(ppmAccept); - - if(hClientSock==SOCKET_ERROR) - { - if(!(*pShutdown)) - LogError("error [%s]", strerror(errno)); - continue; - } - - httpReq[CLIENT_IP_K] = inet_ntoa(clientaddr.sin_addr);// inet_ntoa is not thread safety at some version - - lng.l_linger=1; - lng.l_onoff=1; - if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_LINGER,(char*)&lng,sizeof(lng))) - { - LogError("error [%s]", strerror(errno)); - } - - if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_RCVTIMEO,(char*)&SOCKET_TIMEOUT,sizeof(SOCKET_TIMEOUT))) - { - LogError("error [%s]", strerror(errno)); - } - - if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_SNDTIMEO,(char*)&SOCKET_TIMEOUT,sizeof(SOCKET_TIMEOUT))) - { - LogError("error [%s]", strerror(errno)); - } - - - string strRec; - string strSnd; - memset(chRecvBuf,0,sizeof(chRecvBuf)); - nRetCode = recv(hClientSock, chRecvBuf, RECV_BUFFER, 0); - strRec = chRecvBuf; - -#ifdef HUKSY_DEBUG - LogDebug("request[%s]", strRec.c_str()); -#endif - - if(SOCKET_ERROR==nRetCode) - { - LogDebug("error [%s]", strerror(errno)); - closesocket(hClientSock); - continue; - } - if(0==nRetCode) - { - LogDebug("connection has been gracefully closed"); - closesocket(hClientSock); - continue; - } - httpReq.load(strRec); - - pHandler->do_GET(httpReq, strSnd); - - char chHttpHeader[2048]; - - sprintf(chHttpHeader, RESPONSE_FORMAT, RESPONSE_CHARSET_UTF8, int(strSnd.length())); - - strHttpResp=chHttpHeader; - strHttpResp+=strSnd; -#ifdef HUKSY_DEBUG - LogDebug("response'body [%s]", strSnd.c_str()); -#endif - - if (SOCKET_ERROR==send(hClientSock,strHttpResp.c_str(),strHttpResp.length(),0)) - { - LogError("error [%s]", strerror(errno)); - } - - - closesocket(hClientSock); - } - - return 0; - - } - - private: - u_short m_nLsnPort; - u_short m_nThreadCount; - SOCKET m_lsnSock; - IRequestHandler *m_pHandler; - //static bool m_bShutdown; - //static pthread_mutex_t m_pmAccept; - - }; - - -} -#endif diff --git a/src/Husky/ThreadManager.hpp b/src/Husky/ThreadManager.hpp index b3fce5f..147b23c 100644 --- a/src/Husky/ThreadManager.hpp +++ b/src/Husky/ThreadManager.hpp @@ -5,8 +5,6 @@ #include #include -#define INFINITE 0 - namespace Husky { using namespace std; @@ -15,47 +13,43 @@ namespace Husky { private: typedef int HANDLE; - typedef int DWORD; typedef void *(* PThreadFunc)(void* param); public: ThreadManager(){;} ~ThreadManager(){} - unsigned int HandleCount(){return m_vecHandle.size();} + size_t HandleCount(){return _handles.size();} void clear() { - m_vecHandle.clear(); + _handles.clear(); } HANDLE CreateThread( PThreadFunc pFunc,void *pPara) { pthread_t pt; - int nErrorCode=pthread_create(&pt,NULL,pFunc,pPara); - if(nErrorCode!=0) + int nErrorCode = pthread_create(&pt,NULL,pFunc,pPara); + if(nErrorCode != 0) return nErrorCode; - m_vecHandle.push_back(pt); //加入线程列表 为WaitForMultipleObjects准备 + _handles.push_back(pt); return nErrorCode; } - //hThread (thread handler) : 为0时为默认最后一个加入管理器的线程句柄 - //dwMilliseconds等待时间 : 单位毫秒,默认值无穷时间 - //return value : -1句柄无效,其他值 WaitForSingleObject函数的返回值 - DWORD Wait(HANDLE hThread=0,DWORD dwMilliseconds=INFINITE ) + int Wait(HANDLE hThread = 0) { - if( hThread==0)//最后一个加入的线程 + if( hThread == 0)//the last handle { - if(!m_vecHandle.empty()) + if(!_handles.empty()) { - return pthread_join(m_vecHandle.back(),NULL); + return pthread_join(_handles.back(),NULL); } else return -1; } else { - if (find(m_vecHandle.begin(),m_vecHandle.end(),hThread)==m_vecHandle.end())//不存在此句柄 + if (find(_handles.begin(),_handles.end(),hThread) == _handles.end()) { return -1; } @@ -65,31 +59,26 @@ namespace Husky } - - //等待所有线程执行完毕 - //bWaitAll是否所有线程 : 默认值1等待所有线程,0有任何线程结束,此函数返回 - //dwMilliseconds : 单位毫秒,默认值无穷时间 - //return value : -1没有任何句柄,其他值 WaitForMultipleObjects函数的返回值 - DWORD WaitMultipleThread( bool bWaitAll=1,DWORD dwMilliseconds=INFINITE) + int WaitMultipleThread() { - if (m_vecHandle.empty()) + if (_handles.empty()) return -1; int nErrorcode; - for (uint i=0;i m_vecHandle; + vector _handles; private: ThreadManager(const ThreadManager&){;}// copy forbidden - void operator=(const ThreadManager &){}// copy forbidden + void operator = (const ThreadManager &){}// copy forbidden }; } diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp index 7627bb1..f677a22 100644 --- a/src/KeywordExtractor.hpp +++ b/src/KeywordExtractor.hpp @@ -7,10 +7,20 @@ namespace CppJieba { using namespace Limonp; - class KeywordExtractor + struct KeyWordInfo + { + + uint freq; + double weight; + }; + + class KeywordExtractor//: public MPSegment { private: MPSegment _segment; + private: + unordered_map _wordIndex; + vector _words; protected: bool _isInited; bool _getInitFlag()const{return _isInited;}; @@ -22,7 +32,22 @@ namespace CppJieba explicit KeywordExtractor(const string& dictPath){_setInitFlag(init(dictPath));}; ~KeywordExtractor(){}; public: - bool init(const string& dictPath){return _setInitFlag(_segment.init(dictPath));}; + bool init(const string& dictPath) + { + ifstream ifs(dictPath.c_str()); + if(!ifs) + { + LogError("open %s failed.", dictPath.c_str()); + return false; + } + string line ; + vector buf; + for(uint lineno = 0; getline(ifs, line); lineno++) + { + buf.clear(); + } + return _setInitFlag(_segment.init(dictPath)); + }; public: bool extract(const string& str, vector& keywords, uint topN) { diff --git a/src/Limonp/ArgvContext.hpp b/src/Limonp/ArgvContext.hpp index 4838a81..dba3997 100644 --- a/src/Limonp/ArgvContext.hpp +++ b/src/Limonp/ArgvContext.hpp @@ -22,9 +22,9 @@ namespace Limonp for(int i = 0; i < argc; i++) { - if(strStartsWith(argv[i], "-")) + if(startsWith(argv[i], "-")) { - if(i + 1 < argc && !strStartsWith(argv[i + 1], "-")) + if(i + 1 < argc && !startsWith(argv[i + 1], "-")) { _mpss[argv[i]] = argv[i+1]; i++; diff --git a/src/Limonp/Config.hpp b/src/Limonp/Config.hpp index f107b9e..5f8de9d 100644 --- a/src/Limonp/Config.hpp +++ b/src/Limonp/Config.hpp @@ -18,7 +18,17 @@ namespace Limonp class Config { public: - bool loadFile(const char * const filePath) + Config(const char * const filePath) + { + _loadFile(filePath); + } + public: + operator bool () + { + return !_map.empty(); + } + private: + bool _loadFile(const char * const filePath) { ifstream ifs(filePath); if(!ifs) @@ -33,12 +43,12 @@ namespace Limonp { lineno ++; trim(line); - if(line.empty() || strStartsWith(line, "#")) + if(line.empty() || startsWith(line, "#")) { continue; } vecBuf.clear(); - if(!splitStr(line, vecBuf, "=") || 2 != vecBuf.size()) + if(!split(line, vecBuf, "=") || 2 != vecBuf.size()) { LogFatal("line[%d:%s] is illegal.", lineno, line.c_str()); return false; @@ -57,6 +67,7 @@ namespace Limonp ifs.close(); return true; } + public: bool get(const string& key, string& value) const { map::const_iterator it = _map.find(key); @@ -73,7 +84,7 @@ namespace Limonp friend ostream& operator << (ostream& os, const Config& config); }; - ostream& operator << (ostream& os, const Config& config) + inline ostream& operator << (ostream& os, const Config& config) { return os << config._map; } diff --git a/src/Limonp/logger.hpp b/src/Limonp/logger.hpp index 763f26d..a6c2760 100644 --- a/src/Limonp/logger.hpp +++ b/src/Limonp/logger.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "io_functs.hpp" #include "str_functs.hpp" @@ -23,6 +24,7 @@ #define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) + namespace Limonp { using namespace std; @@ -36,16 +38,11 @@ namespace Limonp public: static bool Logging(uint level, const string& msg, const char* fileName, int lineNo) { - if(level > LL_FATAL) - { - cerr<<"level's value is out of range"<& src, string& dest, const string& connectorStr) - //{ - // if(src.empty()) - // { - // return false; - // } - // for(uint i = 0; i < src.size() - 1; i++) - // { - // dest += src[i]; - // dest += connectorStr; - // } - // dest += src[src.size() - 1]; - // return true; - //} - - //inline string joinStr(const vector& source, const string& connector) - //{ - // string res; - // joinStr(source, res, connector); - // return res; - //} - template void join(T begin, T end, string& res, const string& connector) { @@ -122,7 +100,7 @@ namespace Limonp - inline bool splitStr(const string& src, vector& res, const string& pattern) + inline bool split(const string& src, vector& res, const string& pattern) { if(src.empty()) { @@ -181,20 +159,9 @@ namespace Limonp } - inline uint16_t twocharToUint16(char high, char low) - { - return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff)); - } - inline pair uint16ToChar2(uint16_t in) - { - pair res; - res.first = (in>>8) & 0x00ff; //high - res.second = (in) & 0x00ff; //low - return res; - } - inline bool strStartsWith(const string& str, const string& prefix) + inline bool startsWith(const string& str, const string& prefix) { //return str.substr(0, prefix.size()) == prefix; if(prefix.length() > str.length()) @@ -204,7 +171,7 @@ namespace Limonp return 0 == str.compare(0, prefix.length(), prefix); } - inline bool strEndsWith(const string& str, const string& suffix) + inline bool endsWith(const string& str, const string& suffix) { if(suffix.length() > str.length()) { @@ -218,13 +185,19 @@ namespace Limonp return str.find(ch) != string::npos; } + inline uint16_t twocharToUint16(char high, char low) + { + return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff)); + } + inline bool utf8ToUnicode(const char * const str, uint len, vector& vec) { - char ch1, ch2; if(!str) { return false; } + char ch1, ch2; + uint16_t tmp; vec.clear(); for(uint i = 0;i < len;) { @@ -237,14 +210,16 @@ namespace Limonp { ch1 = (str[i] >> 2) & 0x07; ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 ); - vec.push_back(twocharToUint16(ch1, ch2)); + tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff)); + vec.push_back(tmp); i += 2; } else if((unsigned char)str[i] <= 0xef && i + 2 < len) { ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f ); ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f); - vec.push_back(twocharToUint16(ch1, ch2)); + tmp = (((uint16_t(ch1) & 0x00ff ) << 8) | (uint16_t(ch2) & 0x00ff)); + vec.push_back(tmp); i += 3; } else @@ -310,7 +285,8 @@ namespace Limonp { if(i + 1 < len) //&& (str[i+1] & 0x80)) { - vec.push_back(twocharToUint16(str[i], str[i + 1])); + uint16_t tmp = (((uint16_t(str[i]) & 0x00ff ) << 8) | (uint16_t(str[i+1]) & 0x00ff)); + vec.push_back(tmp); i += 2; } else @@ -321,11 +297,20 @@ namespace Limonp } return true; } + inline bool gbkTrans(const string& str, vector& vec) { return gbkTrans(str.c_str(), str.size(), vec); } + //inline pair uint16ToChar2(uint16_t in) + //{ + // pair res; + // res.first = (in>>8) & 0x00ff; //high + // res.second = (in) & 0x00ff; //low + // return res; + //} + inline bool gbkTrans(vector::const_iterator begin, vector::const_iterator end, string& res) { if(begin >= end) @@ -333,18 +318,21 @@ namespace Limonp return false; } res.clear(); - pair pa; + //pair pa; + char first, second; while(begin != end) { - pa = uint16ToChar2(*begin); - if(pa.first & 0x80) + //pa = uint16ToChar2(*begin); + first = ((*begin)>>8) & 0x00ff; + second = (*begin) & 0x00ff; + if(first & 0x80) { - res += pa.first; - res += pa.second; + res += first; + res += second; } else { - res += pa.second; + res += second; } begin++; } diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 9b0353a..5176c36 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -32,7 +32,7 @@ namespace CppJieba class MPSegment: public SegmentBase { - private: + protected: Trie* _trie; public: diff --git a/src/Trie.hpp b/src/Trie.hpp index 3b0fb45..78864e4 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -130,26 +130,13 @@ namespace CppJieba } bool loadDict(const char * const filePath) { - if(!_getInitFlag()) - { - LogError("not initted."); - return false; - } - - if(!checkFileExist(filePath)) - { - LogError("cann't find fiel[%s].",filePath); - return false; - } - bool res = false; - res = _trieInsert(filePath); - if(!res) + assert(_getInitFlag()); + if(!_trieInsert(filePath)) { LogError("_trieInsert failed."); return false; } - res = _countWeight(); - if(!res) + if(!_countWeight()) { LogError("_countWeight failed."); return false; @@ -339,15 +326,20 @@ namespace CppJieba private: bool _trieInsert(const char * const filePath) { - ifstream ifile(filePath); + ifstream ifs(filePath); + if(!ifs) + { + LogError("open %s failed.", filePath); + return false; + } string line; vector vecBuf; TrieNodeInfo nodeInfo; - while(getline(ifile, line)) + while(getline(ifs, line)) { vecBuf.clear(); - splitStr(line, vecBuf, " "); + split(line, vecBuf, " "); if(3 < vecBuf.size()) { LogError("line[%s] illegal.", line.c_str()); diff --git a/src/server.cpp b/src/server.cpp index ffda036..0d1e83b 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4,7 +4,7 @@ #include #include #include "Limonp/Config.hpp" -#include "Husky/ServerFrame.hpp" +#include "Husky/HuskyServer.hpp" #include "MPSegment.hpp" #include "HMMSegment.hpp" #include "MixSegment.hpp" @@ -38,8 +38,8 @@ bool run(int argc, char** argv) { return false; } - Config conf; - if(!conf.loadFile(argv[1])) + Config conf(argv[1]); + if(!conf) { return false; } @@ -90,7 +90,7 @@ bool run(int argc, char** argv) } ReqHandler reqHandler(dictPath, modelPath); - ServerFrame sf(port, threadNum, &reqHandler); + HuskyServer sf(port, threadNum, &reqHandler); return sf.init() && sf.run(); }