Merge branch 'master' into less_memory

This commit is contained in:
wyy 2014-03-27 01:44:23 -07:00
commit 22c1a6e670
25 changed files with 483 additions and 654 deletions

View File

@ -1,11 +1,11 @@
PROJECT(CPPJIEBA) PROJECT(CPPJIEBA)
CMAKE_MINIMUM_REQUIRED (VERSION 2.8) CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE ) set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE )
endif() endif()
ADD_DEFINITIONS(-std=c++0x -O3 -Wall -g) ADD_DEFINITIONS(-O3 -Wall -g)
IF (DEFINED ENC) IF (DEFINED ENC)
ADD_DEFINITIONS(-DCPPJIEBA_${ENC}) ADD_DEFINITIONS(-DCPPJIEBA_${ENC})
ENDIF() ENDIF()

View File

@ -1,6 +1,7 @@
## v2.3.5 is coming ## v2.3.5 is coming
1. 适配低级版本的`g++`,已在`g++ 4.4.7`上测试通过。 1. 适配更低级版本的`g++``cmake`,已在`g++ 4.1.2``cmake 2.6`上测试通过。
2. 修改一些测试用例的文件,减少测试时编译的时间。
## v2.3.4 ## v2.3.4

View File

@ -16,8 +16,8 @@
### 依赖 ### 依赖
* g++ (version >= 4.4.7 recommended); * g++ (version >= 4.1 recommended);
* cmake (version >= 2.8 recommended); * cmake (version >= 2.6 recommended);
### 下载和安装 ### 下载和安装

View File

@ -3,8 +3,6 @@
#socket listen port #socket listen port
port=11200 port=11200
#number of thread
thread_num=4
#daemonize #daemonize
daemonize=true daemonize=true

281
src/Husky/EpollServer.hpp Normal file
View File

@ -0,0 +1,281 @@
#ifndef HUSKY_EPOLLSERVER_H
#define HUSKY_EPOLLSERVER_H
#include <stdio.h>
#include <string.h>
#include <cassert>
#include <sys/socket.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <vector>
#include <sys/epoll.h>
#include <fcntl.h>
#include "HttpReqInfo.hpp"
namespace Husky
{
using namespace Limonp;
// printf-style template for a complete reply: a fixed "200 OK" header block followed by the body.
// Arguments, in order: charset name (%s), body length in bytes (%d), body text (%s).
const char* const HTTP_FORMAT = "HTTP/1.1 200 OK\r\nConnection: close\r\nServer: HuskyServer/1.0.0\r\nContent-Type: text/json; charset=%s\r\nContent-Length: %d\r\n\r\n%s";
// Charset name substituted into HTTP_FORMAT.
const char* const CHARSET_UTF8 = "UTF-8";
// Presumably the request-map key under which a client's IP is stored; nothing in this file reads it.
const char* const CLIENT_IP_K = "CLIENT_IP";
// Value passed to setsockopt(SO_LINGER) for client sockets: both fields (l_onoff, l_linger) are 1.
const struct linger LNG = {1, 1};
// Value passed to setsockopt(SO_RCVTIMEO / SO_SNDTIMEO) for client sockets: 5 seconds, 0 microseconds.
const struct timeval SOCKET_TIMEOUT = {5, 0};
/**
 * Callback interface the server uses to turn a parsed request into a
 * response body. Implementations are invoked through a const pointer, so
 * do_GET must not modify the handler.
 */
class IRequestHandler
{
    public:
        virtual ~IRequestHandler() {}

        /**
         * @param httpReq  the parsed HTTP request.
         * @param res      output: the response body.
         * @return true on success, false when no response could be produced.
         */
        virtual bool do_GET(const HttpReqInfo& httpReq, string& res) const = 0;
};
class EpollServer
{
private:
static const size_t LISTEN_QUEUE_LEN = 1024;
static const size_t RECV_BUFFER_SIZE = 1024 * 8;
static const int MAXEPOLLSIZE = 512;
private:
const IRequestHandler* _reqHandler;
int _host_socket;
int _epoll_fd;
bool _isShutDown;
int _epollSize;
unordered_map<int, string> _sockIpMap;
private:
bool _isInited;
bool _getInitFlag() const {return _isInited;}
bool _setInitFlag(bool flag) {return _isInited = flag;}
public:
explicit EpollServer(uint port, const IRequestHandler* pHandler): _reqHandler(pHandler), _host_socket(-1), _isShutDown(false), _epollSize(0)
{
assert(_reqHandler);
_setInitFlag(_init_epoll(port));
};
~EpollServer(){};// unfinished;
public:
operator bool() const
{
return _getInitFlag();
}
public:
bool start()
{
//int clientSock;
sockaddr_in clientaddr;
socklen_t nSize = sizeof(clientaddr);
//char recvBuf[RECV_BUFFER_SIZE];
struct epoll_event events[MAXEPOLLSIZE];
int nfds, clientSock;
while(!_isShutDown)
{
if(-1 == (nfds = epoll_wait(_epoll_fd, events, _epollSize, -1)))
{
LogFatal(strerror(errno));
return false;
}
//LogDebug("epoll_wait return event sum[%d]", nfds);
for(int i = 0; i < nfds; i++)
{
if(events[i].data.fd == _host_socket) /*new connect coming.*/
{
if(-1 == (clientSock = accept(_host_socket, (struct sockaddr*) &clientaddr, &nSize)))
{
LogError(strerror(errno));
continue;
}
if(!_epoll_add(clientSock, EPOLLIN | EPOLLET))
{
LogError("_epoll_add(%d, EPOLLIN | EPOLLET)", clientSock);
_closesocket(clientSock);
continue;
}
//LogInfo("connecting from: %d:%d client socket: %d\n", inet_ntoa(clientaddr.sin_addr), ntohs(clientaddr.sin_port), clientSock);
/* inet_ntoa is not thread safety at some version */
//_sockIpMap[clientSock] = inet_ntoa(clientaddr.sin_addr);
}
else /*client socket data to be received*/
{
_response(events[i].data.fd);
/*close socket will case it to be removed from epoll automatically*/
_closesocket(events[i].data.fd);
}
}
}
return true;
}
void stop()
{
_isShutDown = true;
if(-1 == close(_host_socket))
{
LogError(strerror(errno));
return;
}
int sockfd;
struct sockaddr_in dest;
if((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
LogError(strerror(errno));
return;
}
bzero(&dest, sizeof(dest));
dest.sin_family = AF_INET;
dest.sin_port = htons(_host_socket);
if(0 == inet_aton("127.0.0.1", (struct in_addr *) &dest.sin_addr.s_addr))
{
LogError(strerror(errno));
return;
}
if(connect(sockfd, (struct sockaddr *) &dest, sizeof(dest)) < 0)
{
LogError(strerror(errno));
}
_closesocket(sockfd);
}
private:
bool _epoll_add(int sockfd, uint32_t events)
{
if (!_setNonBLock(sockfd))
{
LogError(strerror(errno));
return false;
}
struct epoll_event ev;
ev.data.fd = sockfd;
ev.events = events;
if(epoll_ctl(_epoll_fd, EPOLL_CTL_ADD, sockfd, &ev) < 0)
{
LogError("insert socket '%d' into epoll failed: %s", sockfd, strerror(errno));
return false;
}
_epollSize ++;
return true;
}
bool _response(int sockfd) const
{
if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_LINGER, (const char*)&LNG, sizeof(LNG)))
{
LogError(strerror(errno));
return false;
}
if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&SOCKET_TIMEOUT, sizeof(SOCKET_TIMEOUT)))
{
LogError(strerror(errno));
return false;
}
if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&SOCKET_TIMEOUT, sizeof(SOCKET_TIMEOUT)))
{
LogError(strerror(errno));
return false;
}
string strRec, strSnd, strRetByHandler;
strRec.resize(RECV_BUFFER_SIZE);
int nRetCode = recv(sockfd, (char*)strRec.c_str(), strRec.size(), 0);
if(-1 == nRetCode)
{
LogDebug(strerror(errno));
return false;
}
if(0 == nRetCode)
{
LogDebug("client socket closed gracefully.");
return false;
}
HttpReqInfo httpReq(strRec);
if(!_reqHandler->do_GET(httpReq, strRetByHandler))
{
LogError("do_GET failed.");
return false;
}
string_format(strSnd, HTTP_FORMAT, CHARSET_UTF8, strRetByHandler.length(), strRetByHandler.c_str());
if(-1 == send(sockfd, strSnd.c_str(), strSnd.length(), 0))
{
LogError(strerror(errno));
return false;
}
LogInfo("{response:%s, epollsize:%d}", strRetByHandler.c_str(), _epollSize);
return true;
}
bool _init_epoll(uint port)
{
_host_socket = socket(AF_INET, SOCK_STREAM, 0);
if(-1 == _host_socket)
{
LogError(strerror(errno));
return false;
}
int nRet = 1;
if(-1 == setsockopt(_host_socket, SOL_SOCKET, SO_REUSEADDR, (char*)&nRet, sizeof(nRet)))
{
LogError(strerror(errno));
return false;
}
struct sockaddr_in addrSock;
addrSock.sin_family = AF_INET;
addrSock.sin_port = htons(port);
addrSock.sin_addr.s_addr = htonl(INADDR_ANY);
if(-1 == ::bind(_host_socket, (sockaddr*)&addrSock, sizeof(sockaddr)))
{
LogError(strerror(errno));
_closesocket(_host_socket);
return false;
}
if(-1 == listen(_host_socket, LISTEN_QUEUE_LEN))
{
LogError(strerror(errno));
return false;
}
if(-1 == (_epoll_fd = epoll_create(MAXEPOLLSIZE)))
{
LogError(strerror(errno));
return false;
}
if(!_epoll_add(_host_socket, EPOLLIN))
{
LogError("_epoll_add(%d, EPOLLIN) failed.", _host_socket);
return false;
}
LogInfo("create socket listening port[%u], epoll{size:%d} init ok", port, _epollSize);
return true;
}
void _closesocket(int sockfd)
{
if(-1 == close(sockfd))
{
LogError(strerror(errno));
return;
}
_epollSize--;
}
static bool _setNonBLock(int sockfd)
{
return -1 != fcntl(sockfd, F_SETFL, fcntl(sockfd, F_GETFD, 0)|O_NONBLOCK);
}
};
}
#endif

View File

@ -4,6 +4,7 @@
#include <iostream> #include <iostream>
#include <string> #include <string>
#include "Limonp/logger.hpp" #include "Limonp/logger.hpp"
#include "Limonp/str_functs.hpp"
namespace Husky namespace Husky
{ {
@ -76,22 +77,22 @@ namespace Husky
class HttpReqInfo class HttpReqInfo
{ {
public: public:
bool load(const string& headerStr) HttpReqInfo(const string& headerStr)
{ {
size_t lpos = 0, rpos = 0; size_t lpos = 0, rpos = 0;
vector<string> buf; vector<string> buf;
rpos = headerStr.find("\n", lpos); rpos = headerStr.find("\n", lpos);
if(string::npos == rpos) if(string::npos == rpos)
{ {
LogFatal("headerStr illegal."); LogError("headerStr illegal.");
return false; return;
} }
string firstline(headerStr, lpos, rpos - lpos); string firstline(headerStr, lpos, rpos - lpos);
trim(firstline); trim(firstline);
if(!split(firstline, buf, " ") || 3 != buf.size()) if(!split(firstline, buf, " ") || 3 != buf.size())
{ {
LogFatal("parse header first line failed."); LogError("parse header first line failed.");
return false; return;
} }
_headerMap[KEY_METHOD] = trim(buf[0]); _headerMap[KEY_METHOD] = trim(buf[0]);
_headerMap[KEY_PATH] = trim(buf[1]); _headerMap[KEY_PATH] = trim(buf[1]);
@ -103,12 +104,11 @@ namespace Husky
_parseUrl(firstline, _methodGetMap); _parseUrl(firstline, _methodGetMap);
} }
lpos = rpos + 1; lpos = rpos + 1;
if(lpos >= headerStr.size()) if(lpos >= headerStr.size())
{ {
LogFatal("headerStr illegal"); LogError("headerStr illegal");
return false; return;
} }
//message header begin //message header begin
while(lpos < headerStr.size() && string::npos != (rpos = headerStr.find('\n', lpos)) && rpos > lpos) while(lpos < headerStr.size() && string::npos != (rpos = headerStr.find('\n', lpos)) && rpos > lpos)
@ -125,8 +125,8 @@ namespace Husky
trim(v); trim(v);
if(k.empty()||v.empty()) if(k.empty()||v.empty())
{ {
LogFatal("headerStr illegal."); LogError("headerStr illegal.");
return false; return;
} }
upper(k); upper(k);
_headerMap[k] = v; _headerMap[k] = v;
@ -136,7 +136,6 @@ namespace Husky
//body begin //body begin
return true;
} }
public: public:
string& operator[] (const string& key) string& operator[] (const string& key)
@ -156,15 +155,15 @@ namespace Husky
return _find(_methodPostMap, argKey, res); return _find(_methodPostMap, argKey, res);
} }
private: private:
HashMap<string, string> _headerMap; std::unordered_map<string, string> _headerMap;
HashMap<string, string> _methodGetMap; std::unordered_map<string, string> _methodGetMap;
HashMap<string, string> _methodPostMap; std::unordered_map<string, string> _methodPostMap;
//public: //public:
friend ostream& operator<<(ostream& os, const HttpReqInfo& obj); friend ostream& operator<<(ostream& os, const HttpReqInfo& obj);
private: private:
bool _find(const HashMap<string, string>& mp, const string& key, string& res)const bool _find(const std::unordered_map<string, string>& mp, const string& key, string& res)const
{ {
HashMap<string, string>::const_iterator it = mp.find(key); std::unordered_map<string, string>::const_iterator it = mp.find(key);
if(it == mp.end()) if(it == mp.end())
{ {
return false; return false;
@ -173,7 +172,7 @@ namespace Husky
return true; return true;
} }
private: private:
bool _parseUrl(const string& url, HashMap<string, string>& mp) bool _parseUrl(const string& url, std::unordered_map<string, string>& mp)
{ {
if(url.empty()) if(url.empty())
{ {

View File

@ -1,300 +0,0 @@
#ifndef HUSKY_SERVERFRAME_H
#define HUSKY_SERVERFRAME_H
#include <stdio.h>
#include <string.h>
#include <cassert>
#include <sys/socket.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <vector>
#include "ThreadManager.hpp"
#include "HttpReqInfo.hpp"
// Value socket() results are compared against to detect failure.
#define INVALID_SOCKET -1
// Value the results of socket calls (listen, bind, accept, recv, send, setsockopt, close) are compared against.
#define SOCKET_ERROR -1
// closesocket(fd) expands to close(fd).
#define closesocket close
// Size in bytes of the per-thread receive buffer.
#define RECV_BUFFER 10240
// Backlog passed to listen(). (The name is misspelled; it is kept because it is used as-is.)
#define LISEN_QUEUR_LEN 1024
namespace Husky
{
using namespace Limonp;
// Socket handles are plain file descriptors.
typedef int SOCKET;
// Value passed to setsockopt(SO_RCVTIMEO / SO_SNDTIMEO) for client sockets: 2 seconds, 0 microseconds.
const struct timeval SOCKET_TIMEOUT = {2, 0};
// printf-style template for the response header only. Arguments: charset name (%s), body length (%d).
const char* const RESPONSE_FORMAT = "HTTP/1.1 200 OK\r\nConnection: close\r\nServer: HuskyServer/1.0.0\r\nContent-Type: text/json; charset=%s\r\nContent-Length: %d\r\n\r\n";
// Charset names that can be substituted into RESPONSE_FORMAT.
const char* const RESPONSE_CHARSET_UTF8 = "UTF-8";
const char* const RESPONSE_CHARSET_GB2312 = "GB2312";
// Key under which the client's IP address is stored in the request object.
const char* const CLIENT_IP_K = "CLIENT_IP";
typedef unsigned short u_short;
typedef unsigned int u_int;
/**
 * Callback interface the server threads use to turn a parsed request into
 * a response body.
 */
class IRequestHandler
{
    public:
        virtual ~IRequestHandler() {}

        /**
         * @param httpReq  the parsed HTTP request.
         * @param res      output: the response body.
         * @return true on success, false otherwise.
         */
        virtual bool do_GET(const HttpReqInfo& httpReq, string& res) = 0;
};
// Bundle of pointers handed to every worker thread (see HuskyServer::run / ServerThread).
struct SPara
{
// listening socket the workers call accept() on
SOCKET hSock;
// handler that produces the response body
IRequestHandler * pHandler;
// mutex held around accept()
pthread_mutex_t * ppmAccept;
// flag the workers poll to know when to leave their loop
bool * pShutdown;
};
class HuskyServer
{
private:
pthread_mutex_t m_pmAccept;
bool m_bShutdown;
public:
explicit HuskyServer(unsigned nPort, unsigned nThreadCount, IRequestHandler* pHandler)
{
m_bShutdown = false;
m_nLsnPort = nPort;
m_nThreadCount = nThreadCount;
m_pHandler = pHandler;
assert(pHandler);
pthread_mutex_init(&m_pmAccept,NULL);
};
virtual ~HuskyServer(){pthread_mutex_destroy(&m_pmAccept);};
virtual bool init()
{
if (!BindToLocalHost(m_lsnSock,m_nLsnPort))
{
LogFatal("BindToLocalHost failed.");
return false;
}
LogInfo("init ok {port:%d, threadNum:%d}", m_nLsnPort, m_nThreadCount);
return true;
}
virtual bool dispose()
{
m_bShutdown=true;
if (SOCKET_ERROR==closesocket(m_lsnSock))
{
LogError("error [%s]", strerror(errno));
return false;
}
int sockfd;
struct sockaddr_in dest;
if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
{
LogError("error [%s]", strerror(errno));
return false;
}
bzero(&dest, sizeof(dest));
dest.sin_family = AF_INET;
dest.sin_port = htons(m_nLsnPort);
if (inet_aton("127.0.0.1", (struct in_addr *) &dest.sin_addr.s_addr) == 0)
{
LogError("error [%s]", strerror(errno));
return false;
}
if (connect(sockfd, (struct sockaddr *) &dest, sizeof(dest)) < 0)
{
LogError("error [%s]", strerror(errno));
}
close(sockfd);
return true;
}
virtual bool run()
{
if(SOCKET_ERROR==listen(m_lsnSock,LISEN_QUEUR_LEN))
{
LogError("error [%s]", strerror(errno));
return false;
}
ThreadManager thrMngr;
int i;
SPara para;
para.hSock=m_lsnSock;
para.pHandler=m_pHandler;
para.ppmAccept=&m_pmAccept;
para.pShutdown=&m_bShutdown;
for (i=0;i<m_nThreadCount;i++)
{
if (0!=thrMngr.CreateThread(ServerThread, &para))
{
break;
}
}
LogDebug("expect thread count %d, real count %d",m_nThreadCount,i);
if(i==0)
{
LogError("error [%s]", strerror(errno));
return false;
}
LogInfo("server start to run.........");
if (thrMngr.WaitMultipleThread()!=0)
{
return false;
}
return true;
}
protected:
bool BindToLocalHost(SOCKET &sock,u_short nPort)
{
sock=socket(AF_INET,SOCK_STREAM,0);
if(INVALID_SOCKET==sock)
{
LogError("error [%s]", strerror(errno));
return false;
}
int nRet = 1;
if(SOCKET_ERROR==setsockopt(m_lsnSock, SOL_SOCKET, SO_REUSEADDR, (char*)&nRet, sizeof(nRet)))
{
LogError("error [%s]", strerror(errno));
}
struct sockaddr_in addrSock;
addrSock.sin_family=AF_INET;
addrSock.sin_port=htons(nPort);
addrSock.sin_addr.s_addr=htonl(INADDR_ANY);
int retval;
retval = ::bind(sock,(sockaddr*)&addrSock,sizeof(sockaddr));
if(SOCKET_ERROR==retval)
{
LogError("error [%s]", strerror(errno));
closesocket(sock);
return false;
}
return true;
}
static void * ServerThread(void * lpParameter )
{
SPara *pPara=(SPara*)lpParameter;
SOCKET hSockLsn=pPara->hSock;
IRequestHandler *pHandler=pPara->pHandler;
bool* pShutdown = pPara->pShutdown;
pthread_mutex_t* ppmAccept = pPara->ppmAccept;
int nRetCode;
linger lng;
char chRecvBuf[RECV_BUFFER];
SOCKET hClientSock;
string strHttpResp;
sockaddr_in clientaddr;
socklen_t nSize = sizeof(clientaddr);
while(!(*pShutdown))
{
HttpReqInfo httpReq;
pthread_mutex_lock(ppmAccept);
hClientSock=accept(hSockLsn,(sockaddr *)&clientaddr, &nSize);
pthread_mutex_unlock(ppmAccept);
if(hClientSock==SOCKET_ERROR)
{
if(!(*pShutdown))
LogError("error [%s]", strerror(errno));
continue;
}
httpReq[CLIENT_IP_K] = inet_ntoa(clientaddr.sin_addr);// inet_ntoa is not thread safety at some version
lng.l_linger=1;
lng.l_onoff=1;
if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_LINGER,(char*)&lng,sizeof(lng)))
{
LogError("error [%s]", strerror(errno));
}
if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_RCVTIMEO,(char*)&SOCKET_TIMEOUT,sizeof(SOCKET_TIMEOUT)))
{
LogError("error [%s]", strerror(errno));
}
if(SOCKET_ERROR==setsockopt(hClientSock,SOL_SOCKET,SO_SNDTIMEO,(char*)&SOCKET_TIMEOUT,sizeof(SOCKET_TIMEOUT)))
{
LogError("error [%s]", strerror(errno));
}
string strRec;
string strSnd;
memset(chRecvBuf,0,sizeof(chRecvBuf));
nRetCode = recv(hClientSock, chRecvBuf, RECV_BUFFER, 0);
strRec = chRecvBuf;
#ifdef HUKSY_DEBUG
LogDebug("request[%s]", strRec.c_str());
#endif
if(SOCKET_ERROR==nRetCode)
{
LogDebug("error [%s]", strerror(errno));
closesocket(hClientSock);
continue;
}
if(0==nRetCode)
{
LogDebug("connection has been gracefully closed");
closesocket(hClientSock);
continue;
}
httpReq.load(strRec);
pHandler->do_GET(httpReq, strSnd);
char chHttpHeader[2048];
sprintf(chHttpHeader, RESPONSE_FORMAT, RESPONSE_CHARSET_UTF8, int(strSnd.length()));
strHttpResp=chHttpHeader;
strHttpResp+=strSnd;
#ifdef HUKSY_DEBUG
LogDebug("response'body [%s]", strSnd.c_str());
#endif
if (SOCKET_ERROR==send(hClientSock,strHttpResp.c_str(),strHttpResp.length(),0))
{
LogError("error [%s]", strerror(errno));
}
closesocket(hClientSock);
}
return 0;
}
private:
u_short m_nLsnPort;
u_short m_nThreadCount;
SOCKET m_lsnSock;
IRequestHandler *m_pHandler;
};
}
#endif

View File

@ -1,87 +0,0 @@
#ifndef HUSKY_THREAD_MANAGER_H
#define HUSKY_THREAD_MANAGER_H
#include <pthread.h>
#include <algorithm>
#include <vector>
#include <map>
namespace Husky
{
using namespace std;
/**
 * Keeps the handles of the threads it created so they can be joined later.
 * Handles stay in the list after a join; clear() only forgets them.
 */
class ThreadManager
{
    private:
        typedef pthread_t HANDLE;
        typedef void *(* PThreadFunc)(void* param);
    public:
        ThreadManager() {}
        ~ThreadManager() {}
        /** Number of handles currently remembered. */
        size_t HandleCount() { return _handles.size(); }
        /** Forgets all handles without joining them. */
        void clear() { _handles.clear(); }
        /**
         * Starts a thread running pFunc(pPara) and remembers its handle.
         * @return 0 on success, otherwise the pthread_create error code.
         */
        int CreateThread(PThreadFunc pFunc, void *pPara)
        {
            pthread_t tid;
            const int rc = pthread_create(&tid, NULL, pFunc, pPara);
            if(0 == rc)
            {
                _handles.push_back(tid);
            }
            return rc;
        }
        /**
         * Joins one thread: the most recently created one when hThread is 0,
         * otherwise hThread itself provided this manager created it.
         * @return the pthread_join result, or -1 when there is nothing to join.
         */
        int Wait(HANDLE hThread = 0)
        {
            if(0 == hThread)
            {
                return _handles.empty() ? -1 : pthread_join(_handles.back(), NULL);
            }
            const bool isKnown = find(_handles.begin(), _handles.end(), hThread) != _handles.end();
            return isKnown ? pthread_join(hThread, NULL) : -1;
        }
        /**
         * Joins every remembered thread in creation order.
         * @return 0 when all joins succeed, -1 when no thread is remembered,
         *         otherwise the first pthread_join error code (later threads
         *         are then left unjoined).
         */
        int WaitMultipleThread()
        {
            if(_handles.empty())
            {
                return -1;
            }
            for(vector<pthread_t>::const_iterator it = _handles.begin(); it != _handles.end(); ++it)
            {
                const int rc = pthread_join(*it, NULL);
                if(0 != rc)
                {
                    return rc;
                }
            }
            return 0;
        }
    private:
        vector<pthread_t> _handles;
    private:
        ThreadManager(const ThreadManager&){;}// copy forbidden
        void operator = (const ThreadManager &){}// copy forbidden
};
}
#endif

View File

@ -3,7 +3,8 @@
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include <cmath> #include <cmath>
#include <unordered_set> #include <set>
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) #define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
namespace CppJieba namespace CppJieba

View File

@ -9,7 +9,6 @@
#include <set> #include <set>
#include <sstream> #include <sstream>
#include "str_functs.hpp" #include "str_functs.hpp"
#include "map_functs.hpp"
namespace Limonp namespace Limonp
{ {

View File

@ -26,7 +26,7 @@
namespace Limonp namespace Limonp
{ {
using namespace std; using namespace std;
enum {LL_DEBUG = 0, LL_INFO = 1, LL_WARN = 2, LL_ERROR = 3, LL_FATAL = 4, LEVEL_ARRAY_SIZE = 5, CSTR_BUFFER_SIZE = 1024}; enum {LL_DEBUG = 0, LL_INFO = 1, LL_WARN = 2, LL_ERROR = 3, LL_FATAL = 4, LEVEL_ARRAY_SIZE = 5, CSTR_BUFFER_SIZE = 32};
static const char * LOG_LEVEL_ARRAY[LEVEL_ARRAY_SIZE]= {"DEBUG","INFO","WARN","ERROR","FATAL"}; static const char * LOG_LEVEL_ARRAY[LEVEL_ARRAY_SIZE]= {"DEBUG","INFO","WARN","ERROR","FATAL"};
static const char * LOG_FORMAT = "%s %s:%d %s %s\n"; static const char * LOG_FORMAT = "%s %s:%d %s %s\n";
static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"; static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S";
@ -34,24 +34,19 @@ namespace Limonp
class Logger class Logger
{ {
public: public:
static bool Logging(size_t level, const string& msg, const char* fileName, int lineNo) static void Logging(size_t level, const string& msg, const char* fileName, int lineno)
{ {
assert(level <= LL_FATAL); assert(level <= LL_FATAL);
char buf[CSTR_BUFFER_SIZE]; char buf[CSTR_BUFFER_SIZE];
time_t timeNow; time_t timeNow;
time(&timeNow); time(&timeNow);
if(!strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&timeNow))) strftime(buf, sizeof(buf), LOG_TIME_FORMAT, localtime(&timeNow));
{ fprintf(stderr, LOG_FORMAT, buf, fileName, lineno,LOG_LEVEL_ARRAY[level], msg.c_str());
fprintf(stderr, "stftime failed.\n");
return false;
}
fprintf(stderr, LOG_FORMAT, buf, fileName, lineNo,LOG_LEVEL_ARRAY[level], msg.c_str());
return true;
} }
static bool LoggingF(size_t level, const char* fileName, int lineNo, const string& fmt, ...) static void LoggingF(size_t level, const char* fileName, int lineno, const string& fmt, ...)
{ {
#ifdef LOGGER_LEVEL #ifdef LOGGER_LEVEL
if(level < LOGGER_LEVEL) return true; if(level < LOGGER_LEVEL) return;
#endif #endif
int size = 256; int size = 256;
string msg; string msg;
@ -70,7 +65,7 @@ namespace Limonp
else else
size *= 2; size *= 2;
} }
return Logging(level, msg, fileName, lineNo); Logging(level, msg, fileName, lineno);
} }
}; };
} }

View File

@ -1,46 +0,0 @@
/************************************
* file enc : ascii
* author : wuyanyi09@gmail.com
************************************/
#ifndef LIMONP_MAP_FUNCTS_H
#define LIMONP_MAP_FUNCTS_H
#include <map>
#include <set>
#include <vector>
#include <iostream>
#include <sstream>
#include <unordered_map>
#define HashMap std::unordered_map
namespace Limonp
{
using namespace std;
/**
 * Looks `key` up in `mp`.
 * @return a copy of the mapped value, or a copy of `defaultVal` when the
 *         key is absent. The map is never modified.
 */
template<class kT, class vT>
vT getMap(const map<kT, vT>& mp, const kT & key, const vT & defaultVal)
{
    const typename map<kT, vT>::const_iterator pos = mp.find(key);
    return (pos != mp.end()) ? pos->second : defaultVal;
}
/**
 * Appends every (key, value) pair of `mp`, in the map's key order, to the
 * end of `res`. Elements already in `res` are kept.
 */
template<class kT, class vT>
void map2Vec(const map<kT, vT>& mp, vector<pair<kT, vT> > & res)
{
    res.insert(res.end(), mp.begin(), mp.end());
}
}
#endif

View File

@ -1,15 +1,29 @@
#ifndef LIMONP_STD_OUTBOUND_H #ifndef LIMONP_STD_OUTBOUND_H
#define LIMONP_STD_OUTBOUND_H #define LIMONP_STD_OUTBOUND_H
#include "map_functs.hpp"
#include <map> #include <map>
#if(__cplusplus == 201103L)
#include <unordered_map>
#include <unordered_set>
#else
#include <tr1/unordered_map>
#include <tr1/unordered_set>
namespace std
{
using std::tr1::unordered_map;
using std::tr1::unordered_set;
}
#endif
#include <set> #include <set>
#include <vector> #include <vector>
#include <fstream> #include <fstream>
#include <sstream>
namespace std namespace std
{ {
using namespace std;
template<typename T> template<typename T>
ostream& operator << (ostream& os, const vector<T>& vec) ostream& operator << (ostream& os, const vector<T>& vec)
{ {
@ -18,7 +32,7 @@ namespace std
return os << "[]"; return os << "[]";
} }
os<<"[\""<<vec[0]; os<<"[\""<<vec[0];
for(uint i = 1; i < vec.size(); i++) for(size_t i = 1; i < vec.size(); i++)
{ {
os<<"\", \""<<vec[i]; os<<"\", \""<<vec[i];
} }
@ -62,14 +76,14 @@ namespace std
return os; return os;
} }
template<class T1, class T2> template<class T1, class T2>
ostream& operator << (ostream& os, const HashMap<T1, T2>& mp) ostream& operator << (ostream& os, const std::unordered_map<T1, T2>& mp)
{ {
if(mp.empty()) if(mp.empty())
{ {
return os << "{}"; return os << "{}";
} }
os<<'{'; os<<'{';
typename HashMap<T1, T2>::const_iterator it = mp.begin(); typename std::unordered_map<T1, T2>::const_iterator it = mp.begin();
os<<*it; os<<*it;
it++; it++;
while(it != mp.end()) while(it != mp.end())

View File

@ -22,7 +22,6 @@
#include <iterator> #include <iterator>
#include <algorithm> #include <algorithm>
#include "std_outbound.hpp" #include "std_outbound.hpp"
#include "map_functs.hpp"
#define print(x) cout<< #x": " << x <<endl #define print(x) cout<< #x": " << x <<endl

View File

@ -5,7 +5,7 @@
#include <string.h> #include <string.h>
#include "Limonp/Config.hpp" #include "Limonp/Config.hpp"
#include "Limonp/io_functs.hpp" #include "Limonp/io_functs.hpp"
#include "Husky/HuskyServer.hpp" #include "Husky/EpollServer.hpp"
#include "MPSegment.hpp" #include "MPSegment.hpp"
#include "HMMSegment.hpp" #include "HMMSegment.hpp"
#include "MixSegment.hpp" #include "MixSegment.hpp"
@ -19,7 +19,7 @@ class ReqHandler: public IRequestHandler
ReqHandler(const string& dictPath, const string& modelPath): _segment(dictPath, modelPath){}; ReqHandler(const string& dictPath, const string& modelPath): _segment(dictPath, modelPath){};
virtual ~ReqHandler(){}; virtual ~ReqHandler(){};
public: public:
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const
{ {
string sentence, tmp; string sentence, tmp;
vector<string> words; vector<string> words;
@ -45,7 +45,6 @@ bool run(int argc, char** argv)
return false; return false;
} }
unsigned int port = 0; unsigned int port = 0;
unsigned int threadNum = 0;
string dictPath; string dictPath;
string modelPath; string modelPath;
string val; string val;
@ -55,12 +54,6 @@ bool run(int argc, char** argv)
return false; return false;
} }
port = atoi(val.c_str()); port = atoi(val.c_str());
if(!conf.get("thread_num", val))
{
LogFatal("conf get thread_num failed.");
return false;
}
threadNum = atoi(val.c_str());
if(!conf.get("dict_path", dictPath)) if(!conf.get("dict_path", dictPath))
{ {
@ -92,8 +85,8 @@ bool run(int argc, char** argv)
} }
ReqHandler reqHandler(dictPath, modelPath); ReqHandler reqHandler(dictPath, modelPath);
HuskyServer sf(port, threadNum, &reqHandler); EpollServer sf(port, &reqHandler);
return sf.init() && sf.run(); return sf.start();
} }
int main(int argc, char* argv[]) int main(int argc, char* argv[])

View File

@ -1,4 +1,4 @@
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/test) SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR})
ADD_EXECUTABLE(segment.demo segment.cpp) ADD_EXECUTABLE(segment.demo segment.cpp)
ADD_EXECUTABLE(keyword.demo keyword_demo.cpp) ADD_EXECUTABLE(keyword.demo keyword_demo.cpp)

View File

@ -1,4 +1,5 @@
#include <iostream> #include <iostream>
#include <ctime>
#include <fstream> #include <fstream>
#include "../src/Limonp/ArgvContext.hpp" #include "../src/Limonp/ArgvContext.hpp"
#include "../src/Limonp/io_functs.hpp" #include "../src/Limonp/io_functs.hpp"
@ -30,14 +31,15 @@ void cut(const ISegment * seg, const char * const filePath, size_t times = 10)
int main(int argc, char ** argv) int main(int argc, char ** argv)
{ {
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
if(!seg)
{ {
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); cout<<"seg init failed."<<endl;
if(!seg) return EXIT_FAILURE;
{
cout<<"seg init failed."<<endl;
return EXIT_FAILURE;
}
cut(&seg, "../test/testdata/weicheng.utf8");
} }
long beginTime = clock();
cut(&seg, "../test/testdata/weicheng.utf8");
long endTime = clock();
printf("[%.3lf seconds]time consumeed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -1,12 +1,12 @@
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/test) SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR})
SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/test/lib) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
SET(GTEST_ROOT_DIR gtest-1.6.0) SET(GTEST_ROOT_DIR gtest-1.6.0)
ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN) ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN)
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp) ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TTrie.cpp TSegments.cpp )
TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(gtest pthread)
TARGET_LINK_LIBRARIES(test.run gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread)

View File

@ -1,18 +0,0 @@
#include "src/FullSegment.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// FullSegment::cut on a mixed Chinese / ASCII sentence; the word list is
// rendered to a string and compared with the expected rendering.
TEST(FullSegment, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected = "[\"\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"\", \"\", \"\", \" \", \"\", \"\", \" 123456\"]";

    FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
    vector<string> words;
    ASSERT_EQ(segment.cut(sentence, words), true);

    string rendered;
    rendered << words;
    ASSERT_EQ(rendered, expected);
}

View File

@ -1,19 +0,0 @@
#include "src/HMMSegment.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// HMMSegment::cut (model only, no dictionary) on a mixed Chinese / ASCII sentence.
TEST(HMMSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"我来", "自北京", "邮电大学", "", "", "", " ", "学号", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    HMMSegment segment("../dict/hmm_model.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}

View File

@ -1,46 +0,0 @@
#include "src/MPSegment.hpp"
#include "src/Limonp/io_functs.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
using namespace Limonp;
// MPSegment::cut (dictionary only) on a mixed Chinese / ASCII sentence.
TEST(MPSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"", "来自", "北京邮电大学", "","",""," ","","", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}
// Segments every line of review.100 with MPSegment and writes "line\n[words]\n"
// for each of them to review.100.res.
// NOTE(review): the comparison against the previously stored result (eRes) is
// commented out, so this test currently only regenerates the .res file and
// cannot fail on a segmentation change - confirm whether that is intended.
TEST(MPSegmentTest, Test2)
{
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
string line;
ifstream ifs("../test/testdata/review.100");
vector<string> words;
// previous contents of the result file, read before it is overwritten below
string eRes;
loadFile2Str("../test/testdata/review.100.res", eRes);
string res;
while(getline(ifs, line))
{
// echo the input line, then its segmentation rendered as a list
res += line;
res += '\n';
words.clear();
segment.cut(line, words);
string s;
s << words;
res += s;
res += '\n';
}
// overwrite the stored result with the freshly computed one
WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w");
//ASSERT_EQ(res, eRes);
}

View File

@ -1,16 +0,0 @@
#include "src/MixSegment.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// MixSegment::cut (dictionary + HMM model) on a mixed Chinese / ASCII sentence.
TEST(MixSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"", "来自", "北京邮电大学", "","",""," ","学号", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}

View File

@ -1,20 +0,0 @@
#include "src/QuerySegment.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// QuerySegment::cut with a third constructor argument of 3; the word list is
// rendered to a string and compared with the expected rendering.
TEST(QuerySegment, Test1)
{
    const char* const sentence = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
    const string expected = "[\"小明\", \"硕士\", \"毕业\", \"\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \"\", \"\", \"\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]";

    QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3);
    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));

    string rendered;
    rendered << words;
    ASSERT_EQ(rendered, expected);
}

View File

@ -1,36 +0,0 @@
#include "src/SegmentBase.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// Repeatedly calls SegmentBase::filterAscii on the unread tail of the text and
// collects the run it reports each time; the runs must match the expected split.
TEST(SegmentBaseTest, Test1)
{
    const char* const text = "heheh你好...hh";
    const char* const expectedRuns[] = {"heheh", "你好", "...hh"};
    const vector<string> expected(expectedRuns, expectedRuns + sizeof(expectedRuns)/sizeof(expectedRuns[0]));

    vector<string> actual;
    const size_t total = strlen(text);
    for(size_t pos = 0; pos < total; )
    {
        size_t runLen = 0;
        const char* cursor = text + pos;
        SegmentBase::filterAscii(cursor, total - pos, runLen);
        actual.push_back(string(cursor, runLen));
        pos += runLen;
    }
    EXPECT_EQ(actual, expected);
}
//int main(int argc, char** argv)
//{
// //ChineseFilter chFilter;
// return 0;
//}

135
test/unittest/TSegments.cpp Normal file
View File

@ -0,0 +1,135 @@
#include "src/SegmentBase.hpp"
#include "src/MixSegment.hpp"
#include "src/MPSegment.hpp"
#include "src/HMMSegment.hpp"
#include "src/Limonp/io_functs.hpp"
#include "src/FullSegment.hpp"
#include "src/QuerySegment.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// Repeatedly calls SegmentBase::filterAscii on the unread tail of the text and
// collects the run it reports each time; the runs must match the expected split.
TEST(SegmentBaseTest, Test1)
{
    const char* const text = "heheh你好...hh";
    const char* const expectedRuns[] = {"heheh", "你好", "...hh"};
    const vector<string> expected(expectedRuns, expectedRuns + sizeof(expectedRuns)/sizeof(expectedRuns[0]));

    vector<string> actual;
    const size_t total = strlen(text);
    for(size_t pos = 0; pos < total; )
    {
        size_t runLen = 0;
        const char* cursor = text + pos;
        SegmentBase::filterAscii(cursor, total - pos, runLen);
        actual.push_back(string(cursor, runLen));
        pos += runLen;
    }
    EXPECT_EQ(actual, expected);
}
//int main(int argc, char** argv)
//{
// //ChineseFilter chFilter;
// return 0;
//}
// MixSegment::cut (dictionary + HMM model) on a mixed Chinese / ASCII sentence.
TEST(MixSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"", "来自", "北京邮电大学", "","",""," ","学号", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}
// MPSegment::cut (dictionary only) on a mixed Chinese / ASCII sentence.
TEST(MPSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"", "来自", "北京邮电大学", "","",""," ","","", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}
// Segments every line of review.100 with MPSegment and writes "line\n[words]\n"
// for each of them to review.100.res.
// NOTE(review): the comparison against the previously stored result (eRes) is
// commented out, so this test currently only regenerates the .res file and
// cannot fail on a segmentation change - confirm whether that is intended.
TEST(MPSegmentTest, Test2)
{
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
string line;
ifstream ifs("../test/testdata/review.100");
vector<string> words;
// previous contents of the result file, read before it is overwritten below
string eRes;
loadFile2Str("../test/testdata/review.100.res", eRes);
string res;
while(getline(ifs, line))
{
// echo the input line, then its segmentation rendered as a list
res += line;
res += '\n';
words.clear();
segment.cut(line, words);
string s;
s << words;
res += s;
res += '\n';
}
// overwrite the stored result with the freshly computed one
WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w");
//ASSERT_EQ(res, eRes);
}
// HMMSegment::cut (model only, no dictionary) on a mixed Chinese / ASCII sentence.
TEST(HMMSegmentTest, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected[] = {"我来", "自北京", "邮电大学", "", "", "", " ", "学号", " 123456"};
    const size_t expectedCount = sizeof(expected)/sizeof(expected[0]);

    HMMSegment segment("../dict/hmm_model.utf8");
    ASSERT_TRUE(segment);

    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));
    EXPECT_EQ(words, vector<string>(expected, expected + expectedCount));
}
// FullSegment::cut on a mixed Chinese / ASCII sentence; the word list is
// rendered to a string and compared with the expected rendering.
TEST(FullSegment, Test1)
{
    const char* const sentence = "我来自北京邮电大学。。。 学号 123456";
    const char* const expected = "[\"\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"\", \"\", \"\", \" \", \"\", \"\", \" 123456\"]";

    FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
    vector<string> words;
    ASSERT_EQ(segment.cut(sentence, words), true);

    string rendered;
    rendered << words;
    ASSERT_EQ(rendered, expected);
}
// QuerySegment::cut with a third constructor argument of 3; the word list is
// rendered to a string and compared with the expected rendering.
TEST(QuerySegment, Test1)
{
    const char* const sentence = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
    const string expected = "[\"小明\", \"硕士\", \"毕业\", \"\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \"\", \"\", \"\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]";

    QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3);
    vector<string> words;
    ASSERT_TRUE(segment.cut(sentence, words));

    string rendered;
    rendered << words;
    ASSERT_EQ(rendered, expected);
}