From 34668aa379a57a1230cd6eefbc9bf6207dc8ca21 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Thu, 21 Jan 2016 01:07:31 +0800 Subject: [PATCH] remove server, see details in ChangeLog.md --- CMakeLists.txt | 5 - ChangeLog.md | 8 + Dockerfile | 11 -- README.md | 139 ++--------------- README_EN.md | 16 +- conf/CMakeLists.txt | 1 - conf/server.conf | 19 --- conf/server_example.conf | 18 --- deps/husky/http_req_info.h | 264 -------------------------------- deps/husky/irequest_handler.h | 17 -- deps/husky/net_util.h | 47 ------ deps/husky/thread_pool_server.h | 126 --------------- dict/CMakeLists.txt | 9 -- script/CMakeLists.txt | 6 - script/cjserver.start | 12 -- script/cjserver.stop | 13 -- server/CMakeLists.txt | 6 - server/server.cpp | 101 ------------ test/servertest/go_load_test.sh | 2 - test/servertest/load_test.py | 91 ----------- test/servertest/run_curl.sh | 11 -- 21 files changed, 34 insertions(+), 888 deletions(-) delete mode 100644 Dockerfile delete mode 100644 conf/CMakeLists.txt delete mode 100644 conf/server.conf delete mode 100644 conf/server_example.conf delete mode 100644 deps/husky/http_req_info.h delete mode 100644 deps/husky/irequest_handler.h delete mode 100644 deps/husky/net_util.h delete mode 100644 deps/husky/thread_pool_server.h delete mode 100644 dict/CMakeLists.txt delete mode 100644 script/CMakeLists.txt delete mode 100755 script/cjserver.start delete mode 100755 script/cjserver.stop delete mode 100644 server/CMakeLists.txt delete mode 100644 server/server.cpp delete mode 100755 test/servertest/go_load_test.sh delete mode 100755 test/servertest/load_test.py delete mode 100755 test/servertest/run_curl.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index efe1585..02d6c5c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,11 +20,6 @@ endif() # ENDIF() ADD_SUBDIRECTORY(deps) -ADD_SUBDIRECTORY(server) -ADD_SUBDIRECTORY(dict) -ADD_SUBDIRECTORY(script) -ADD_SUBDIRECTORY(conf) - ADD_SUBDIRECTORY(test) ENABLE_TESTING() diff --git a/ChangeLog.md 
b/ChangeLog.md index d2deca3..35b15b1 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,13 @@ # CppJieba ChangeLog +## next version + ++ 加代码容易删代码难,思索良久,还是决定把 Server 功能的源码剥离出这个项目。 ++ 让 [cppjieba] 回到当年情窦未开时清纯的感觉,删除那些无关紧要的server代码,让整个项目轻装上阵,专注分词的核心代码。 +毕竟,不要因为走得太远,忘记了为什么出发。 ++ By the way, 之前的 server 相关的代码,如果你真的需要它,就去新的项目仓库 [cppjieba-server](https://github.com/yanyiwu/cppjieba-server) 找它吧, +当然,不管你找还是不找,它就在那里,不喜不悲。 + ## v4.3.3 + Yet Another Incompatibility Problem Repair: Upgrade [limonp] to version v0.5.3, fix incompatibility problem in Windows diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 8743a93..0000000 --- a/Dockerfile +++ /dev/null @@ -1,11 +0,0 @@ -FROM ubuntu:14.04 -MAINTAINER yanyiwu -RUN apt-get update -RUN apt-get install -y g++ cmake git -RUN git clone https://github.com/yanyiwu/cppjieba.git -RUN mkdir cppjieba/build -WORKDIR /cppjieba/build -RUN cmake .. -RUN make -EXPOSE 11200 -CMD ["./bin/cjserver", "../test/testdata/server.conf"] diff --git a/README.md b/README.md index 4ae54f3..5b4462d 100644 --- a/README.md +++ b/README.md @@ -9,18 +9,14 @@ CppJieba是"结巴(Jieba)"中文分词的C++版本 -代码细节详解请见 [代码详解] - ## 特性 -+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。 ++ 源代码都写进头文件`include/cppjieba/*.hpp`里,`include`即可使用。 + 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。 -+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选),可通过http参数选择不同分词算法进行分词。 + 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。 + 支持载自定义用户词典,多路径时支持分隔符'|'或者';'分隔。 + 支持 `Linux` , `Mac OSX`, `Windows` 操作系统(Visual Studio 2012中编译通过,需要开Release模式,如果在Debug模式下会因为isspace之类的标准函数实现对中文支持不太好的原因导致运行终止)。 -+ 支持 `Docker`。 -+ 提供 C语言 api接口调用 [cjieba]。 ++ 代码细节详解请见 [代码详解] ## 用法 @@ -78,116 +74,6 @@ make test 详细请看 `test/demo.cpp`. 
- -## 服务使用 - -服务默认使用 MixSegment 切词方式,如果想要修改成其他方式,请参考 `server/server.cpp` 源码文件。 -将对应的方式的代码行注释去掉,重新编译即可。 - -### 启动服务 - -``` -./bin/cjserver ../conf/server_example.conf -``` - -### 客户端请求示例 - -``` -curl "http://127.0.0.1:11200/?key=南京市长江大桥" -``` - -``` -["南京市", "长江大桥"] -``` - -``` -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple" -``` - -``` -南京市 长江大桥 -``` - -默认切词算法是MixSegment切词算法,如果想要使用其他算法切词,可以使用参数method来设置。 -示例如下: - -``` -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MP" -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=HMM" -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MIX" -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=FULL" -curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=QUERY" -``` - -用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`): - -同时,也支持HTTP POST模式,使用如下调用: - -``` -curl -d "南京市长江大桥" "http://127.0.0.1:11200/" -``` - -返回结果如下: - -``` -["南京市", "长江大桥"] -``` - -因为 HTTP GET 请求有长度限制,如果需要请求长文的,请使用POST请求。 - -### 安装服务(仅限 linux 系统) - -如果有需要**安装使用**的,可以按照如下操作: -``` -sudo make install -``` - -### 服务启动和停止(仅限 linux 系统) - -``` -cd /usr/local/cppjieba -./script/cjserver.start -./script/cjserver.stop -``` - -### 卸载服务(仅限 linux 系统) - -```sh -rm -rf /usr/local/cppjieba -``` - -## Docker 示例 - -安装和启动 - -``` -sudo docker pull yanyiwu/cppjieba -sudo docker run -d -P yanyiwu/cppjieba -``` - -``` -sudo docker ps -``` - -``` -CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -7c29325e9c20 yanyiwu/cppjieba:latest "./bin/cjserver ../t 4 minutes ago Up 4 minutes 0.0.0.0:49160->11200/tcp angry_wilson -``` - -可以看到正在运行的 Docker 容器(容器内运行着 `cjserver` 服务),并且服务的端口号被映射为 `0.0.0.0:49160` 。 - -所以现在可以来一发测试了: - -``` -curl "http://0.0.0.0:49160/?key=南京市长江大桥" -``` - -预期结果如下: - -``` -["南京市", "长江大桥"] -``` - ### 分词结果示例 **MPSegment** @@ -323,23 +209,22 @@ Query方法先使用Mix方法切词,对于切出来的较长的词再使用Ful ## 应用 + [GoJieba] go语言版本的结巴中文分词。 -+ [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] . 
+ [NodeJieba] Node.js 版本的结巴中文分词。 + [simhash] 中文文档的的相似度计算 + [exjieba] Erlang 版本的结巴中文分词。 + [jiebaR] R语言版本的结巴中文分词。 -+ [libcppjieba] 是最简单易懂的CppJieba头文件库使用示例库。 -+ [KeywordServer] 50行搭建一个中文关键词抽取服务。 + [cjieba] C语言版本的结巴分词。 + [jieba_rb] Ruby 版本的结巴分词。 + [iosjieba] iOS 版本的结巴分词。 ++ [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。 + [pg_jieba] PostgreSQL 数据库的分词插件。 + [ngx_http_cppjieba_module] Nginx 分词插件。 -+ [gitbook-plugin-search-pro] 支持中文搜索的 gitbook 插件。 ++ [cppjiebapy] 由 [jannson] 开发的供 python 模块调用的项目 [cppjiebapy], 相关讨论 [cppjiebapy_discussion] . ++ [KeywordServer] 50行搭建一个中文关键词抽取服务。 ## 线上演示 -http://cppjieba-webdemo.herokuapp.com/ +[Web-Demo](http://cppjieba-webdemo.herokuapp.com/) (建议使用chrome打开) ## 性能评测 @@ -350,21 +235,20 @@ http://cppjieba-webdemo.herokuapp.com/ + Email: `i@yanyiwu.com` + QQ: 64162451 - -![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg) ++ WeChat: ![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg) ## 鸣谢 -"结巴"中文分词作者: SunJunyi https://github.com/fxsjy/jieba +"结巴"中文分词作者: [SunJunyi](https://github.com/fxsjy) ## 许可证 -MIT http://yanyiwu.mit-license.org +[MIT](http://yanyiwu.mit-license.org) ## 作者 -- yanyiwu https://github.com/yanyiwu i@yanyiwu.com -- aholic https://github.com/aholic ruochen.xu@gmail.com +- [yanyiwu](http://yanyiwu.com) +- [aholic](https://github.com/aholic) [GoJieba]:https://github.com/yanyiwu/gojieba [CppJieba]:https://github.com/yanyiwu/cppjieba @@ -375,7 +259,6 @@ MIT http://yanyiwu.mit-license.org [jiebaR]:https://github.com/qinwf/jiebaR [simhash]:https://github.com/yanyiwu/simhash [代码详解]:https://github.com/yanyiwu/cppjieba/wiki/CppJieba%E4%BB%A3%E7%A0%81%E8%AF%A6%E8%A7%A3 -[libcppjieba]:https://github.com/yanyiwu/libcppjieba [issue25]:https://github.com/yanyiwu/cppjieba/issues/25 [exjieba]:https://github.com/falood/exjieba [KeywordServer]:https://github.com/yanyiwu/keyword_server diff --git a/README_EN.md b/README_EN.md index cb25395..0b11b23 100644 --- a/README_EN.md +++ 
b/README_EN.md @@ -80,5 +80,19 @@ Please see details in `test/demo.cpp`. + Email: `i@yanyiwu.com` + QQ: 64162451 ++ WeChat: ![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg) -![image](http://7viirv.com1.z0.glb.clouddn.com/5a7d1b5c0d_yanyiwu_personal_qrcodes.jpg) +[GoJieba]:https://github.com/yanyiwu/gojieba +[CppJieba]:https://github.com/yanyiwu/cppjieba +[jannson]:https://github.com/jannson +[cppjiebapy]:https://github.com/jannson/cppjiebapy +[cppjiebapy_discussion]:https://github.com/yanyiwu/cppjieba/issues/1 +[NodeJieba]:https://github.com/yanyiwu/nodejieba +[jiebaR]:https://github.com/qinwf/jiebaR +[simhash]:https://github.com/yanyiwu/simhash +[exjieba]:https://github.com/falood/exjieba +[cjieba]:http://github.com/yanyiwu/cjieba +[jieba_rb]:https://github.com/altkatz/jieba_rb +[iosjieba]:https://github.com/yanyiwu/iosjieba +[pg_jieba]:https://github.com/jaiminpan/pg_jieba +[gitbook-plugin-search-pro]:https://plugins.gitbook.com/plugin/search-pro diff --git a/conf/CMakeLists.txt b/conf/CMakeLists.txt deleted file mode 100644 index be5588c..0000000 --- a/conf/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -INSTALL(FILES server.conf DESTINATION conf) diff --git a/conf/server.conf b/conf/server.conf deleted file mode 100644 index b53c9ca..0000000 --- a/conf/server.conf +++ /dev/null @@ -1,19 +0,0 @@ -# config - -#socket listen port -port=11200 - -thread_number=4 - -#dict path -dict_path=/usr/local/cppjieba/dict/jieba.dict.utf8 - -#model path -model_path=/usr/local/cppjieba/dict/hmm_model.utf8 - -#user_dict_path -user_dict_path=/usr/local/cppjieba/dict/user.dict.utf8 - -idf_path=/usr/local/cppjieba/dict/idf.utf8 - -stop_words_path=/usr/local/cppjieba/dict/stop_words.utf8 diff --git a/conf/server_example.conf b/conf/server_example.conf deleted file mode 100644 index c749ca4..0000000 --- a/conf/server_example.conf +++ /dev/null @@ -1,18 +0,0 @@ -# config - -#socket listen port -port=11200 - -thread_number=4 - -#dict path 
-dict_path=../dict/jieba.dict.utf8 - -#model path -model_path=../dict/hmm_model.utf8 - -user_dict_path=../dict/user.dict.utf8 - -idf_path=../dict/idf.utf8 - -stop_words_path=../dict/stop_words.utf8 diff --git a/deps/husky/http_req_info.h b/deps/husky/http_req_info.h deleted file mode 100644 index 10947c1..0000000 --- a/deps/husky/http_req_info.h +++ /dev/null @@ -1,264 +0,0 @@ -#ifndef HUSKY_HTTP_REQINFO_H -#define HUSKY_HTTP_REQINFO_H - -#include -#include -#include "limonp/Logging.hpp" -#include "limonp/StringUtil.hpp" - -namespace husky { -using namespace limonp; -using namespace std; - -static const char* const KEY_METHOD = "METHOD"; -static const char* const KEY_URI = "URI"; -static const char* const KEY_PROTOCOL = "PROTOCOL"; - -typedef unsigned char BYTE; - -inline BYTE ToHex(BYTE x) { - return x > 9 ? x -10 + 'A': x + '0'; -} - -inline BYTE FromHex(BYTE x) { - return isdigit(x) ? x-'0' : x-'A'+10; -} - -inline void URLEncode(const string &sIn, string& sOut) { - for( size_t ix = 0; ix < sIn.size(); ix++ ) { - BYTE buf[4]; - memset( buf, 0, 4 ); - if( isalnum( (BYTE)sIn[ix] ) ) { - buf[0] = sIn[ix]; - } else { - buf[0] = '%'; - buf[1] = ToHex( (BYTE)sIn[ix] >> 4 ); - buf[2] = ToHex( (BYTE)sIn[ix] % 16); - } - sOut += (char *)buf; - } -}; - -inline void URLDecode(const string &sIn, string& sOut) { - for( size_t ix = 0; ix < sIn.size(); ix++ ) { - BYTE ch = 0; - if(sIn[ix]=='%') { - ch = (FromHex(sIn[ix+1])<<4); - ch |= FromHex(sIn[ix+2]); - ix += 2; - } else if(sIn[ix] == '+') { - ch = ' '; - } else { - ch = sIn[ix]; - } - sOut += (char)ch; - } -} - -class HttpReqInfo { - public: - HttpReqInfo() { - is_header_finished_ = false; - is_body_finished_ = false; - content_length_ = 0; - } - - bool ParseHeader(const string& buffer) { - return ParseHeader(buffer.c_str(), buffer.size()); - } - bool ParseHeader(const char* buffer, size_t len) { - string headerStr(buffer, len); - size_t lpos = 0, rpos = 0; - vector buf; - rpos = headerStr.find("\n", lpos); - 
if(string::npos == rpos) { - LOG(ERROR) << "headerStr[" << headerStr << "] illegal."; - return false; - } - string firstline(headerStr, lpos, rpos - lpos); - Trim(firstline); - Split(firstline, buf, " "); - if (3 != buf.size()) { - LOG(ERROR) << "parse header firstline [" << firstline << "] failed."; - return false; - } - header_map_[KEY_METHOD] = Trim(buf[0]); - header_map_[KEY_URI] = Trim(buf[1]); - header_map_[KEY_PROTOCOL] = Trim(buf[2]); - ParseUri(header_map_[KEY_URI], path_, method_get_map_); - - lpos = rpos + 1; - if(lpos >= headerStr.size()) { - LOG(ERROR) << "headerStr[" << headerStr << "] illegal."; - return false; - } - //message header begin - while(lpos < headerStr.size() && string::npos != (rpos = headerStr.find('\n', lpos)) && rpos > lpos) { - string s(headerStr, lpos, rpos - lpos); - size_t p = s.find(':'); - if(string::npos == p) { - break;//encounter empty line - } - string k(s, 0, p); - string v(s, p+1); - Trim(k); - Trim(v); - if(k.empty()||v.empty()) { - LOG(ERROR) << "headerStr[" << headerStr << "] illegal."; - return false; - } - Upper(k); - header_map_[k] = v; - lpos = rpos + 1; - } - rpos ++; - is_header_finished_ = true; - string content_length; - if(!Find("CONTENT-LENGTH", content_length) || 0 == (content_length_ = atoi(content_length.c_str()))) { - is_body_finished_ = true; - return true; - } - content_length_ = atoi(content_length.c_str()); - if(rpos < headerStr.size()) { - AppendBody(headerStr.c_str() + rpos, headerStr.size() - rpos); - } - return true; - //message header end - } - void AppendBody(const char* buffer, size_t len) { - if(is_body_finished_) { - return; - } - body_.append(buffer, len); - if(body_.size() >= content_length_) { - is_body_finished_ = true; - } else { - is_body_finished_ = false; - } - } - bool IsHeaderFinished() const { - return is_header_finished_; - } - bool IsBodyFinished() const { - return is_body_finished_; - } - - const string& Set(const string& key, const string& value) { - return header_map_[key] = 
value; - } - bool Find(const string& key, string& res)const { - return Find(header_map_, key, res); - } - bool GET(const string& argKey, string& res)const { - string tmp; - if (!Find(method_get_map_, argKey, tmp)) { - return false; - } - URLDecode(tmp, res); - return true; - } - bool GET(const string& argKey, int& res) const { - string tmp; - if (!GET(argKey, tmp)) { - return false; - } - res = atoi(tmp.c_str()); - return true; - } - bool GET(const string& argKey, size_t& res) const { - int tmp = 0; - if (!GET(argKey, tmp) || tmp < 0) { - return false; - } - res = tmp; - return true; - } - - bool IsGET() const { - string str; - if(!Find(header_map_, KEY_METHOD, str)) { - return false; - } - return str == "GET"; - } - bool IsPOST() const { - string str; - if(!Find(header_map_, KEY_METHOD, str)) { - return false; - } - return str == "POST"; - } - const unordered_map & GetMethodGetMap() const { - return method_get_map_; - } - const unordered_map & GetHeaders() const { - return header_map_; - } - const string& GetBody() const { - return body_; - } - const string& GetPath() const { - return path_; - } - - private: - bool is_header_finished_; - bool is_body_finished_; - size_t content_length_; - unordered_map header_map_; - unordered_map method_get_map_; - string path_; - string body_; - friend ostream& operator<<(ostream& os, const HttpReqInfo& obj); - - bool Find(const std::unordered_map& mp, const string& key, string& res)const { - std::unordered_map::const_iterator it = mp.find(key); - if(it == mp.end()) { - return false; - } - res = it->second; - return true; - } - - void ParseUri(const string& uri, string& path, std::unordered_map& mp) { - if(uri.empty()) { - return; - } - - size_t pos = uri.find('?'); - path = uri.substr(0, pos); - if(string::npos == pos) { - return ; - } - size_t kleft = 0, kright = 0; - size_t vleft = 0, vright = 0; - for(size_t i = pos + 1; i < uri.size();) { - kleft = i; - while(i < uri.size() && uri[i] != '=') { - i++; - } - if(i >= 
uri.size()) { - break; - } - kright = i; - i++; - vleft = i; - while(i < uri.size() && uri[i] != '&' && uri[i] != ' ') { - i++; - } - vright = i; - mp[uri.substr(kleft, kright - kleft)] = uri.substr(vleft, vright - vleft); - i++; - } - - return; - } -}; - -inline std::ostream& operator << (std::ostream& os, const husky::HttpReqInfo& obj) { - return os << obj.header_map_ << obj.method_get_map_/* << obj._methodPostMap*/ << obj.path_ << obj.body_ ; -} - -} - -#endif diff --git a/deps/husky/irequest_handler.h b/deps/husky/irequest_handler.h deleted file mode 100644 index 248cef0..0000000 --- a/deps/husky/irequest_handler.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef HUSKY_IREQUESTHANDLER_HPP -#define HUSKY_IREQUESTHANDLER_HPP - -#include "http_req_info.h" - -namespace husky { -class IRequestHandler { - public: - virtual ~IRequestHandler() { - } - - virtual bool DoGET(const HttpReqInfo& httpReq, string& res) = 0; - virtual bool DoPOST(const HttpReqInfo& httpReq, string& res) = 0; -}; -} - -#endif diff --git a/deps/husky/net_util.h b/deps/husky/net_util.h deleted file mode 100644 index 188a80d..0000000 --- a/deps/husky/net_util.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef HUSKY_NET_UTILS_HPP -#define HUSKY_NET_UTILS_HPP - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "limonp/StdExtension.hpp" -#include "limonp/Logging.hpp" - -namespace husky { -static const size_t LISTEN_QUEUE_LEN = 1024; - -typedef int SocketFd; -inline SocketFd CreateAndListenSocket(int port) { - SocketFd sock = socket(AF_INET, SOCK_STREAM, 0); - CHECK(sock != -1); - - int optval = 1; // nozero - CHECK(-1 != setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval))); - - struct sockaddr_in addr; - addr.sin_family = AF_INET; - addr.sin_port = htons(port); - addr.sin_addr.s_addr = htonl(INADDR_ANY); - CHECK(-1 != ::bind(sock, (sockaddr*)&addr, sizeof(addr))); - CHECK(-1 != ::listen(sock, LISTEN_QUEUE_LEN)); - - 
return sock; -} - -const char* const HTTP_FORMAT = "HTTP/1.1 200 OK\r\nConnection: close\r\nServer: HuskyServer/1.0.0\r\nContent-Type: text/json; charset=%s\r\nContent-Length: %d\r\n\r\n%s"; -const char* const CHARSET_UTF8 = "UTF-8"; -} // namespace husky - - -#endif diff --git a/deps/husky/thread_pool_server.h b/deps/husky/thread_pool_server.h deleted file mode 100644 index 3716274..0000000 --- a/deps/husky/thread_pool_server.h +++ /dev/null @@ -1,126 +0,0 @@ -#ifndef HUSKY_THREADPOOLSERVER_H -#define HUSKY_THREADPOOLSERVER_H - -#include "net_util.h" -#include "irequest_handler.h" -#include "limonp/ThreadPool.hpp" - -namespace husky { -using namespace limonp; - -const char* const CLIENT_IP_K = "CLIENT_IP"; -const size_t RECV_BUFFER_SIZE = 16 * 1024; - -const struct linger LNG = {1, 1}; -const struct timeval SOCKET_TIMEOUT = {16, 0}; - - -class ThreadPoolServer { - public: - ThreadPoolServer(size_t thread_number, size_t port, IRequestHandler & handler): - pool_(thread_number), req_handler_(handler), host_socket_(-1) { - host_socket_ = CreateAndListenSocket(port); - } - ~ThreadPoolServer() {}; - - bool Start() { - pool_.Start(); - sockaddr_in clientaddr; - socklen_t nSize = sizeof(clientaddr); - int clientSock; - - while(true) { - if(-1 == (clientSock = accept(host_socket_, (struct sockaddr*) &clientaddr, &nSize))) { - LOG(ERROR) << strerror(errno); - break; - } - pool_.Add(NewClosure(this, &ThreadPoolServer::Run, clientSock)); - //pool_.Add(CreateTask(clientSock, req_handler_)); - } - return true; - } - - private: - void Run(int sockfd) { - do { - if(!SetSockopt(sockfd)) { - LOG(ERROR) << "_getsockopt failed."; - break; - } - string strSnd, strRetByHandler; - HttpReqInfo httpReq; - if(!Receive(sockfd, httpReq)) { - LOG(ERROR) << "Receive failed."; - break; - } - - if(httpReq.IsGET() && !req_handler_.DoGET(httpReq, strRetByHandler)) { - LOG(ERROR) << "DoGET failed."; - break; - } - if(httpReq.IsPOST() && !req_handler_.DoPOST(httpReq, strRetByHandler)) { - LOG(ERROR) 
<< "DoPOST failed."; - break; - } - strSnd = StringFormat(HTTP_FORMAT, CHARSET_UTF8, strRetByHandler.length(), strRetByHandler.c_str()); - - if(!Send(sockfd, strSnd)) { - LOG(ERROR) << "Send failed."; - break; - } - } while(false); - - - if(-1 == close(sockfd)) { - LOG(ERROR) << strerror(errno); - } - } - bool Receive(int sockfd, HttpReqInfo& httpInfo) const { - char recvBuf[RECV_BUFFER_SIZE]; - int n = 0; - while(!httpInfo.IsBodyFinished() && (n = recv(sockfd, recvBuf, RECV_BUFFER_SIZE, 0)) > 0) { - if(!httpInfo.IsHeaderFinished()) { - if(!httpInfo.ParseHeader(recvBuf, n)) { - LOG(ERROR) << "ParseHeader failed. "; - return false; - } - continue; - } - httpInfo.AppendBody(recvBuf, n); - } - if(n < 0) { - LOG(ERROR) << strerror(errno); - return false; - } - return true; - } - bool Send(int sockfd, const string& strSnd) const { - if(-1 == send(sockfd, strSnd.c_str(), strSnd.length(), 0)) { - LOG(ERROR) << strerror(errno); - return false; - } - return true; - } - bool SetSockopt(int sockfd) const { - if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_LINGER, (const char*)&LNG, sizeof(LNG))) { - LOG(ERROR) << strerror(errno); - return false; - } - if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, (const char*)&SOCKET_TIMEOUT, sizeof(SOCKET_TIMEOUT))) { - LOG(ERROR) << strerror(errno); - return false; - } - if(-1 == setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, (const char*)&SOCKET_TIMEOUT, sizeof(SOCKET_TIMEOUT))) { - LOG(ERROR) << strerror(errno); - return false; - } - return true; - } - - ThreadPool pool_; - IRequestHandler & req_handler_; - int host_socket_; -}; // class ThreadPoolServer -} // namespace husky - -#endif diff --git a/dict/CMakeLists.txt b/dict/CMakeLists.txt deleted file mode 100644 index 2310959..0000000 --- a/dict/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -INSTALL(FILES - hmm_model.utf8 - jieba.dict.utf8 - user.dict.utf8 - idf.utf8 - stop_words.utf8 - DESTINATION - dict -) diff --git a/script/CMakeLists.txt b/script/CMakeLists.txt deleted file mode 
100644 index 33d3525..0000000 --- a/script/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -INSTALL(PROGRAMS - cjserver.start - cjserver.stop - DESTINATION - script -) diff --git a/script/cjserver.start b/script/cjserver.start deleted file mode 100755 index f480a72..0000000 --- a/script/cjserver.start +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -PATH=/usr/bin/:/usr/local/bin/:/sbin/:$PATH - -PID=`pidof cjserver` -if [ ! -z "${PID}" ] -then - echo "please stop cjserver first." -else - /usr/local/cppjieba/bin/cjserver /usr/local/cppjieba/conf/server.conf >> /dev/null 2>&1 & - echo "service started." -fi diff --git a/script/cjserver.stop b/script/cjserver.stop deleted file mode 100755 index fddd124..0000000 --- a/script/cjserver.stop +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -PATH=/usr/bin/:/usr/local/bin/:/sbin/:$PATH - -PID=`pidof cjserver` -if [ ! -z "${PID}" ] -then - kill ${PID} - sleep 1 - echo "service stop ok." -else - echo "cjserver is not running." -fi diff --git a/server/CMakeLists.txt b/server/CMakeLists.txt deleted file mode 100644 index 1697231..0000000 --- a/server/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) - -ADD_EXECUTABLE(cjserver server.cpp) -TARGET_LINK_LIBRARIES(cjserver pthread) - -INSTALL(TARGETS cjserver DESTINATION bin) diff --git a/server/server.cpp b/server/server.cpp deleted file mode 100644 index de01d95..0000000 --- a/server/server.cpp +++ /dev/null @@ -1,101 +0,0 @@ -#include -#include -#include -#include -#include -#include "limonp/Config.hpp" -#include "husky/thread_pool_server.h" -#include "cppjieba/Jieba.hpp" - -using namespace husky; -using namespace cppjieba; - -class ReqHandler: public IRequestHandler { - public: - ReqHandler(const cppjieba::Jieba& jieba) - : jieba_(jieba) { - } - - virtual ~ReqHandler() { - } - - virtual bool DoGET(const HttpReqInfo& httpReq, string& strSnd) { - string sentence, method, format; - string tmp; - vector words; - httpReq.GET("key", tmp); - 
URLDecode(tmp, sentence); - httpReq.GET("method", method); - jieba_.Cut(sentence, words, true); - httpReq.GET("format", format); - Run(sentence, method, format, strSnd); - return true; - } - - virtual bool DoPOST(const HttpReqInfo& httpReq, string& strSnd) { - vector words; - Run(httpReq.GetBody(), "MIX", "simple", strSnd); - return true; - } - - void Run(const string& sentence, - const string& method, - const string& format, - string& strSnd) const { - vector words; - if ("MP" == method) { - jieba_.Cut(sentence, words, false); - } else if ("HMM" == method) { - jieba_.CutHMM(sentence, words); - } else if ("MIX" == method) { - jieba_.Cut(sentence, words, true); - } else if ("FULL" == method) { - jieba_.CutAll(sentence, words); - } else if ("QUERY" == method) { - jieba_.CutForSearch(sentence, words); - } else { // default - jieba_.Cut(sentence, words, false); - } - if (format == "simple") { - Join(words.begin(), words.end(), strSnd, " "); - } else { - strSnd << words; - } - } - private: - const cppjieba::Jieba& jieba_; -}; - -bool Run(int argc, char** argv) { - if (argc < 2) { - return false; - } - Config conf(argv[1]); - if (!conf) { - return false; - } - int port = conf.Get("port", 1339); - int threadNumber = conf.Get("thread_number", 4); - string dictPath = conf.Get("dict_path", ""); - string modelPath = conf.Get("model_path", ""); - string userDictPath = conf.Get("user_dict_path", ""); - - LOG(INFO) << "config info: " << conf.GetConfigInfo(); - - cppjieba::Jieba jieba(dictPath, - modelPath, - userDictPath); - - ReqHandler reqHandler(jieba); - ThreadPoolServer server(threadNumber, port, reqHandler); - return server.Start(); -} - -int main(int argc, char* argv[]) { - if (!Run(argc, argv)) { - printf("usage: %s \n", argv[0]); - return EXIT_FAILURE; - } - return EXIT_SUCCESS; -} - diff --git a/test/servertest/go_load_test.sh b/test/servertest/go_load_test.sh deleted file mode 100755 index 298e240..0000000 --- a/test/servertest/go_load_test.sh +++ /dev/null @@ -1,2 
+0,0 @@ -# go get github.com/yanyiwu/go_http_load -go_http_load -method=GET -get_urls="../test/testdata/load_test.urls" -loop_count=500 -goroutines=2 diff --git a/test/servertest/load_test.py b/test/servertest/load_test.py deleted file mode 100755 index d70c2b6..0000000 --- a/test/servertest/load_test.py +++ /dev/null @@ -1,91 +0,0 @@ -#!/usr/bin/python -# coding:utf-8 -import time -import urllib2 -import threading -from Queue import Queue -from time import sleep -import sys - -# 性能测试页面 -#PERF_TEST_URL = "http://10.2.66.38/?yyid=-1&suv=1309231700203264&callback=xxxxx" -URLS = [line for line in open("../test/testdata/load_test.urls", "r")] - -# 配置:压力测试 -THREAD_NUM = 10 # 并发线程总数 -ONE_WORKER_NUM = 500 # 每个线程的循环次数 -LOOP_SLEEP = 0.01 # 每次请求时间间隔(秒) - -# 配置:模拟运行状态 -#THREAD_NUM = 10 # 并发线程总数 -#ONE_WORKER_NUM = 10 # 每个线程的循环次数 -#LOOP_SLEEP = 0 # 每次请求时间间隔(秒) - - -# 出错数 -ERROR_NUM = 0 - - -#具体的处理函数,负责处理单个任务 -def doWork(index, url): - t = threading.currentThread() - #print "["+t.name+" "+str(index)+"] "+PERF_TEST_URL - - try: - html = urllib2.urlopen(url).read() - except urllib2.URLError, e: - print "["+t.name+" "+str(index)+"] " - print e - global ERROR_NUM - ERROR_NUM += 1 - - -#这个是工作进程,负责不断从队列取数据并处理 -def working(): - t = threading.currentThread() - print "["+t.name+"] Sub Thread Begin" - - i = 0 - while i < ONE_WORKER_NUM: - i += 1 - doWork(i, URLS[i % len(URLS)]) - sleep(LOOP_SLEEP) - - print "["+t.name+"] Sub Thread End" - - -def main(): - #doWork(0) - #return - - t1 = time.time() - - Threads = [] - - # 创建线程 - for i in range(THREAD_NUM): - t = threading.Thread(target=working, name="T"+str(i)) - t.setDaemon(True) - Threads.append(t) - - for t in Threads: - t.start() - - for t in Threads: - t.join() - - print "main thread end" - - t2 = time.time() - print "========================================" - #print "URL:", PERF_TEST_URL - print "任务数量:", THREAD_NUM, "*", ONE_WORKER_NUM, "=", THREAD_NUM*ONE_WORKER_NUM - print "总耗时(秒):", t2-t1 - print "每次请求耗时(秒):", (t2-t1) / 
(THREAD_NUM*ONE_WORKER_NUM) - print "每秒承载请求数:", 1 / ((t2-t1) / (THREAD_NUM*ONE_WORKER_NUM)) - print "错误数量:", ERROR_NUM - - -if __name__ == "__main__": - main() - diff --git a/test/servertest/run_curl.sh b/test/servertest/run_curl.sh deleted file mode 100755 index ec58b45..0000000 --- a/test/servertest/run_curl.sh +++ /dev/null @@ -1,11 +0,0 @@ -CURL_RES=../test/testdata/curl.res -TMP=curl.res.tmp -curl -s "http://127.0.0.1:11200/?key=南京市长江大桥" >> $TMP -if diff $TMP $CURL_RES >> /dev/null -then - echo "ok"; -else - echo "failed." -fi - -rm $TMP