修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。

修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba
This commit is contained in:
yanyiwu 2015-06-05 21:59:16 +08:00
parent 8ce2af9706
commit 3528b6296a
16 changed files with 152 additions and 75 deletions

View File

@ -2,9 +2,9 @@ PROJECT(CPPJIEBA)
CMAKE_MINIMUM_REQUIRED (VERSION 2.6) CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) #if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE ) #endif()
endif() set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE )
ADD_DEFINITIONS(-O3 -Wall -g) ADD_DEFINITIONS(-O3 -Wall -g)
IF(APPLE) # mac os IF(APPLE) # mac os

View File

@ -7,6 +7,8 @@
3. 修改 Code Style ,参照 google code style 。 3. 修改 Code Style ,参照 google code style 。
4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。 4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。
5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。 5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。
6. 修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
7. 修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba 。
## v2.4.4 ## v2.4.4

View File

@ -10,7 +10,7 @@ CppJieba是"结巴"中文分词的C++版本
+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。 + 源代码都写进头文件`src/*.hpp`里,`include`即可使用。
+ 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。 + 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。
+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选)。 + 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选),可通过http参数选择不同分词算法进行分词。
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。 + 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
+ 支持加载自定义用户词典。 + 支持加载自定义用户词典。
+ 支持 `linux` , `mac osx` 操作系统。 + 支持 `linux` , `mac osx` 操作系统。
@ -59,7 +59,7 @@ make
### 启动服务 ### 启动服务
``` ```
./bin/cjserver ../test/testdata/server.conf ./bin/cjserver ../conf/server_example.conf
``` ```
### 客户端请求示例 ### 客户端请求示例
@ -80,8 +80,18 @@ curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple"
南京市 长江大桥 南京市 长江大桥
``` ```
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`): 默认切词算法是MixSegment切词算法,如果想要使用其他算法切词,可以使用参数method来设置。
示例如下:
```
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MP"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=HMM"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MIX"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=FULL"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=QUERY"
```
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
同时也支持HTTP POST模式,使用如下调用: 同时也支持HTTP POST模式,使用如下调用:
@ -107,15 +117,15 @@ sudo make install
### 服务启动和停止(仅限 linux 系统) ### 服务启动和停止(仅限 linux 系统)
``` ```
/etc/init.d/cjserver.start >> /dev/null 2>&1 cd /usr/local/cppjieba
/etc/init.d/cjserver.stop ./script/cjserver.start
./script/cjserver.stop
``` ```
### 卸载服务(仅限 linux 系统) ### 卸载服务(仅限 linux 系统)
```sh ```sh
cd build/ rm -rf /usr/local/cppjieba
cat install_manifest.txt | sudo xargs rm -rf
``` ```
## Docker 示例 ## Docker 示例

View File

@ -1 +1 @@
INSTALL(FILES server.conf DESTINATION /etc/CppJieba) INSTALL(FILES server.conf DESTINATION conf)

View File

@ -7,10 +7,14 @@ thread_number=4
queue_max_size=4096 queue_max_size=4096
#dict path #dict path
dict_path=/usr/share/CppJieba/dict/jieba.dict.utf8 dict_path=/usr/local/cppjieba/dict/jieba.dict.utf8
#model path #model path
model_path=/usr/share/CppJieba/dict/hmm_model.utf8 model_path=/usr/local/cppjieba/dict/hmm_model.utf8
#user_dict_path #user_dict_path
#user_dict_path=/usr/share/CppJieba/dict/user.dict.utf8 user_dict_path=/usr/local/cppjieba/dict/user.dict.utf8
idf_path=/usr/local/cppjieba/dict/idf.utf8
stop_words_path=/usr/local/cppjieba/dict/stop_words.utf8

19
conf/server_example.conf Normal file
View File

@ -0,0 +1,19 @@
# config
#socket listen port
port=11200
thread_number=4
queue_max_size=4096
#dict path
dict_path=../dict/jieba.dict.utf8
#model path
model_path=../dict/hmm_model.utf8
user_dict_path=../dict/user.dict.utf8
idf_path=../dict/idf.utf8
stop_words_path=../dict/stop_words.utf8

View File

@ -1 +1,9 @@
INSTALL(FILES hmm_model.utf8 jieba.dict.utf8 user.dict.utf8 DESTINATION share/CppJieba/dict) INSTALL(FILES
hmm_model.utf8
jieba.dict.utf8
user.dict.utf8
idf.utf8
stop_words.utf8
DESTINATION
dict
)

View File

@ -1 +1,6 @@
INSTALL(PROGRAMS cjserver.start cjserver.stop DESTINATION /etc/init.d/) INSTALL(PROGRAMS
cjserver.start
cjserver.stop
DESTINATION
script
)

View File

@ -7,6 +7,6 @@ if [ ! -z "${PID}" ]
then then
echo "please stop cjserver first." echo "please stop cjserver first."
else else
cjserver /etc/CppJieba/server.conf & /usr/local/cppjieba/bin/cjserver /usr/local/cppjieba/conf/server.conf >> /dev/null 2>&1 &
echo "service started." echo "service started."
fi fi

View File

@ -5,5 +5,4 @@ INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
ADD_EXECUTABLE(cjserver server.cpp) ADD_EXECUTABLE(cjserver server.cpp)
TARGET_LINK_LIBRARIES(cjserver pthread) TARGET_LINK_LIBRARIES(cjserver pthread)
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin) INSTALL(TARGETS cjserver DESTINATION bin)

View File

@ -5,40 +5,63 @@
#include <string.h> #include <string.h>
#include "Limonp/Config.hpp" #include "Limonp/Config.hpp"
#include "Husky/ThreadPoolServer.hpp" #include "Husky/ThreadPoolServer.hpp"
#include "MixSegment.hpp" #include "Application.hpp"
#include "QuerySegment.hpp"
#include "FullSegment.hpp"
using namespace Husky; using namespace Husky;
using namespace CppJieba; using namespace CppJieba;
class ReqHandler: public IRequestHandler { class ReqHandler: public IRequestHandler {
public: public:
ReqHandler(const ISegment& segment): _segment(segment) { ReqHandler(const CppJieba::Application& app): app_(app) {
}
virtual ~ReqHandler() {
} }
virtual ~ReqHandler() {};
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const { virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const {
string sentence, tmp; string sentence, method, format;
string tmp;
vector<string> words; vector<string> words;
httpReq.GET("key", tmp); httpReq.GET("key", tmp);
URLDecode(tmp, sentence); URLDecode(tmp, sentence);
_segment.cut(sentence, words); httpReq.GET("method", method);
if(httpReq.GET("format", tmp) && tmp == "simple") { app_.cut(sentence, words, CppJieba::METHOD_MIX);
join(words.begin(), words.end(), strSnd, " "); httpReq.GET("format", format);
return true; run(sentence, method, format, strSnd);
}
strSnd << words;
return true; return true;
} }
virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const { virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const {
vector<string> words; vector<string> words;
_segment.cut(httpReq.getBody(), words); run(httpReq.getBody(), "MIX", "simple", strSnd);
strSnd << words;
return true; return true;
} }
void run(const string& sentence,
const string& method,
const string& format,
string& strSnd) const {
vector<string> words;
if ("MP" == method) {
app_.cut(sentence, words, CppJieba::METHOD_MP);
} else if ("HMM" == method) {
app_.cut(sentence, words, CppJieba::METHOD_HMM);
} else if ("MIX" == method) {
app_.cut(sentence, words, CppJieba::METHOD_MIX);
} else if ("FULL" == method) {
app_.cut(sentence, words, CppJieba::METHOD_FULL);
} else if ("QUERY" == method) {
app_.cut(sentence, words, CppJieba::METHOD_QUERY);
} else { // default
app_.cut(sentence, words, CppJieba::METHOD_MIX);
}
if(format == "simple") {
join(words.begin(), words.end(), strSnd, " ");
} else {
strSnd << words;
}
}
private: private:
const ISegment& _segment; const CppJieba::Application& app_;
}; };
bool run(int argc, char** argv) { bool run(int argc, char** argv) {
@ -49,35 +72,26 @@ bool run(int argc, char** argv) {
if(!conf) { if(!conf) {
return false; return false;
} }
int port = 0; int port = conf.get("port", 1339);
int threadNumber = 0; int threadNumber = conf.get("thread_number", 4);
int queueMaxSize = 0; int queueMaxSize = conf.get("queue_max_size", 1024);
string dictPath; string dictPath = conf.get("dict_path", "");
string modelPath; string modelPath = conf.get("model_path", "");
string userDictPath; string userDictPath = conf.get("user_dict_path", "");
LIMONP_CHECK(conf.get("port", port)); string idfPath = conf.get("idf_path", "");
LIMONP_CHECK(conf.get("thread_number", threadNumber)); string stopWordsPath = conf.get("stop_words_path", "");
LIMONP_CHECK(conf.get("queue_max_size", queueMaxSize));
LIMONP_CHECK(conf.get("dict_path", dictPath));
LIMONP_CHECK(conf.get("model_path", modelPath));
if(!conf.get("user_dict_path", userDictPath)) { //optional
userDictPath = "";
}
LogInfo("config info: %s", conf.getConfigInfo().c_str()); LogInfo("config info: %s", conf.getConfigInfo().c_str());
/*
* segment can be one of (MPSegment, HMMSegment, MixSegment, QuerySegment ...)
*/
//MPSegment segment(dictPath, userDictPath);
//HMMSegment segment(modelPath);
MixSegment segment(dictPath, modelPath, userDictPath);
//QuerySegment segment(dictPath, modelPath);
ReqHandler reqHandler(segment); CppJieba::Application app(dictPath,
modelPath,
userDictPath,
idfPath,
stopWordsPath);
ReqHandler reqHandler(app);
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler); ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
return sf.start(); return sf.start();
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {

View File

@ -17,9 +17,13 @@ enum CutMethod {
class Application { class Application {
public: public:
Application(const string& dictDir) Application(const string& dictPath,
: dictTrie_(pathJoin(dictDir, "jieba.dict.utf8")), const string& modelPath,
model_(pathJoin(dictDir, "hmm_model.utf8")), const string& userDictPath,
const string& idfPath,
const string& stopWordsPath)
: dictTrie_(dictPath, userDictPath),
model_(modelPath),
mpSeg_(&dictTrie_), mpSeg_(&dictTrie_),
hmmSeg_(&model_), hmmSeg_(&model_),
mixSeg_(&dictTrie_, &model_), mixSeg_(&dictTrie_, &model_),
@ -28,8 +32,8 @@ class Application {
tagger_(&dictTrie_, &model_), tagger_(&dictTrie_, &model_),
extractor_(&dictTrie_, extractor_(&dictTrie_,
&model_, &model_,
pathJoin(dictDir, "idf.utf8"), idfPath,
pathJoin(dictDir, "stop_words.utf8")) { stopWordsPath) {
} }
void cut(const string& sentence, vector<string>& words, void cut(const string& sentence, vector<string>& words,
CutMethod method) const { CutMethod method) const {

View File

@ -55,21 +55,19 @@ class Config {
ifs.close(); ifs.close();
} }
public: public:
bool get(const string& key, string& value) const { string get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key); map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) { if(map_.end() != it) {
value = it->second; return it->second;
return true;
} }
return false; return defaultvalue;
} }
bool get(const string& key, int & value) const { int get(const string& key, int defaultvalue) const {
string str; string str = get(key, "");
if(!get(key, str)) { if("" == str) {
return false; return defaultvalue;
} }
value = atoi(str.c_str()); return atoi(str.c_str());
return true;
} }
const char* operator [] (const char* key) const { const char* operator [] (const char* key) const {
if(NULL == key) { if(NULL == key) {

View File

@ -16,7 +16,11 @@ void LoadSentences(const string& filepath, vector<string>& sentences) {
} }
int main(int argc, char** argv) { int main(int argc, char** argv) {
CppJieba::Application app("../dict/"); CppJieba::Application app("../dict/jieba.dict.utf8",
"../dict/hmm_model.utf8",
"../dict/user.dict.utf8",
"../dict/idf.utf8",
"../dict/stop_words.utf8");
vector<string> words; vector<string> words;
string result; string result;
string s; string s;

View File

@ -11,3 +11,9 @@ dict_path=../dict/jieba.dict.utf8
#model path #model path
model_path=../dict/hmm_model.utf8 model_path=../dict/hmm_model.utf8
user_dict_path=../dict/user.dict.utf8
idf_path=../dict/idf.utf8
stop_words_path=../dict/stop_words.utf8

View File

@ -4,7 +4,11 @@
using namespace CppJieba; using namespace CppJieba;
TEST(ApplicationTest, Test1) { TEST(ApplicationTest, Test1) {
Application app("../dict/"); CppJieba::Application app("../dict/jieba.dict.utf8",
"../dict/hmm_model.utf8",
"../dict/user.dict.utf8",
"../dict/idf.utf8",
"../dict/stop_words.utf8");
vector<string> words; vector<string> words;
string result; string result;