修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。

修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba
This commit is contained in:
yanyiwu 2015-06-05 21:59:16 +08:00
parent 8ce2af9706
commit 3528b6296a
16 changed files with 152 additions and 75 deletions

View File

@ -2,9 +2,9 @@ PROJECT(CPPJIEBA)
CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE )
endif()
#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
#endif()
set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE )
ADD_DEFINITIONS(-O3 -Wall -g)
IF(APPLE) # mac os

View File

@ -7,6 +7,8 @@
3. 修改 Code Style ,参照 google code style 。
4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。
5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。
6. 修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
7. 修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba 。
## v2.4.4

View File

@ -10,7 +10,7 @@ CppJieba是"结巴"中文分词的C++版本
+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。
+ 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。
+ 内置分词服务`server/server.cpp`在linux环境下可安装使用(可选)。
+ 内置分词服务`server/server.cpp`在linux环境下可安装使用(可选),可通过http参数选择不同分词算法进行分词。
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
+ 支持加载自定义用户词典。
+ 支持 `linux` , `mac osx` 操作系统。
@ -59,7 +59,7 @@ make
### 启动服务
```
./bin/cjserver ../test/testdata/server.conf
./bin/cjserver ../conf/server_example.conf
```
### 客户端请求示例
@ -80,8 +80,18 @@ curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple"
南京市 长江大桥
```
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
默认切词算法是MixSegment切词算法,如果想要使用其他算法切词,可以使用参数method来设置。
示例如下:
```
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MP"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=HMM"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MIX"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=FULL"
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=QUERY"
```
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
同时也支持HTTP POST模式,使用如下调用:
@ -107,15 +117,15 @@ sudo make install
### 服务启动和停止(仅限 linux 系统)
```
/etc/init.d/cjserver.start >> /dev/null 2>&1
/etc/init.d/cjserver.stop
cd /usr/local/cppjieba
./script/cjserver.start
./script/cjserver.stop
```
### 卸载服务(仅限 linux 系统)
```sh
cd build/
cat install_manifest.txt | sudo xargs rm -rf
rm -rf /usr/local/cppjieba
```
## Docker 示例

View File

@ -1 +1 @@
INSTALL(FILES server.conf DESTINATION /etc/CppJieba)
INSTALL(FILES server.conf DESTINATION conf)

View File

@ -7,10 +7,14 @@ thread_number=4
queue_max_size=4096
#dict path
dict_path=/usr/share/CppJieba/dict/jieba.dict.utf8
dict_path=/usr/local/cppjieba/dict/jieba.dict.utf8
#model path
model_path=/usr/share/CppJieba/dict/hmm_model.utf8
model_path=/usr/local/cppjieba/dict/hmm_model.utf8
#user_dict_path
#user_dict_path=/usr/share/CppJieba/dict/user.dict.utf8
user_dict_path=/usr/local/cppjieba/dict/user.dict.utf8
idf_path=/usr/local/cppjieba/dict/idf.utf8
stop_words_path=/usr/local/cppjieba/dict/stop_words.utf8

19
conf/server_example.conf Normal file
View File

@ -0,0 +1,19 @@
# config
#socket listen port
port=11200
thread_number=4
queue_max_size=4096
#dict path
dict_path=../dict/jieba.dict.utf8
#model path
model_path=../dict/hmm_model.utf8
user_dict_path=../dict/user.dict.utf8
idf_path=../dict/idf.utf8
stop_words_path=../dict/stop_words.utf8

View File

@ -1 +1,9 @@
INSTALL(FILES hmm_model.utf8 jieba.dict.utf8 user.dict.utf8 DESTINATION share/CppJieba/dict)
INSTALL(FILES
hmm_model.utf8
jieba.dict.utf8
user.dict.utf8
idf.utf8
stop_words.utf8
DESTINATION
dict
)

View File

@ -1 +1,6 @@
INSTALL(PROGRAMS cjserver.start cjserver.stop DESTINATION /etc/init.d/)
INSTALL(PROGRAMS
cjserver.start
cjserver.stop
DESTINATION
script
)

View File

@ -7,6 +7,6 @@ if [ ! -z "${PID}" ]
then
echo "please stop cjserver first."
else
cjserver /etc/CppJieba/server.conf &
/usr/local/cppjieba/bin/cjserver /usr/local/cppjieba/conf/server.conf >> /dev/null 2>&1 &
echo "service started."
fi

View File

@ -5,5 +5,4 @@ INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
ADD_EXECUTABLE(cjserver server.cpp)
TARGET_LINK_LIBRARIES(cjserver pthread)
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
INSTALL(TARGETS cjserver DESTINATION bin)

View File

@ -5,40 +5,63 @@
#include <string.h>
#include "Limonp/Config.hpp"
#include "Husky/ThreadPoolServer.hpp"
#include "MixSegment.hpp"
#include "QuerySegment.hpp"
#include "FullSegment.hpp"
#include "Application.hpp"
using namespace Husky;
using namespace CppJieba;
class ReqHandler: public IRequestHandler {
public:
ReqHandler(const ISegment& segment): _segment(segment) {
ReqHandler(const CppJieba::Application& app): app_(app) {
}
virtual ~ReqHandler() {
}
virtual ~ReqHandler() {};
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const {
string sentence, tmp;
string sentence, method, format;
string tmp;
vector<string> words;
httpReq.GET("key", tmp);
URLDecode(tmp, sentence);
_segment.cut(sentence, words);
if(httpReq.GET("format", tmp) && tmp == "simple") {
join(words.begin(), words.end(), strSnd, " ");
return true;
}
strSnd << words;
httpReq.GET("method", method);
app_.cut(sentence, words, CppJieba::METHOD_MIX);
httpReq.GET("format", format);
run(sentence, method, format, strSnd);
return true;
}
virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const {
vector<string> words;
_segment.cut(httpReq.getBody(), words);
strSnd << words;
run(httpReq.getBody(), "MIX", "simple", strSnd);
return true;
}
void run(const string& sentence,
const string& method,
const string& format,
string& strSnd) const {
vector<string> words;
if ("MP" == method) {
app_.cut(sentence, words, CppJieba::METHOD_MP);
} else if ("HMM" == method) {
app_.cut(sentence, words, CppJieba::METHOD_HMM);
} else if ("MIX" == method) {
app_.cut(sentence, words, CppJieba::METHOD_MIX);
} else if ("FULL" == method) {
app_.cut(sentence, words, CppJieba::METHOD_FULL);
} else if ("QUERY" == method) {
app_.cut(sentence, words, CppJieba::METHOD_QUERY);
} else { // default
app_.cut(sentence, words, CppJieba::METHOD_MIX);
}
if(format == "simple") {
join(words.begin(), words.end(), strSnd, " ");
} else {
strSnd << words;
}
}
private:
const ISegment& _segment;
const CppJieba::Application& app_;
};
bool run(int argc, char** argv) {
@ -49,35 +72,26 @@ bool run(int argc, char** argv) {
if(!conf) {
return false;
}
int port = 0;
int threadNumber = 0;
int queueMaxSize = 0;
string dictPath;
string modelPath;
string userDictPath;
LIMONP_CHECK(conf.get("port", port));
LIMONP_CHECK(conf.get("thread_number", threadNumber));
LIMONP_CHECK(conf.get("queue_max_size", queueMaxSize));
LIMONP_CHECK(conf.get("dict_path", dictPath));
LIMONP_CHECK(conf.get("model_path", modelPath));
if(!conf.get("user_dict_path", userDictPath)) { //optional
userDictPath = "";
}
int port = conf.get("port", 1339);
int threadNumber = conf.get("thread_number", 4);
int queueMaxSize = conf.get("queue_max_size", 1024);
string dictPath = conf.get("dict_path", "");
string modelPath = conf.get("model_path", "");
string userDictPath = conf.get("user_dict_path", "");
string idfPath = conf.get("idf_path", "");
string stopWordsPath = conf.get("stop_words_path", "");
LogInfo("config info: %s", conf.getConfigInfo().c_str());
/*
* segment can be one of (MPSegment, HMMSegment, MixSegment, QuerySegment ...)
*/
//MPSegment segment(dictPath, userDictPath);
//HMMSegment segment(modelPath);
MixSegment segment(dictPath, modelPath, userDictPath);
//QuerySegment segment(dictPath, modelPath);
ReqHandler reqHandler(segment);
CppJieba::Application app(dictPath,
modelPath,
userDictPath,
idfPath,
stopWordsPath);
ReqHandler reqHandler(app);
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
return sf.start();
}
int main(int argc, char* argv[]) {

View File

@ -17,9 +17,13 @@ enum CutMethod {
class Application {
public:
Application(const string& dictDir)
: dictTrie_(pathJoin(dictDir, "jieba.dict.utf8")),
model_(pathJoin(dictDir, "hmm_model.utf8")),
Application(const string& dictPath,
const string& modelPath,
const string& userDictPath,
const string& idfPath,
const string& stopWordsPath)
: dictTrie_(dictPath, userDictPath),
model_(modelPath),
mpSeg_(&dictTrie_),
hmmSeg_(&model_),
mixSeg_(&dictTrie_, &model_),
@ -28,8 +32,8 @@ class Application {
tagger_(&dictTrie_, &model_),
extractor_(&dictTrie_,
&model_,
pathJoin(dictDir, "idf.utf8"),
pathJoin(dictDir, "stop_words.utf8")) {
idfPath,
stopWordsPath) {
}
void cut(const string& sentence, vector<string>& words,
CutMethod method) const {

View File

@ -55,21 +55,19 @@ class Config {
ifs.close();
}
public:
bool get(const string& key, string& value) const {
string get(const string& key, const string& defaultvalue) const {
map<string, string>::const_iterator it = map_.find(key);
if(map_.end() != it) {
value = it->second;
return true;
return it->second;
}
return false;
return defaultvalue;
}
bool get(const string& key, int & value) const {
string str;
if(!get(key, str)) {
return false;
int get(const string& key, int defaultvalue) const {
string str = get(key, "");
if("" == str) {
return defaultvalue;
}
value = atoi(str.c_str());
return true;
return atoi(str.c_str());
}
const char* operator [] (const char* key) const {
if(NULL == key) {

View File

@ -16,7 +16,11 @@ void LoadSentences(const string& filepath, vector<string>& sentences) {
}
int main(int argc, char** argv) {
CppJieba::Application app("../dict/");
CppJieba::Application app("../dict/jieba.dict.utf8",
"../dict/hmm_model.utf8",
"../dict/user.dict.utf8",
"../dict/idf.utf8",
"../dict/stop_words.utf8");
vector<string> words;
string result;
string s;

View File

@ -11,3 +11,9 @@ dict_path=../dict/jieba.dict.utf8
#model path
model_path=../dict/hmm_model.utf8
user_dict_path=../dict/user.dict.utf8
idf_path=../dict/idf.utf8
stop_words_path=../dict/stop_words.utf8

View File

@ -4,7 +4,11 @@
using namespace CppJieba;
TEST(ApplicationTest, Test1) {
Application app("../dict/");
CppJieba::Application app("../dict/jieba.dict.utf8",
"../dict/hmm_model.utf8",
"../dict/user.dict.utf8",
"../dict/idf.utf8",
"../dict/stop_words.utf8");
vector<string> words;
string result;