mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba
This commit is contained in:
parent
8ce2af9706
commit
3528b6296a
@ -2,9 +2,9 @@ PROJECT(CPPJIEBA)
|
||||
|
||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
|
||||
|
||||
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE )
|
||||
endif()
|
||||
#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
#endif()
|
||||
set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE )
|
||||
|
||||
ADD_DEFINITIONS(-O3 -Wall -g)
|
||||
IF(APPLE) # mac os
|
||||
|
@ -7,6 +7,8 @@
|
||||
3. 修改 Code Style ,参照 google code style 。
|
||||
4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。
|
||||
5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。
|
||||
6. 修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
|
||||
7. 修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba 。
|
||||
|
||||
## v2.4.4
|
||||
|
||||
|
24
README.md
24
README.md
@ -10,7 +10,7 @@ CppJieba是"结巴"中文分词的C++版本
|
||||
|
||||
+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。
|
||||
+ 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。
|
||||
+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选)。
|
||||
+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选),可通过http参数选择不同分词算法进行分词。
|
||||
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
|
||||
+ 支持加载自定义用户词典。
|
||||
+ 支持 `linux` , `mac osx` 操作系统。
|
||||
@ -59,7 +59,7 @@ make
|
||||
### 启动服务
|
||||
|
||||
```
|
||||
./bin/cjserver ../test/testdata/server.conf
|
||||
./bin/cjserver ../conf/server_example.conf
|
||||
```
|
||||
|
||||
### 客户端请求示例
|
||||
@ -80,8 +80,18 @@ curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple"
|
||||
南京市 长江大桥
|
||||
```
|
||||
|
||||
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
|
||||
默认切词算法是MixSegment切词算法,如果想要使用其他算法切词,可以使用参数method来设置。
|
||||
示例如下:
|
||||
|
||||
```
|
||||
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MP"
|
||||
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=HMM"
|
||||
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MIX"
|
||||
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=FULL"
|
||||
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=QUERY"
|
||||
```
|
||||
|
||||
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
|
||||
|
||||
同时,也支持HTTP POST模式,使用如下调用:
|
||||
|
||||
@ -107,15 +117,15 @@ sudo make install
|
||||
### 服务启动和停止(仅限 linux 系统)
|
||||
|
||||
```
|
||||
/etc/init.d/cjserver.start >> /dev/null 2>&1
|
||||
/etc/init.d/cjserver.stop
|
||||
cd /usr/local/cppjieba
|
||||
./script/cjserver.start
|
||||
./script/cjserver.stop
|
||||
```
|
||||
|
||||
### 卸载服务(仅限 linux 系统)
|
||||
|
||||
```sh
|
||||
cd build/
|
||||
cat install_manifest.txt | sudo xargs rm -rf
|
||||
rm -rf /usr/local/cppjieba
|
||||
```
|
||||
|
||||
## Docker 示例
|
||||
|
@ -1 +1 @@
|
||||
INSTALL(FILES server.conf DESTINATION /etc/CppJieba)
|
||||
INSTALL(FILES server.conf DESTINATION conf)
|
||||
|
@ -7,10 +7,14 @@ thread_number=4
|
||||
queue_max_size=4096
|
||||
|
||||
#dict path
|
||||
dict_path=/usr/share/CppJieba/dict/jieba.dict.utf8
|
||||
dict_path=/usr/local/cppjieba/dict/jieba.dict.utf8
|
||||
|
||||
#model path
|
||||
model_path=/usr/share/CppJieba/dict/hmm_model.utf8
|
||||
model_path=/usr/local/cppjieba/dict/hmm_model.utf8
|
||||
|
||||
#user_dict_path
|
||||
#user_dict_path=/usr/share/CppJieba/dict/user.dict.utf8
|
||||
user_dict_path=/usr/local/cppjieba/dict/user.dict.utf8
|
||||
|
||||
idf_path=/usr/local/cppjieba/dict/idf.utf8
|
||||
|
||||
stop_words_path=/usr/local/cppjieba/dict/stop_words.utf8
|
||||
|
19
conf/server_example.conf
Normal file
19
conf/server_example.conf
Normal file
@ -0,0 +1,19 @@
|
||||
# config
|
||||
|
||||
#socket listen port
|
||||
port=11200
|
||||
|
||||
thread_number=4
|
||||
queue_max_size=4096
|
||||
|
||||
#dict path
|
||||
dict_path=../dict/jieba.dict.utf8
|
||||
|
||||
#model path
|
||||
model_path=../dict/hmm_model.utf8
|
||||
|
||||
user_dict_path=../dict/user.dict.utf8
|
||||
|
||||
idf_path=../dict/idf.utf8
|
||||
|
||||
stop_words_path=../dict/stop_words.utf8
|
@ -1 +1,9 @@
|
||||
INSTALL(FILES hmm_model.utf8 jieba.dict.utf8 user.dict.utf8 DESTINATION share/CppJieba/dict)
|
||||
INSTALL(FILES
|
||||
hmm_model.utf8
|
||||
jieba.dict.utf8
|
||||
user.dict.utf8
|
||||
idf.utf8
|
||||
stop_words.utf8
|
||||
DESTINATION
|
||||
dict
|
||||
)
|
||||
|
@ -1 +1,6 @@
|
||||
INSTALL(PROGRAMS cjserver.start cjserver.stop DESTINATION /etc/init.d/)
|
||||
INSTALL(PROGRAMS
|
||||
cjserver.start
|
||||
cjserver.stop
|
||||
DESTINATION
|
||||
script
|
||||
)
|
||||
|
@ -7,6 +7,6 @@ if [ ! -z "${PID}" ]
|
||||
then
|
||||
echo "please stop cjserver first."
|
||||
else
|
||||
cjserver /etc/CppJieba/server.conf &
|
||||
/usr/local/cppjieba/bin/cjserver /usr/local/cppjieba/conf/server.conf >> /dev/null 2>&1 &
|
||||
echo "service started."
|
||||
fi
|
||||
|
@ -5,5 +5,4 @@ INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
|
||||
ADD_EXECUTABLE(cjserver server.cpp)
|
||||
TARGET_LINK_LIBRARIES(cjserver pthread)
|
||||
|
||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
||||
|
||||
INSTALL(TARGETS cjserver DESTINATION bin)
|
||||
|
@ -5,40 +5,63 @@
|
||||
#include <string.h>
|
||||
#include "Limonp/Config.hpp"
|
||||
#include "Husky/ThreadPoolServer.hpp"
|
||||
#include "MixSegment.hpp"
|
||||
#include "QuerySegment.hpp"
|
||||
#include "FullSegment.hpp"
|
||||
#include "Application.hpp"
|
||||
|
||||
using namespace Husky;
|
||||
using namespace CppJieba;
|
||||
|
||||
class ReqHandler: public IRequestHandler {
|
||||
public:
|
||||
ReqHandler(const ISegment& segment): _segment(segment) {
|
||||
ReqHandler(const CppJieba::Application& app): app_(app) {
|
||||
}
|
||||
virtual ~ReqHandler() {
|
||||
}
|
||||
virtual ~ReqHandler() {};
|
||||
|
||||
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const {
|
||||
string sentence, tmp;
|
||||
string sentence, method, format;
|
||||
string tmp;
|
||||
vector<string> words;
|
||||
httpReq.GET("key", tmp);
|
||||
URLDecode(tmp, sentence);
|
||||
_segment.cut(sentence, words);
|
||||
if(httpReq.GET("format", tmp) && tmp == "simple") {
|
||||
join(words.begin(), words.end(), strSnd, " ");
|
||||
return true;
|
||||
}
|
||||
strSnd << words;
|
||||
httpReq.GET("method", method);
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
httpReq.GET("format", format);
|
||||
run(sentence, method, format, strSnd);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const {
|
||||
vector<string> words;
|
||||
_segment.cut(httpReq.getBody(), words);
|
||||
strSnd << words;
|
||||
run(httpReq.getBody(), "MIX", "simple", strSnd);
|
||||
return true;
|
||||
}
|
||||
|
||||
void run(const string& sentence,
|
||||
const string& method,
|
||||
const string& format,
|
||||
string& strSnd) const {
|
||||
vector<string> words;
|
||||
if ("MP" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MP);
|
||||
} else if ("HMM" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_HMM);
|
||||
} else if ("MIX" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
} else if ("FULL" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_FULL);
|
||||
} else if ("QUERY" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_QUERY);
|
||||
} else { // default
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
}
|
||||
if(format == "simple") {
|
||||
join(words.begin(), words.end(), strSnd, " ");
|
||||
} else {
|
||||
strSnd << words;
|
||||
}
|
||||
}
|
||||
private:
|
||||
const ISegment& _segment;
|
||||
const CppJieba::Application& app_;
|
||||
};
|
||||
|
||||
bool run(int argc, char** argv) {
|
||||
@ -49,35 +72,26 @@ bool run(int argc, char** argv) {
|
||||
if(!conf) {
|
||||
return false;
|
||||
}
|
||||
int port = 0;
|
||||
int threadNumber = 0;
|
||||
int queueMaxSize = 0;
|
||||
string dictPath;
|
||||
string modelPath;
|
||||
string userDictPath;
|
||||
LIMONP_CHECK(conf.get("port", port));
|
||||
LIMONP_CHECK(conf.get("thread_number", threadNumber));
|
||||
LIMONP_CHECK(conf.get("queue_max_size", queueMaxSize));
|
||||
LIMONP_CHECK(conf.get("dict_path", dictPath));
|
||||
LIMONP_CHECK(conf.get("model_path", modelPath));
|
||||
if(!conf.get("user_dict_path", userDictPath)) { //optional
|
||||
userDictPath = "";
|
||||
}
|
||||
int port = conf.get("port", 1339);
|
||||
int threadNumber = conf.get("thread_number", 4);
|
||||
int queueMaxSize = conf.get("queue_max_size", 1024);
|
||||
string dictPath = conf.get("dict_path", "");
|
||||
string modelPath = conf.get("model_path", "");
|
||||
string userDictPath = conf.get("user_dict_path", "");
|
||||
string idfPath = conf.get("idf_path", "");
|
||||
string stopWordsPath = conf.get("stop_words_path", "");
|
||||
|
||||
LogInfo("config info: %s", conf.getConfigInfo().c_str());
|
||||
|
||||
/*
|
||||
* segment can be one of (MPSegment, HMMSegment, MixSegment, QuerySegment ...)
|
||||
*/
|
||||
//MPSegment segment(dictPath, userDictPath);
|
||||
//HMMSegment segment(modelPath);
|
||||
MixSegment segment(dictPath, modelPath, userDictPath);
|
||||
//QuerySegment segment(dictPath, modelPath);
|
||||
|
||||
ReqHandler reqHandler(segment);
|
||||
CppJieba::Application app(dictPath,
|
||||
modelPath,
|
||||
userDictPath,
|
||||
idfPath,
|
||||
stopWordsPath);
|
||||
|
||||
ReqHandler reqHandler(app);
|
||||
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
|
||||
return sf.start();
|
||||
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
|
@ -17,9 +17,13 @@ enum CutMethod {
|
||||
|
||||
class Application {
|
||||
public:
|
||||
Application(const string& dictDir)
|
||||
: dictTrie_(pathJoin(dictDir, "jieba.dict.utf8")),
|
||||
model_(pathJoin(dictDir, "hmm_model.utf8")),
|
||||
Application(const string& dictPath,
|
||||
const string& modelPath,
|
||||
const string& userDictPath,
|
||||
const string& idfPath,
|
||||
const string& stopWordsPath)
|
||||
: dictTrie_(dictPath, userDictPath),
|
||||
model_(modelPath),
|
||||
mpSeg_(&dictTrie_),
|
||||
hmmSeg_(&model_),
|
||||
mixSeg_(&dictTrie_, &model_),
|
||||
@ -28,8 +32,8 @@ class Application {
|
||||
tagger_(&dictTrie_, &model_),
|
||||
extractor_(&dictTrie_,
|
||||
&model_,
|
||||
pathJoin(dictDir, "idf.utf8"),
|
||||
pathJoin(dictDir, "stop_words.utf8")) {
|
||||
idfPath,
|
||||
stopWordsPath) {
|
||||
}
|
||||
void cut(const string& sentence, vector<string>& words,
|
||||
CutMethod method) const {
|
||||
|
@ -55,21 +55,19 @@ class Config {
|
||||
ifs.close();
|
||||
}
|
||||
public:
|
||||
bool get(const string& key, string& value) const {
|
||||
string get(const string& key, const string& defaultvalue) const {
|
||||
map<string, string>::const_iterator it = map_.find(key);
|
||||
if(map_.end() != it) {
|
||||
value = it->second;
|
||||
return true;
|
||||
return it->second;
|
||||
}
|
||||
return false;
|
||||
return defaultvalue;
|
||||
}
|
||||
bool get(const string& key, int & value) const {
|
||||
string str;
|
||||
if(!get(key, str)) {
|
||||
return false;
|
||||
int get(const string& key, int defaultvalue) const {
|
||||
string str = get(key, "");
|
||||
if("" == str) {
|
||||
return defaultvalue;
|
||||
}
|
||||
value = atoi(str.c_str());
|
||||
return true;
|
||||
return atoi(str.c_str());
|
||||
}
|
||||
const char* operator [] (const char* key) const {
|
||||
if(NULL == key) {
|
||||
|
@ -16,7 +16,11 @@ void LoadSentences(const string& filepath, vector<string>& sentences) {
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
CppJieba::Application app("../dict/");
|
||||
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8",
|
||||
"../dict/idf.utf8",
|
||||
"../dict/stop_words.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
string s;
|
||||
|
6
test/testdata/server.conf
vendored
6
test/testdata/server.conf
vendored
@ -11,3 +11,9 @@ dict_path=../dict/jieba.dict.utf8
|
||||
|
||||
#model path
|
||||
model_path=../dict/hmm_model.utf8
|
||||
|
||||
user_dict_path=../dict/user.dict.utf8
|
||||
|
||||
idf_path=../dict/idf.utf8
|
||||
|
||||
stop_words_path=../dict/stop_words.utf8
|
||||
|
@ -4,7 +4,11 @@
|
||||
using namespace CppJieba;
|
||||
|
||||
TEST(ApplicationTest, Test1) {
|
||||
Application app("../dict/");
|
||||
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8",
|
||||
"../dict/idf.utf8",
|
||||
"../dict/stop_words.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user