mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba
This commit is contained in:
parent
8ce2af9706
commit
3528b6296a
@ -2,9 +2,9 @@ PROJECT(CPPJIEBA)
|
|||||||
|
|
||||||
CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
|
CMAKE_MINIMUM_REQUIRED (VERSION 2.6)
|
||||||
|
|
||||||
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
#if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||||
set (CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "default install path" FORCE )
|
#endif()
|
||||||
endif()
|
set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE )
|
||||||
|
|
||||||
ADD_DEFINITIONS(-O3 -Wall -g)
|
ADD_DEFINITIONS(-O3 -Wall -g)
|
||||||
IF(APPLE) # mac os
|
IF(APPLE) # mac os
|
||||||
|
@ -7,6 +7,8 @@
|
|||||||
3. 修改 Code Style ,参照 google code style 。
|
3. 修改 Code Style ,参照 google code style 。
|
||||||
4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。
|
4. 增加更详细的错误日志,在初始化过程中合理使用LogFatal。
|
||||||
5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。
|
5. 增加 Application 这个类,整合了所有CppJieba的功能进去,以后用户只需要使用这个类即可。
|
||||||
|
6. 修改 cjserver 服务,可以通过http参数使用不同切词算法进行切词。
|
||||||
|
7. 修改 make install 的安装目录,统一安装到同一个目录 /usr/local/cppjieba 。
|
||||||
|
|
||||||
## v2.4.4
|
## v2.4.4
|
||||||
|
|
||||||
|
24
README.md
24
README.md
@ -10,7 +10,7 @@ CppJieba是"结巴"中文分词的C++版本
|
|||||||
|
|
||||||
+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。
|
+ 源代码都写进头文件`src/*.hpp`里,`include`即可使用。
|
||||||
+ 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。
|
+ 支持`utf-8, gbk`编码,但是推荐使用`utf-8`编码, 因为`gbk`编码缺少严格测试,慎用。
|
||||||
+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选)。
|
+ 内置分词服务`server/server.cpp`,在linux环境下可安装使用(可选),可通过http参数选择不同分词算法进行分词。
|
||||||
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
|
+ 项目自带较为完善的单元测试,核心功能中文分词(utf8)的稳定性接受过线上环境检验。
|
||||||
+ 支持载自定义用户词典。
|
+ 支持载自定义用户词典。
|
||||||
+ 支持 `linux` , `mac osx` 操作系统。
|
+ 支持 `linux` , `mac osx` 操作系统。
|
||||||
@ -59,7 +59,7 @@ make
|
|||||||
### 启动服务
|
### 启动服务
|
||||||
|
|
||||||
```
|
```
|
||||||
./bin/cjserver ../test/testdata/server.conf
|
./bin/cjserver ../conf/server_example.conf
|
||||||
```
|
```
|
||||||
|
|
||||||
### 客户端请求示例
|
### 客户端请求示例
|
||||||
@ -80,8 +80,18 @@ curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple"
|
|||||||
南京市 长江大桥
|
南京市 长江大桥
|
||||||
```
|
```
|
||||||
|
|
||||||
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
|
默认切词算法是MixSegment切词算法,如果想要使用其他算法切词,可以使用参数method来设置。
|
||||||
|
示例如下:
|
||||||
|
|
||||||
|
```
|
||||||
|
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MP"
|
||||||
|
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=HMM"
|
||||||
|
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=MIX"
|
||||||
|
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=FULL"
|
||||||
|
curl "http://127.0.0.1:11200/?key=南京市长江大桥&format=simple&method=QUERY"
|
||||||
|
```
|
||||||
|
|
||||||
|
用 chrome 浏览器打开也行 ( chrome 设置默认编码是`utf-8`):
|
||||||
|
|
||||||
同时,也支持HTTP POST模式,使用如下调用:
|
同时,也支持HTTP POST模式,使用如下调用:
|
||||||
|
|
||||||
@ -107,15 +117,15 @@ sudo make install
|
|||||||
### 服务启动和停止(仅限 linux 系统)
|
### 服务启动和停止(仅限 linux 系统)
|
||||||
|
|
||||||
```
|
```
|
||||||
/etc/init.d/cjserver.start >> /dev/null 2>&1
|
cd /usr/local/cppjieba
|
||||||
/etc/init.d/cjserver.stop
|
./script/cjserver.start
|
||||||
|
./script/cjserver.stop
|
||||||
```
|
```
|
||||||
|
|
||||||
### 卸载服务(仅限 linux 系统)
|
### 卸载服务(仅限 linux 系统)
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cd build/
|
rm -rf /usr/local/cppjieba
|
||||||
cat install_manifest.txt | sudo xargs rm -rf
|
|
||||||
```
|
```
|
||||||
|
|
||||||
## Docker 示例
|
## Docker 示例
|
||||||
|
@ -1 +1 @@
|
|||||||
INSTALL(FILES server.conf DESTINATION /etc/CppJieba)
|
INSTALL(FILES server.conf DESTINATION conf)
|
||||||
|
@ -7,10 +7,14 @@ thread_number=4
|
|||||||
queue_max_size=4096
|
queue_max_size=4096
|
||||||
|
|
||||||
#dict path
|
#dict path
|
||||||
dict_path=/usr/share/CppJieba/dict/jieba.dict.utf8
|
dict_path=/usr/local/cppjieba/dict/jieba.dict.utf8
|
||||||
|
|
||||||
#model path
|
#model path
|
||||||
model_path=/usr/share/CppJieba/dict/hmm_model.utf8
|
model_path=/usr/local/cppjieba/dict/hmm_model.utf8
|
||||||
|
|
||||||
#user_dict_path
|
#user_dict_path
|
||||||
#user_dict_path=/usr/share/CppJieba/dict/user.dict.utf8
|
user_dict_path=/usr/local/cppjieba/dict/user.dict.utf8
|
||||||
|
|
||||||
|
idf_path=/usr/local/cppjieba/dict/idf.utf8
|
||||||
|
|
||||||
|
stop_words_path=/usr/local/cppjieba/dict/stop_words.utf8
|
||||||
|
19
conf/server_example.conf
Normal file
19
conf/server_example.conf
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# config
|
||||||
|
|
||||||
|
#socket listen port
|
||||||
|
port=11200
|
||||||
|
|
||||||
|
thread_number=4
|
||||||
|
queue_max_size=4096
|
||||||
|
|
||||||
|
#dict path
|
||||||
|
dict_path=../dict/jieba.dict.utf8
|
||||||
|
|
||||||
|
#model path
|
||||||
|
model_path=../dict/hmm_model.utf8
|
||||||
|
|
||||||
|
user_dict_path=../dict/user.dict.utf8
|
||||||
|
|
||||||
|
idf_path=../dict/idf.utf8
|
||||||
|
|
||||||
|
stop_words_path=../dict/stop_words.utf8
|
@ -1 +1,9 @@
|
|||||||
INSTALL(FILES hmm_model.utf8 jieba.dict.utf8 user.dict.utf8 DESTINATION share/CppJieba/dict)
|
INSTALL(FILES
|
||||||
|
hmm_model.utf8
|
||||||
|
jieba.dict.utf8
|
||||||
|
user.dict.utf8
|
||||||
|
idf.utf8
|
||||||
|
stop_words.utf8
|
||||||
|
DESTINATION
|
||||||
|
dict
|
||||||
|
)
|
||||||
|
@ -1 +1,6 @@
|
|||||||
INSTALL(PROGRAMS cjserver.start cjserver.stop DESTINATION /etc/init.d/)
|
INSTALL(PROGRAMS
|
||||||
|
cjserver.start
|
||||||
|
cjserver.stop
|
||||||
|
DESTINATION
|
||||||
|
script
|
||||||
|
)
|
||||||
|
@ -7,6 +7,6 @@ if [ ! -z "${PID}" ]
|
|||||||
then
|
then
|
||||||
echo "please stop cjserver first."
|
echo "please stop cjserver first."
|
||||||
else
|
else
|
||||||
cjserver /etc/CppJieba/server.conf &
|
/usr/local/cppjieba/bin/cjserver /usr/local/cppjieba/conf/server.conf >> /dev/null 2>&1 &
|
||||||
echo "service started."
|
echo "service started."
|
||||||
fi
|
fi
|
||||||
|
@ -5,5 +5,4 @@ INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/src)
|
|||||||
ADD_EXECUTABLE(cjserver server.cpp)
|
ADD_EXECUTABLE(cjserver server.cpp)
|
||||||
TARGET_LINK_LIBRARIES(cjserver pthread)
|
TARGET_LINK_LIBRARIES(cjserver pthread)
|
||||||
|
|
||||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
INSTALL(TARGETS cjserver DESTINATION bin)
|
||||||
|
|
||||||
|
@ -5,40 +5,63 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "Limonp/Config.hpp"
|
#include "Limonp/Config.hpp"
|
||||||
#include "Husky/ThreadPoolServer.hpp"
|
#include "Husky/ThreadPoolServer.hpp"
|
||||||
#include "MixSegment.hpp"
|
#include "Application.hpp"
|
||||||
#include "QuerySegment.hpp"
|
|
||||||
#include "FullSegment.hpp"
|
|
||||||
|
|
||||||
using namespace Husky;
|
using namespace Husky;
|
||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
|
|
||||||
class ReqHandler: public IRequestHandler {
|
class ReqHandler: public IRequestHandler {
|
||||||
public:
|
public:
|
||||||
ReqHandler(const ISegment& segment): _segment(segment) {
|
ReqHandler(const CppJieba::Application& app): app_(app) {
|
||||||
|
}
|
||||||
|
virtual ~ReqHandler() {
|
||||||
}
|
}
|
||||||
virtual ~ReqHandler() {};
|
|
||||||
|
|
||||||
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const {
|
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) const {
|
||||||
string sentence, tmp;
|
string sentence, method, format;
|
||||||
|
string tmp;
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
httpReq.GET("key", tmp);
|
httpReq.GET("key", tmp);
|
||||||
URLDecode(tmp, sentence);
|
URLDecode(tmp, sentence);
|
||||||
_segment.cut(sentence, words);
|
httpReq.GET("method", method);
|
||||||
if(httpReq.GET("format", tmp) && tmp == "simple") {
|
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||||
join(words.begin(), words.end(), strSnd, " ");
|
httpReq.GET("format", format);
|
||||||
return true;
|
run(sentence, method, format, strSnd);
|
||||||
}
|
|
||||||
strSnd << words;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const {
|
virtual bool do_POST(const HttpReqInfo& httpReq, string& strSnd) const {
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
_segment.cut(httpReq.getBody(), words);
|
run(httpReq.getBody(), "MIX", "simple", strSnd);
|
||||||
strSnd << words;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void run(const string& sentence,
|
||||||
|
const string& method,
|
||||||
|
const string& format,
|
||||||
|
string& strSnd) const {
|
||||||
|
vector<string> words;
|
||||||
|
if ("MP" == method) {
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_MP);
|
||||||
|
} else if ("HMM" == method) {
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_HMM);
|
||||||
|
} else if ("MIX" == method) {
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||||
|
} else if ("FULL" == method) {
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_FULL);
|
||||||
|
} else if ("QUERY" == method) {
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_QUERY);
|
||||||
|
} else { // default
|
||||||
|
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||||
|
}
|
||||||
|
if(format == "simple") {
|
||||||
|
join(words.begin(), words.end(), strSnd, " ");
|
||||||
|
} else {
|
||||||
|
strSnd << words;
|
||||||
|
}
|
||||||
|
}
|
||||||
private:
|
private:
|
||||||
const ISegment& _segment;
|
const CppJieba::Application& app_;
|
||||||
};
|
};
|
||||||
|
|
||||||
bool run(int argc, char** argv) {
|
bool run(int argc, char** argv) {
|
||||||
@ -49,35 +72,26 @@ bool run(int argc, char** argv) {
|
|||||||
if(!conf) {
|
if(!conf) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
int port = 0;
|
int port = conf.get("port", 1339);
|
||||||
int threadNumber = 0;
|
int threadNumber = conf.get("thread_number", 4);
|
||||||
int queueMaxSize = 0;
|
int queueMaxSize = conf.get("queue_max_size", 1024);
|
||||||
string dictPath;
|
string dictPath = conf.get("dict_path", "");
|
||||||
string modelPath;
|
string modelPath = conf.get("model_path", "");
|
||||||
string userDictPath;
|
string userDictPath = conf.get("user_dict_path", "");
|
||||||
LIMONP_CHECK(conf.get("port", port));
|
string idfPath = conf.get("idf_path", "");
|
||||||
LIMONP_CHECK(conf.get("thread_number", threadNumber));
|
string stopWordsPath = conf.get("stop_words_path", "");
|
||||||
LIMONP_CHECK(conf.get("queue_max_size", queueMaxSize));
|
|
||||||
LIMONP_CHECK(conf.get("dict_path", dictPath));
|
|
||||||
LIMONP_CHECK(conf.get("model_path", modelPath));
|
|
||||||
if(!conf.get("user_dict_path", userDictPath)) { //optional
|
|
||||||
userDictPath = "";
|
|
||||||
}
|
|
||||||
|
|
||||||
LogInfo("config info: %s", conf.getConfigInfo().c_str());
|
LogInfo("config info: %s", conf.getConfigInfo().c_str());
|
||||||
|
|
||||||
/*
|
CppJieba::Application app(dictPath,
|
||||||
* segment can be one of (MPSegment, HMMSegment, MixSegment, QuerySegment ...)
|
modelPath,
|
||||||
*/
|
userDictPath,
|
||||||
//MPSegment segment(dictPath, userDictPath);
|
idfPath,
|
||||||
//HMMSegment segment(modelPath);
|
stopWordsPath);
|
||||||
MixSegment segment(dictPath, modelPath, userDictPath);
|
|
||||||
//QuerySegment segment(dictPath, modelPath);
|
|
||||||
|
|
||||||
ReqHandler reqHandler(segment);
|
ReqHandler reqHandler(app);
|
||||||
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
|
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
|
||||||
return sf.start();
|
return sf.start();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char* argv[]) {
|
int main(int argc, char* argv[]) {
|
||||||
|
@ -17,9 +17,13 @@ enum CutMethod {
|
|||||||
|
|
||||||
class Application {
|
class Application {
|
||||||
public:
|
public:
|
||||||
Application(const string& dictDir)
|
Application(const string& dictPath,
|
||||||
: dictTrie_(pathJoin(dictDir, "jieba.dict.utf8")),
|
const string& modelPath,
|
||||||
model_(pathJoin(dictDir, "hmm_model.utf8")),
|
const string& userDictPath,
|
||||||
|
const string& idfPath,
|
||||||
|
const string& stopWordsPath)
|
||||||
|
: dictTrie_(dictPath, userDictPath),
|
||||||
|
model_(modelPath),
|
||||||
mpSeg_(&dictTrie_),
|
mpSeg_(&dictTrie_),
|
||||||
hmmSeg_(&model_),
|
hmmSeg_(&model_),
|
||||||
mixSeg_(&dictTrie_, &model_),
|
mixSeg_(&dictTrie_, &model_),
|
||||||
@ -28,8 +32,8 @@ class Application {
|
|||||||
tagger_(&dictTrie_, &model_),
|
tagger_(&dictTrie_, &model_),
|
||||||
extractor_(&dictTrie_,
|
extractor_(&dictTrie_,
|
||||||
&model_,
|
&model_,
|
||||||
pathJoin(dictDir, "idf.utf8"),
|
idfPath,
|
||||||
pathJoin(dictDir, "stop_words.utf8")) {
|
stopWordsPath) {
|
||||||
}
|
}
|
||||||
void cut(const string& sentence, vector<string>& words,
|
void cut(const string& sentence, vector<string>& words,
|
||||||
CutMethod method) const {
|
CutMethod method) const {
|
||||||
|
@ -55,21 +55,19 @@ class Config {
|
|||||||
ifs.close();
|
ifs.close();
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
bool get(const string& key, string& value) const {
|
string get(const string& key, const string& defaultvalue) const {
|
||||||
map<string, string>::const_iterator it = map_.find(key);
|
map<string, string>::const_iterator it = map_.find(key);
|
||||||
if(map_.end() != it) {
|
if(map_.end() != it) {
|
||||||
value = it->second;
|
return it->second;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
return false;
|
return defaultvalue;
|
||||||
}
|
}
|
||||||
bool get(const string& key, int & value) const {
|
int get(const string& key, int defaultvalue) const {
|
||||||
string str;
|
string str = get(key, "");
|
||||||
if(!get(key, str)) {
|
if("" == str) {
|
||||||
return false;
|
return defaultvalue;
|
||||||
}
|
}
|
||||||
value = atoi(str.c_str());
|
return atoi(str.c_str());
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
const char* operator [] (const char* key) const {
|
const char* operator [] (const char* key) const {
|
||||||
if(NULL == key) {
|
if(NULL == key) {
|
||||||
|
@ -16,7 +16,11 @@ void LoadSentences(const string& filepath, vector<string>& sentences) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
CppJieba::Application app("../dict/");
|
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||||
|
"../dict/hmm_model.utf8",
|
||||||
|
"../dict/user.dict.utf8",
|
||||||
|
"../dict/idf.utf8",
|
||||||
|
"../dict/stop_words.utf8");
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
string result;
|
string result;
|
||||||
string s;
|
string s;
|
||||||
|
6
test/testdata/server.conf
vendored
6
test/testdata/server.conf
vendored
@ -11,3 +11,9 @@ dict_path=../dict/jieba.dict.utf8
|
|||||||
|
|
||||||
#model path
|
#model path
|
||||||
model_path=../dict/hmm_model.utf8
|
model_path=../dict/hmm_model.utf8
|
||||||
|
|
||||||
|
user_dict_path=../dict/user.dict.utf8
|
||||||
|
|
||||||
|
idf_path=../dict/idf.utf8
|
||||||
|
|
||||||
|
stop_words_path=../dict/stop_words.utf8
|
||||||
|
@ -4,7 +4,11 @@
|
|||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
|
|
||||||
TEST(ApplicationTest, Test1) {
|
TEST(ApplicationTest, Test1) {
|
||||||
Application app("../dict/");
|
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||||
|
"../dict/hmm_model.utf8",
|
||||||
|
"../dict/user.dict.utf8",
|
||||||
|
"../dict/idf.utf8",
|
||||||
|
"../dict/stop_words.utf8");
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
string result;
|
string result;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user