mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
delete Application.hpp, use Jieba.hpp instead
This commit is contained in:
parent
4d56be920b
commit
16b69e35c1
@ -5,15 +5,17 @@
|
||||
#include <string.h>
|
||||
#include "limonp/Config.hpp"
|
||||
#include "husky/ThreadPoolServer.hpp"
|
||||
#include "Application.hpp"
|
||||
#include "Jieba.hpp"
|
||||
|
||||
using namespace husky;
|
||||
using namespace CppJieba;
|
||||
|
||||
class ReqHandler: public IRequestHandler {
|
||||
public:
|
||||
ReqHandler(const CppJieba::Application& app): app_(app) {
|
||||
ReqHandler(const CppJieba::Jieba& jieba)
|
||||
: jieba_(jieba) {
|
||||
}
|
||||
|
||||
virtual ~ReqHandler() {
|
||||
}
|
||||
|
||||
@ -24,35 +26,35 @@ class ReqHandler: public IRequestHandler {
|
||||
httpReq.GET("key", tmp);
|
||||
URLDecode(tmp, sentence);
|
||||
httpReq.GET("method", method);
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
jieba_.Cut(sentence, words, true);
|
||||
httpReq.GET("format", format);
|
||||
run(sentence, method, format, strSnd);
|
||||
Run(sentence, method, format, strSnd);
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual bool doPOST(const HttpReqInfo& httpReq, string& strSnd) {
|
||||
vector<string> words;
|
||||
run(httpReq.getBody(), "MIX", "simple", strSnd);
|
||||
Run(httpReq.getBody(), "MIX", "simple", strSnd);
|
||||
return true;
|
||||
}
|
||||
|
||||
void run(const string& sentence,
|
||||
void Run(const string& sentence,
|
||||
const string& method,
|
||||
const string& format,
|
||||
string& strSnd) const {
|
||||
vector<string> words;
|
||||
if ("MP" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MP);
|
||||
jieba_.Cut(sentence, words, false);
|
||||
} else if ("HMM" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_HMM);
|
||||
jieba_.CutHMM(sentence, words);
|
||||
} else if ("MIX" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
jieba_.Cut(sentence, words, true);
|
||||
} else if ("FULL" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_FULL);
|
||||
jieba_.CutAll(sentence, words);
|
||||
} else if ("QUERY" == method) {
|
||||
app_.cut(sentence, words, CppJieba::METHOD_QUERY);
|
||||
jieba_.CutForSearch(sentence, words);
|
||||
} else { // default
|
||||
app_.cut(sentence, words, CppJieba::METHOD_MIX);
|
||||
jieba_.Cut(sentence, words, false);
|
||||
}
|
||||
if (format == "simple") {
|
||||
join(words.begin(), words.end(), strSnd, " ");
|
||||
@ -61,10 +63,10 @@ class ReqHandler: public IRequestHandler {
|
||||
}
|
||||
}
|
||||
private:
|
||||
const CppJieba::Application& app_;
|
||||
const CppJieba::Jieba& jieba_;
|
||||
};
|
||||
|
||||
bool run(int argc, char** argv) {
|
||||
bool Run(int argc, char** argv) {
|
||||
if (argc < 2) {
|
||||
return false;
|
||||
}
|
||||
@ -78,24 +80,20 @@ bool run(int argc, char** argv) {
|
||||
string dictPath = conf.get("dict_path", "");
|
||||
string modelPath = conf.get("model_path", "");
|
||||
string userDictPath = conf.get("user_dict_path", "");
|
||||
string idfPath = conf.get("idf_path", "");
|
||||
string stopWordsPath = conf.get("stop_words_path", "");
|
||||
|
||||
LogInfo("config info: %s", conf.getConfigInfo().c_str());
|
||||
|
||||
CppJieba::Application app(dictPath,
|
||||
CppJieba::Jieba jieba(dictPath,
|
||||
modelPath,
|
||||
userDictPath,
|
||||
idfPath,
|
||||
stopWordsPath);
|
||||
userDictPath);
|
||||
|
||||
ReqHandler reqHandler(app);
|
||||
ReqHandler reqHandler(jieba);
|
||||
ThreadPoolServer sf(threadNumber, queueMaxSize, port, reqHandler);
|
||||
return sf.start();
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (!run(argc, argv)) {
|
||||
if (!Run(argc, argv)) {
|
||||
printf("usage: %s <config_file>\n", argv[0]);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
@ -1,87 +0,0 @@
|
||||
#ifndef CPPJIEBA_APPLICATION_H
|
||||
#define CPPJIEBA_APPLICATION_H
|
||||
|
||||
#include "Jieba.hpp"
|
||||
#include "PosTagger.hpp"
|
||||
#include "KeywordExtractor.hpp"
|
||||
|
||||
namespace CppJieba {
|
||||
|
||||
enum CutMethod {
|
||||
METHOD_MP,
|
||||
METHOD_HMM,
|
||||
METHOD_MIX,
|
||||
METHOD_FULL,
|
||||
METHOD_QUERY,
|
||||
METHOD_LEVEL
|
||||
};
|
||||
|
||||
class Application {
|
||||
public:
|
||||
Application(const string& dict_path,
|
||||
const string& model_path,
|
||||
const string& user_dict_path,
|
||||
const string& idf_path,
|
||||
const string& stopWords_path)
|
||||
: jieba_(dict_path, model_path, user_dict_path),
|
||||
tagger_(jieba_.GetDictTrie(), jieba_.GetHMMModel()),
|
||||
extractor_(jieba_.GetDictTrie(),
|
||||
jieba_.GetHMMModel(),
|
||||
idf_path,
|
||||
stopWords_path) {
|
||||
}
|
||||
void cut(const string& sentence, vector<string>& words,
|
||||
CutMethod method = METHOD_MIX) const {
|
||||
switch(method) {
|
||||
case METHOD_MP:
|
||||
jieba_.Cut(sentence, words);
|
||||
break;
|
||||
case METHOD_HMM:
|
||||
jieba_.CutHMM(sentence, words);
|
||||
break;
|
||||
case METHOD_MIX:
|
||||
jieba_.Cut(sentence, words);
|
||||
break;
|
||||
case METHOD_FULL:
|
||||
jieba_.CutAll(sentence, words);
|
||||
break;
|
||||
case METHOD_QUERY:
|
||||
jieba_.CutForSearch(sentence, words);
|
||||
break;
|
||||
case METHOD_LEVEL:
|
||||
jieba_.CutLevel(sentence, words);
|
||||
break;
|
||||
default:
|
||||
LogError("argument method is illegal.");
|
||||
}
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
jieba_.CutLevel(sentence, words);
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<string>& words, size_t max_word_len) const {
|
||||
jieba_.CutSmall(sentence, words, max_word_len);
|
||||
}
|
||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return jieba_.InsertUserWord(word, tag);
|
||||
}
|
||||
void tag(const string& str, vector<pair<string, string> >& res) const {
|
||||
tagger_.tag(str, res);
|
||||
}
|
||||
void extract(const string& str, vector<pair<string, double> >& keyword,
|
||||
size_t topN) const {
|
||||
extractor_.extract(str, keyword, topN);
|
||||
}
|
||||
~Application() {
|
||||
}
|
||||
|
||||
private:
|
||||
Jieba jieba_;
|
||||
PosTagger tagger_;
|
||||
KeywordExtractor extractor_;
|
||||
}; // class Application
|
||||
|
||||
} // namespace CppJieba
|
||||
|
||||
#endif // CPPJIEBA_APPLICATION_H
|
@ -1,59 +1,49 @@
|
||||
#include "../src/Application.hpp"
|
||||
#include "../src/Jieba.hpp"
|
||||
|
||||
using namespace CppJieba;
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||
CppJieba::Jieba jieba("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8",
|
||||
"../dict/idf.utf8",
|
||||
"../dict/stop_words.utf8");
|
||||
"../dict/user.dict.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
string s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
||||
|
||||
cout << "[demo] DEFAULT" << endl;
|
||||
app.cut(s, words);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
cout << "[demo] Cut With HMM" << endl;
|
||||
jieba.Cut(s, words, true);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] METHOD_MP" << endl;
|
||||
app.cut(s, words, METHOD_MP);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
cout << "[demo] Cut Without HMM " << endl;
|
||||
jieba.Cut(s, words, false);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] METHOD_HMM" << endl;
|
||||
app.cut(s, words, METHOD_HMM);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
cout << "[demo] CutAll" << endl;
|
||||
jieba.CutAll(s, words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] METHOD_MIX" << endl;
|
||||
app.cut(s, words, METHOD_MIX);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] METHOD_FULL" << endl;
|
||||
app.cut(s, words, METHOD_FULL);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] METHOD_QUERY" << endl;
|
||||
app.cut(s, words, METHOD_QUERY);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] TAGGING" << endl;
|
||||
vector<pair<string, string> > tagres;
|
||||
app.tag(s, tagres);
|
||||
cout << s << endl;
|
||||
cout << tagres << endl;;
|
||||
|
||||
cout << "[demo] KEYWORD" << endl;
|
||||
vector<pair<string, double> > keywordres;
|
||||
app.extract(s, keywordres, 5);
|
||||
cout << s << endl;
|
||||
cout << keywordres << endl;
|
||||
cout << "[demo] CutForSearch" << endl;
|
||||
jieba.CutForSearch(s, words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
cout << "[demo] Insert User Word" << endl;
|
||||
app.cut("男默女泪", words);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
app.InsertUserWord("男默女泪");
|
||||
app.cut("男默女泪", words);
|
||||
cout << join(words.begin(), words.end(), "/") << endl;
|
||||
jieba.Cut("男默女泪", words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
jieba.InsertUserWord("男默女泪");
|
||||
jieba.Cut("男默女泪", words);
|
||||
cout << limonp::join(words.begin(), words.end(), "/") << endl;
|
||||
|
||||
//cout << "[demo] TAGGING" << endl;
|
||||
//vector<pair<string, string> > tagres;
|
||||
//jieba.tag(s, tagres);
|
||||
//cout << s << endl;
|
||||
//cout << tagres << endl;;
|
||||
|
||||
//cout << "[demo] KEYWORD" << endl;
|
||||
//vector<pair<string, double> > keywordres;
|
||||
//jieba.extract(s, keywordres, 5);
|
||||
//cout << s << endl;
|
||||
//cout << keywordres << endl;
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ ADD_EXECUTABLE(test.run
|
||||
trie_test.cpp
|
||||
segments_test.cpp
|
||||
pos_tagger_test.cpp
|
||||
application_test.cpp
|
||||
jieba_test.cpp
|
||||
pre_filter_test.cpp
|
||||
)
|
||||
TARGET_LINK_LIBRARIES(gtest pthread)
|
||||
|
@ -1,93 +0,0 @@
|
||||
#include "src/Application.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
TEST(ApplicationTest, Test1) {
|
||||
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8",
|
||||
"../dict/idf.utf8",
|
||||
"../dict/stop_words.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
|
||||
app.cut("他来到了网易杭研大厦", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
app.cut("我来自北京邮电大学。", words, METHOD_MP);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
||||
|
||||
app.cut("南京市长江大桥", words, 3);
|
||||
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", result << words);
|
||||
|
||||
app.cut("我来自北京邮电大学。。。学号123456", words, METHOD_HMM);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我来\", \"自北京\", \"邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\"]", result);
|
||||
|
||||
app.cut("我来自北京邮电大学。。。学号123456,用AK47", words, METHOD_MIX);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\", \",\", \"用\", \"AK47\"]", result);
|
||||
|
||||
app.cut("他来到了网易杭研大厦", words, METHOD_MIX);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
app.cut("我来自北京邮电大学", words, METHOD_FULL);
|
||||
result << words;
|
||||
ASSERT_EQ(result, "[\"我\", \"来自\", \"北京\", \"北京邮电\", \"北京邮电大学\", \"邮电\", \"邮电大学\", \"电大\", \"大学\"]");
|
||||
|
||||
app.cut("他来到了网易杭研大厦", words, METHOD_QUERY);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
app.cut("南京市长江大桥", words, METHOD_LEVEL);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
|
||||
|
||||
vector<pair<string, size_t> > word_levels;
|
||||
app.cut("南京市长江大桥", word_levels);
|
||||
result << word_levels;
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
||||
|
||||
vector<pair<string, string> > tagres;
|
||||
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||
result << tagres;
|
||||
ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
|
||||
|
||||
vector<pair<string, double> > keywordres;
|
||||
app.extract("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。", keywordres, 5);
|
||||
result << keywordres;
|
||||
ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
||||
}
|
||||
|
||||
TEST(ApplicationTest, InsertUserWord) {
|
||||
CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8",
|
||||
"../dict/idf.utf8",
|
||||
"../dict/stop_words.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
|
||||
app.cut("男默女泪", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"男默\", \"女泪\"]", result);
|
||||
|
||||
ASSERT_TRUE(app.InsertUserWord("男默女泪"));
|
||||
|
||||
app.cut("男默女泪", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"男默女泪\"]", result);
|
||||
|
||||
for (size_t i = 0; i < 100; i++) {
|
||||
string newWord;
|
||||
newWord << rand();
|
||||
ASSERT_TRUE(app.InsertUserWord(newWord));
|
||||
app.cut(newWord, words);
|
||||
result << words;
|
||||
ASSERT_EQ(result, string_format("[\"%s\"]", newWord.c_str()));
|
||||
}
|
||||
}
|
85
test/unittest/jieba_test.cpp
Normal file
85
test/unittest/jieba_test.cpp
Normal file
@ -0,0 +1,85 @@
|
||||
#include "src/Jieba.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
TEST(JiebaTest, Test1) {
|
||||
CppJieba::Jieba jieba("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
|
||||
jieba.Cut("他来到了网易杭研大厦", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
jieba.Cut("我来自北京邮电大学。", words, false);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
||||
|
||||
jieba.CutSmall("南京市长江大桥", words, 3);
|
||||
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", result << words);
|
||||
|
||||
jieba.CutHMM("我来自北京邮电大学。。。学号123456", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我来\", \"自北京\", \"邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\"]", result);
|
||||
|
||||
jieba.Cut("我来自北京邮电大学。。。学号123456,用AK47", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\", \",\", \"用\", \"AK47\"]", result);
|
||||
|
||||
jieba.CutAll("我来自北京邮电大学", words);
|
||||
result << words;
|
||||
ASSERT_EQ(result, "[\"我\", \"来自\", \"北京\", \"北京邮电\", \"北京邮电大学\", \"邮电\", \"邮电大学\", \"电大\", \"大学\"]");
|
||||
|
||||
jieba.CutForSearch("他来到了网易杭研大厦", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
jieba.CutLevel("南京市长江大桥", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
|
||||
|
||||
vector<pair<string, size_t> > word_levels;
|
||||
jieba.CutLevel("南京市长江大桥", word_levels);
|
||||
result << word_levels;
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
||||
|
||||
//vector<pair<string, string> > tagres;
|
||||
//jieba.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||
//result << tagres;
|
||||
//ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
|
||||
|
||||
//vector<pair<string, double> > keywordres;
|
||||
//jieba.extract("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。", keywordres, 5);
|
||||
//result << keywordres;
|
||||
//ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
||||
}
|
||||
|
||||
TEST(JiebaTest, InsertUserWord) {
|
||||
CppJieba::Jieba jieba("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8");
|
||||
vector<string> words;
|
||||
string result;
|
||||
|
||||
jieba.Cut("男默女泪", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"男默\", \"女泪\"]", result);
|
||||
|
||||
ASSERT_TRUE(jieba.InsertUserWord("男默女泪"));
|
||||
|
||||
jieba.Cut("男默女泪", words);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"男默女泪\"]", result);
|
||||
|
||||
for (size_t i = 0; i < 100; i++) {
|
||||
string newWord;
|
||||
newWord << rand();
|
||||
ASSERT_TRUE(jieba.InsertUserWord(newWord));
|
||||
jieba.Cut(newWord, words);
|
||||
result << words;
|
||||
ASSERT_EQ(result, string_format("[\"%s\"]", newWord.c_str()));
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user