Overload the constructor in KeywordExtractor

This commit is contained in:
yanyiwu 2016-01-13 00:40:46 +08:00
parent a6c6e8df8c
commit 193e717d22
5 changed files with 38 additions and 14 deletions

View File

@ -1,5 +1,9 @@
# CppJieba ChangeLog
## next version
+ 重载 KeywordExtractor 的构造函数,可以传入 Jieba 进行字典和模型的构造。
## v4.3.0
源码目录布局调整:

View File

@ -71,6 +71,9 @@ make test
[demo] TAGGING
我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。
["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ":x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ":x", "当上:t", "CEO:eng", ":x", "走上:v", "人生:n", "巅峰:n", "。:x"]
[demo] KEYWORD
我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。
["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"]
```
详细请看 `test/demo.cpp`.

View File

@ -95,6 +95,6 @@ class Jieba {
}; // class Jieba
} // namespace Jieba
} // namespace cppjieba
#endif // CPPJIEAB_JIEBA_H

View File

@ -1,9 +1,9 @@
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
#include "MixSegment.hpp"
#include <cmath>
#include <set>
#include "Jieba.hpp"
namespace cppjieba {
using namespace limonp;
@ -24,7 +24,14 @@ class KeywordExtractor {
const HMMModel* model,
const string& idfPath,
const string& stopWordPath)
: segment_(dictTrie, model){
: segment_(dictTrie, model) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
// Overload that builds the extractor from an existing Jieba instance:
// segment_ is initialized with the dictionary trie and HMM model obtained
// from `jieba` (GetDictTrie / GetHMMModel) instead of taking them as
// separate arguments.
//   jieba        - source of the dictionary trie and HMM model.
//   idfPath      - passed to LoadIdfDict.
//   stopWordPath - passed to LoadStopWordDict.
// NOTE(review): segment_ presumably keeps referring to data owned by `jieba`,
// so `jieba` would have to outlive this extractor -- confirm against MixSegment.
KeywordExtractor(const Jieba& jieba,
const string& idfPath,
const string& stopWordPath)
: segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
LoadIdfDict(idfPath);
LoadStopWordDict(stopWordPath);
}
@ -133,8 +140,8 @@ class KeywordExtractor {
double idfAverage_;
unordered_set<string> stopWords_;
};
}
}; // class KeywordExtractor
} // namespace cppjieba
#endif

View File

@ -1,11 +1,18 @@
#include "cppjieba/Jieba.hpp"
#include "cppjieba/KeywordExtractor.hpp"
using namespace std;
// Resource file locations used by this demo. All are relative paths
// (one directory up, under dict/), so they resolve against the process's
// current working directory.
// DICT_PATH, HMM_PATH and USER_DICT_PATH are the arguments given to the
// cppjieba::Jieba constructor in main().
const char* const DICT_PATH = "../dict/jieba.dict.utf8";
const char* const HMM_PATH = "../dict/hmm_model.utf8";
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
// IDF_PATH and STOP_WORD_PATH are the arguments given to the
// cppjieba::KeywordExtractor constructor in main().
const char* const IDF_PATH = "../dict/idf.utf8";
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
int main(int argc, char** argv) {
cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
"../dict/hmm_model.utf8",
"../dict/user.dict.utf8");
cppjieba::Jieba jieba(DICT_PATH,
HMM_PATH,
USER_DICT_PATH);
vector<string> words;
string result;
string s = "我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。";
@ -50,11 +57,14 @@ int main(int argc, char** argv) {
cout << s << endl;
cout << tagres << endl;;
//cout << "[demo] KEYWORD" << endl;
//vector<pair<string, double> > keywordres;
//jieba.Extract(s, keywordres, 5);
//cout << s << endl;
//cout << keywordres << endl;
cppjieba::KeywordExtractor extractor(jieba,
IDF_PATH,
STOP_WORD_PATH);
cout << "[demo] KEYWORD" << endl;
const size_t topk = 5;
vector<pair<string, double> > keywordres;
extractor.Extract(s, keywordres, topk);
cout << s << endl;
cout << keywordres << endl;
return EXIT_SUCCESS;
}