mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
overload constructor in KeywordExtractor
This commit is contained in:
parent
a6c6e8df8c
commit
193e717d22
@ -1,5 +1,9 @@
|
||||
# CppJieba ChangeLog
|
||||
|
||||
## next version
|
||||
|
||||
+ 重载 KeywordExtractor 的构造函数,可以传入 Jieba 进行字典和模型的构造。
|
||||
|
||||
## v4.3.0
|
||||
|
||||
源码目录布局调整:
|
||||
|
@ -71,6 +71,9 @@ make test
|
||||
[demo] TAGGING
|
||||
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
||||
["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
|
||||
[demo] KEYWORD
|
||||
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
||||
["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"]
|
||||
```
|
||||
|
||||
详细请看 `test/demo.cpp`.
|
||||
|
@ -95,6 +95,6 @@ class Jieba {
|
||||
|
||||
}; // class Jieba
|
||||
|
||||
} // namespace Jieba
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEAB_JIEBA_H
|
||||
|
@ -1,9 +1,9 @@
|
||||
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||
|
||||
#include "MixSegment.hpp"
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
#include "Jieba.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
using namespace limonp;
|
||||
@ -24,7 +24,14 @@ class KeywordExtractor {
|
||||
const HMMModel* model,
|
||||
const string& idfPath,
|
||||
const string& stopWordPath)
|
||||
: segment_(dictTrie, model){
|
||||
: segment_(dictTrie, model) {
|
||||
LoadIdfDict(idfPath);
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
KeywordExtractor(const Jieba& jieba,
|
||||
const string& idfPath,
|
||||
const string& stopWordPath)
|
||||
: segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
||||
LoadIdfDict(idfPath);
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
@ -133,8 +140,8 @@ class KeywordExtractor {
|
||||
double idfAverage_;
|
||||
|
||||
unordered_set<string> stopWords_;
|
||||
};
|
||||
}
|
||||
}; // class KeywordExtractor
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,11 +1,18 @@
|
||||
#include "cppjieba/Jieba.hpp"
|
||||
#include "cppjieba/KeywordExtractor.hpp"
|
||||
|
||||
using namespace std;
|
||||
|
||||
const char* const DICT_PATH = "../dict/jieba.dict.utf8";
|
||||
const char* const HMM_PATH = "../dict/hmm_model.utf8";
|
||||
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
|
||||
const char* const IDF_PATH = "../dict/idf.utf8";
|
||||
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
|
||||
"../dict/hmm_model.utf8",
|
||||
"../dict/user.dict.utf8");
|
||||
cppjieba::Jieba jieba(DICT_PATH,
|
||||
HMM_PATH,
|
||||
USER_DICT_PATH);
|
||||
vector<string> words;
|
||||
string result;
|
||||
string s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
||||
@ -50,11 +57,14 @@ int main(int argc, char** argv) {
|
||||
cout << s << endl;
|
||||
cout << tagres << endl;;
|
||||
|
||||
//cout << "[demo] KEYWORD" << endl;
|
||||
//vector<pair<string, double> > keywordres;
|
||||
//jieba.Extract(s, keywordres, 5);
|
||||
//cout << s << endl;
|
||||
//cout << keywordres << endl;
|
||||
|
||||
cppjieba::KeywordExtractor extractor(jieba,
|
||||
IDF_PATH,
|
||||
STOP_WORD_PATH);
|
||||
cout << "[demo] KEYWORD" << endl;
|
||||
const size_t topk = 5;
|
||||
vector<pair<string, double> > keywordres;
|
||||
extractor.Extract(s, keywordres, topk);
|
||||
cout << s << endl;
|
||||
cout << keywordres << endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user