mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
overload constructor in KeywordExtractor
This commit is contained in:
parent
a6c6e8df8c
commit
193e717d22
@ -1,5 +1,9 @@
|
|||||||
# CppJieba ChangeLog
|
# CppJieba ChangeLog
|
||||||
|
|
||||||
|
## next version
|
||||||
|
|
||||||
|
+ 重载 KeywordExtractor 的构造函数,可以传入 Jieba 进行字典和模型的构造。
|
||||||
|
|
||||||
## v4.3.0
|
## v4.3.0
|
||||||
|
|
||||||
源码目录布局调整:
|
源码目录布局调整:
|
||||||
|
@ -71,6 +71,9 @@ make test
|
|||||||
[demo] TAGGING
|
[demo] TAGGING
|
||||||
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
||||||
["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
|
["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"]
|
||||||
|
[demo] KEYWORD
|
||||||
|
我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。
|
||||||
|
["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"]
|
||||||
```
|
```
|
||||||
|
|
||||||
详细请看 `test/demo.cpp`.
|
详细请看 `test/demo.cpp`.
|
||||||
|
@ -95,6 +95,6 @@ class Jieba {
|
|||||||
|
|
||||||
}; // class Jieba
|
}; // class Jieba
|
||||||
|
|
||||||
} // namespace Jieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif // CPPJIEAB_JIEBA_H
|
#endif // CPPJIEAB_JIEBA_H
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
#ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||||
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
#define CPPJIEBA_KEYWORD_EXTRACTOR_H
|
||||||
|
|
||||||
#include "MixSegment.hpp"
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
#include "Jieba.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
using namespace limonp;
|
using namespace limonp;
|
||||||
@ -24,7 +24,14 @@ class KeywordExtractor {
|
|||||||
const HMMModel* model,
|
const HMMModel* model,
|
||||||
const string& idfPath,
|
const string& idfPath,
|
||||||
const string& stopWordPath)
|
const string& stopWordPath)
|
||||||
: segment_(dictTrie, model){
|
: segment_(dictTrie, model) {
|
||||||
|
LoadIdfDict(idfPath);
|
||||||
|
LoadStopWordDict(stopWordPath);
|
||||||
|
}
|
||||||
|
KeywordExtractor(const Jieba& jieba,
|
||||||
|
const string& idfPath,
|
||||||
|
const string& stopWordPath)
|
||||||
|
: segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
|
||||||
LoadIdfDict(idfPath);
|
LoadIdfDict(idfPath);
|
||||||
LoadStopWordDict(stopWordPath);
|
LoadStopWordDict(stopWordPath);
|
||||||
}
|
}
|
||||||
@ -133,8 +140,8 @@ class KeywordExtractor {
|
|||||||
double idfAverage_;
|
double idfAverage_;
|
||||||
|
|
||||||
unordered_set<string> stopWords_;
|
unordered_set<string> stopWords_;
|
||||||
};
|
}; // class KeywordExtractor
|
||||||
}
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1,11 +1,18 @@
|
|||||||
#include "cppjieba/Jieba.hpp"
|
#include "cppjieba/Jieba.hpp"
|
||||||
|
#include "cppjieba/KeywordExtractor.hpp"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
const char* const DICT_PATH = "../dict/jieba.dict.utf8";
|
||||||
|
const char* const HMM_PATH = "../dict/hmm_model.utf8";
|
||||||
|
const char* const USER_DICT_PATH = "../dict/user.dict.utf8";
|
||||||
|
const char* const IDF_PATH = "../dict/idf.utf8";
|
||||||
|
const char* const STOP_WORD_PATH = "../dict/stop_words.utf8";
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
|
cppjieba::Jieba jieba(DICT_PATH,
|
||||||
"../dict/hmm_model.utf8",
|
HMM_PATH,
|
||||||
"../dict/user.dict.utf8");
|
USER_DICT_PATH);
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
string result;
|
string result;
|
||||||
string s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
string s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
||||||
@ -50,11 +57,14 @@ int main(int argc, char** argv) {
|
|||||||
cout << s << endl;
|
cout << s << endl;
|
||||||
cout << tagres << endl;;
|
cout << tagres << endl;;
|
||||||
|
|
||||||
//cout << "[demo] KEYWORD" << endl;
|
cppjieba::KeywordExtractor extractor(jieba,
|
||||||
//vector<pair<string, double> > keywordres;
|
IDF_PATH,
|
||||||
//jieba.Extract(s, keywordres, 5);
|
STOP_WORD_PATH);
|
||||||
//cout << s << endl;
|
cout << "[demo] KEYWORD" << endl;
|
||||||
//cout << keywordres << endl;
|
const size_t topk = 5;
|
||||||
|
vector<pair<string, double> > keywordres;
|
||||||
|
extractor.Extract(s, keywordres, topk);
|
||||||
|
cout << s << endl;
|
||||||
|
cout << keywordres << endl;
|
||||||
return EXIT_SUCCESS;
|
return EXIT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user