From 193e717d22ad1201abeb7442fabbc26347c89169 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Wed, 13 Jan 2016 00:40:46 +0800 Subject: [PATCH] overload constructor in KeywordExtractor --- ChangeLog.md | 4 ++++ README.md | 3 +++ include/cppjieba/Jieba.hpp | 2 +- include/cppjieba/KeywordExtractor.hpp | 15 ++++++++++---- test/demo.cpp | 28 ++++++++++++++++++--------- 5 files changed, 38 insertions(+), 14 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 67ba6ea..eb82adf 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,9 @@ # CppJieba ChangeLog +## next version + ++ 重载 KeywordExtractor 的构造函数,可以传入 Jieba 进行字典和模型的构造。 + ## v4.3.0 源码目录布局调整: diff --git a/README.md b/README.md index b3f09c4..757a9b5 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,9 @@ make test [demo] TAGGING 我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。 ["我:r", "是:v", "拖拉机:n", "学院:n", "手扶拖拉机:n", "专业:n", "的:uj", "。:x", "不用:v", "多久:m", ",:x", "我:r", "就:d", "会:v", "升职:v", "加薪:nr", ",:x", "当上:t", "CEO:eng", ",:x", "走上:v", "人生:n", "巅峰:n", "。:x"] +[demo] KEYWORD +我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。 +["CEO:11.7392", "升职:10.8562", "加薪:10.6426", "手扶拖拉机:10.0089", "巅峰:9.49396"] ``` 详细请看 `test/demo.cpp`. 
diff --git a/include/cppjieba/Jieba.hpp b/include/cppjieba/Jieba.hpp index 381c292..b931fad 100644 --- a/include/cppjieba/Jieba.hpp +++ b/include/cppjieba/Jieba.hpp @@ -95,6 +95,6 @@ class Jieba { }; // class Jieba -} // namespace Jieba +} // namespace cppjieba #endif // CPPJIEAB_JIEBA_H diff --git a/include/cppjieba/KeywordExtractor.hpp b/include/cppjieba/KeywordExtractor.hpp index b55f91c..02c2efb 100644 --- a/include/cppjieba/KeywordExtractor.hpp +++ b/include/cppjieba/KeywordExtractor.hpp @@ -1,9 +1,9 @@ #ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H #define CPPJIEBA_KEYWORD_EXTRACTOR_H -#include "MixSegment.hpp" #include #include +#include "Jieba.hpp" namespace cppjieba { using namespace limonp; @@ -24,7 +24,14 @@ class KeywordExtractor { const HMMModel* model, const string& idfPath, const string& stopWordPath) - : segment_(dictTrie, model){ + : segment_(dictTrie, model) { + LoadIdfDict(idfPath); + LoadStopWordDict(stopWordPath); + } + KeywordExtractor(const Jieba& jieba, + const string& idfPath, + const string& stopWordPath) + : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) { LoadIdfDict(idfPath); LoadStopWordDict(stopWordPath); } @@ -133,8 +140,8 @@ class KeywordExtractor { double idfAverage_; unordered_set stopWords_; -}; -} +}; // class KeywordExtractor +} // namespace cppjieba #endif diff --git a/test/demo.cpp b/test/demo.cpp index 6d6e8b8..b352d52 100644 --- a/test/demo.cpp +++ b/test/demo.cpp @@ -1,11 +1,18 @@ #include "cppjieba/Jieba.hpp" +#include "cppjieba/KeywordExtractor.hpp" using namespace std; +const char* const DICT_PATH = "../dict/jieba.dict.utf8"; +const char* const HMM_PATH = "../dict/hmm_model.utf8"; +const char* const USER_DICT_PATH = "../dict/user.dict.utf8"; +const char* const IDF_PATH = "../dict/idf.utf8"; +const char* const STOP_WORD_PATH = "../dict/stop_words.utf8"; + int main(int argc, char** argv) { - cppjieba::Jieba jieba("../dict/jieba.dict.utf8", - "../dict/hmm_model.utf8", - "../dict/user.dict.utf8"); + cppjieba::Jieba jieba(DICT_PATH, + 
HMM_PATH, + USER_DICT_PATH); vector words; string result; string s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。"; @@ -50,11 +57,14 @@ int main(int argc, char** argv) { cout << s << endl; cout << tagres << endl;; - //cout << "[demo] KEYWORD" << endl; - //vector > keywordres; - //jieba.Extract(s, keywordres, 5); - //cout << s << endl; - //cout << keywordres << endl; - + cppjieba::KeywordExtractor extractor(jieba, + IDF_PATH, + STOP_WORD_PATH); + cout << "[demo] KEYWORD" << endl; + const size_t topk = 5; + vector > keywordres; + extractor.Extract(s, keywordres, topk); + cout << s << endl; + cout << keywordres << endl; return EXIT_SUCCESS; }