mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add Jieba.hpp
This commit is contained in:
parent
63ca914176
commit
710ddacd38
@ -5,6 +5,7 @@
|
||||
1. 使用工程上比较 tricky 的 Trie树优化办法。废弃了之前的 `Aho-Corasick-Automaton` 实现,可读性更好,性能更高。
|
||||
2. 新增层次分词器: LevelSegment 。
|
||||
3. 增加MPSegment的细粒度分词功能。
|
||||
4. 增加 class Jieba ,提供可读性更好的接口。
|
||||
|
||||
## v3.1.0
|
||||
|
||||
|
@ -1,9 +1,8 @@
|
||||
#ifndef CPPJIEBA_APPLICATION_H
|
||||
#define CPPJIEBA_APPLICATION_H
|
||||
|
||||
#include "QuerySegment.hpp"
|
||||
#include "Jieba.hpp"
|
||||
#include "PosTagger.hpp"
|
||||
#include "LevelSegment.hpp"
|
||||
#include "KeywordExtractor.hpp"
|
||||
|
||||
namespace CppJieba {
|
||||
@ -19,45 +18,38 @@ enum CutMethod {
|
||||
|
||||
class Application {
|
||||
public:
|
||||
Application(const string& dictPath,
|
||||
const string& modelPath,
|
||||
const string& userDictPath,
|
||||
const string& idfPath,
|
||||
const string& stopWordsPath)
|
||||
: dictTrie_(dictPath, userDictPath),
|
||||
model_(modelPath),
|
||||
mpSeg_(&dictTrie_),
|
||||
hmmSeg_(&model_),
|
||||
mixSeg_(&dictTrie_, &model_),
|
||||
fullSeg_(&dictTrie_),
|
||||
querySeg_(&dictTrie_, &model_),
|
||||
levelSeg_(&dictTrie_),
|
||||
tagger_(&dictTrie_, &model_),
|
||||
extractor_(&dictTrie_,
|
||||
&model_,
|
||||
idfPath,
|
||||
stopWordsPath) {
|
||||
Application(const string& dict_path,
|
||||
const string& model_path,
|
||||
const string& user_dict_path,
|
||||
const string& idf_path,
|
||||
const string& stopWords_path)
|
||||
: jieba_(dict_path, model_path, user_dict_path),
|
||||
tagger_(jieba_.GetDictTrie(), jieba_.GetHMMModel()),
|
||||
extractor_(jieba_.GetDictTrie(),
|
||||
jieba_.GetHMMModel(),
|
||||
idf_path,
|
||||
stopWords_path) {
|
||||
}
|
||||
void cut(const string& sentence, vector<string>& words,
|
||||
CutMethod method = METHOD_MIX) const {
|
||||
switch(method) {
|
||||
case METHOD_MP:
|
||||
mpSeg_.cut(sentence, words);
|
||||
jieba_.Cut(sentence, false, words);
|
||||
break;
|
||||
case METHOD_HMM:
|
||||
hmmSeg_.cut(sentence, words);
|
||||
jieba_.CutHMM(sentence, words);
|
||||
break;
|
||||
case METHOD_MIX:
|
||||
mixSeg_.cut(sentence, words);
|
||||
jieba_.Cut(sentence, true, words);
|
||||
break;
|
||||
case METHOD_FULL:
|
||||
fullSeg_.cut(sentence, words);
|
||||
jieba_.CutAll(sentence, words);
|
||||
break;
|
||||
case METHOD_QUERY:
|
||||
querySeg_.cut(sentence, words);
|
||||
jieba_.CutForSearch(sentence, words);
|
||||
break;
|
||||
case METHOD_LEVEL:
|
||||
levelSeg_.cut(sentence, words);
|
||||
jieba_.CutLevel(sentence, words);
|
||||
break;
|
||||
default:
|
||||
LogError("argument method is illegal.");
|
||||
@ -65,14 +57,14 @@ class Application {
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
levelSeg_.cut(sentence, words);
|
||||
jieba_.CutLevel(sentence, words);
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<string>& words, size_t max_word_len) const {
|
||||
mpSeg_.cut(sentence, words, max_word_len);
|
||||
jieba_.CutSmall(sentence, words, max_word_len);
|
||||
}
|
||||
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return dictTrie_.insertUserWord(word, tag);
|
||||
return jieba_.InsertUserWord(word, tag);
|
||||
}
|
||||
void tag(const string& str, vector<pair<string, string> >& res) const {
|
||||
tagger_.tag(str, res);
|
||||
@ -85,17 +77,7 @@ class Application {
|
||||
}
|
||||
|
||||
private:
|
||||
DictTrie dictTrie_;
|
||||
HMMModel model_;
|
||||
|
||||
// They share the same dict trie and model
|
||||
MPSegment mpSeg_;
|
||||
HMMSegment hmmSeg_;
|
||||
MixSegment mixSeg_;
|
||||
FullSegment fullSeg_;
|
||||
QuerySegment querySeg_;
|
||||
LevelSegment levelSeg_;
|
||||
|
||||
Jieba jieba_;
|
||||
PosTagger tagger_;
|
||||
KeywordExtractor extractor_;
|
||||
}; // class Application
|
||||
|
74
src/Jieba.hpp
Normal file
74
src/Jieba.hpp
Normal file
@ -0,0 +1,74 @@
|
||||
#define CPPJIEAB_JIEBA_H
|
||||
|
||||
#include "QuerySegment.hpp"
|
||||
#include "PosTagger.hpp"
|
||||
#include "LevelSegment.hpp"
|
||||
|
||||
namespace CppJieba {
|
||||
|
||||
class Jieba {
|
||||
public:
|
||||
Jieba(const string& dict_path, const string& model_path, const string& user_dict_path)
|
||||
: dict_trie_(dict_path, user_dict_path),
|
||||
model_(model_path),
|
||||
mp_seg_(&dict_trie_),
|
||||
hmm_seg_(&model_),
|
||||
mix_seg_(&dict_trie_, &model_),
|
||||
full_seg_(&dict_trie_),
|
||||
query_seg_(&dict_trie_, &model_),
|
||||
level_seg_(&dict_trie_) {
|
||||
}
|
||||
~Jieba() {
|
||||
}
|
||||
|
||||
void Cut(const string& sentence, bool hmm, vector<string>& words) const {
|
||||
if (hmm) {
|
||||
mix_seg_.cut(sentence, words);
|
||||
} else {
|
||||
mp_seg_.cut(sentence, words);
|
||||
}
|
||||
}
|
||||
void CutAll(const string& sentence, vector<string>& words) const {
|
||||
full_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutForSearch(const string& sentence, vector<string>& words) const {
|
||||
query_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutHMM(const string& sentence, vector<string>& words) const {
|
||||
hmm_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutLevel(const string& sentence, vector<string>& words) const {
|
||||
level_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutLevel(const string& sentence, vector<pair<string, size_t> >& words) const {
|
||||
level_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
||||
mp_seg_.cut(sentence, words, max_word_len);
|
||||
}
|
||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return dict_trie_.insertUserWord(word, tag);
|
||||
}
|
||||
|
||||
const DictTrie* GetDictTrie() const {
|
||||
return &dict_trie_;
|
||||
}
|
||||
const HMMModel* GetHMMModel() const {
|
||||
return &model_;
|
||||
}
|
||||
|
||||
private:
|
||||
DictTrie dict_trie_;
|
||||
HMMModel model_;
|
||||
|
||||
// They share the same dict trie and model
|
||||
MPSegment mp_seg_;
|
||||
HMMSegment hmm_seg_;
|
||||
MixSegment mix_seg_;
|
||||
FullSegment full_seg_;
|
||||
QuerySegment query_seg_;
|
||||
LevelSegment level_seg_;
|
||||
|
||||
}; // class
|
||||
|
||||
} // namespace
|
Loading…
x
Reference in New Issue
Block a user