mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
集成LevelSegment进Application
This commit is contained in:
parent
0a6b01c374
commit
0e0318f6ad
@ -3,6 +3,7 @@
|
||||
|
||||
#include "QuerySegment.hpp"
|
||||
#include "PosTagger.hpp"
|
||||
#include "LevelSegment.hpp"
|
||||
#include "KeywordExtractor.hpp"
|
||||
|
||||
namespace CppJieba {
|
||||
@ -12,7 +13,8 @@ enum CutMethod {
|
||||
METHOD_HMM,
|
||||
METHOD_MIX,
|
||||
METHOD_FULL,
|
||||
METHOD_QUERY
|
||||
METHOD_QUERY,
|
||||
METHOD_LEVEL
|
||||
};
|
||||
|
||||
class Application {
|
||||
@ -29,6 +31,7 @@ class Application {
|
||||
mixSeg_(&dictTrie_, &model_),
|
||||
fullSeg_(&dictTrie_),
|
||||
querySeg_(&dictTrie_, &model_),
|
||||
levelSeg_(&dictTrie_),
|
||||
tagger_(&dictTrie_, &model_),
|
||||
extractor_(&dictTrie_,
|
||||
&model_,
|
||||
@ -53,10 +56,17 @@ class Application {
|
||||
case METHOD_QUERY:
|
||||
querySeg_.cut(sentence, words);
|
||||
break;
|
||||
case METHOD_LEVEL:
|
||||
levelSeg_.cut(sentence, words);
|
||||
break;
|
||||
default:
|
||||
LogError("argument method is illegal.");
|
||||
}
|
||||
}
|
||||
// Overload of cut() whose output is a vector of (string, size_t) pairs.
// Forwards `sentence` and `words` unchanged to levelSeg_.cut(); no method
// selector is taken, so this overload always uses the level segmenter.
// NOTE(review): the size_t is presumably the word's level as assigned by
// LevelSegment — confirm against LevelSegment.hpp.
void cut(const string& sentence,
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
levelSeg_.cut(sentence, words);
|
||||
}
|
||||
// Adds a user-supplied word by delegating to dictTrie_.insertUserWord().
// `tag` defaults to UNKNOWN_TAG when the caller does not provide one.
// Returns whatever dictTrie_.insertUserWord() returns.
// NOTE(review): presumably true on successful insertion — confirm in DictTrie.
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return dictTrie_.insertUserWord(word, tag);
|
||||
}
|
||||
@ -73,11 +83,15 @@ class Application {
|
||||
private:
|
||||
DictTrie dictTrie_;
|
||||
HMMModel model_;
|
||||
|
||||
// They share the same dict trie and model
|
||||
MPSegment mpSeg_;
|
||||
HMMSegment hmmSeg_;
|
||||
MixSegment mixSeg_;
|
||||
FullSegment fullSeg_;
|
||||
QuerySegment querySeg_;
|
||||
LevelSegment levelSeg_;
|
||||
|
||||
PosTagger tagger_;
|
||||
KeywordExtractor extractor_;
|
||||
}; // class Application
|
||||
|
@ -21,6 +21,7 @@ class LevelSegment: public ISegment {
|
||||
void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<pair<Unicode, size_t> >& res) const {
|
||||
res.clear();
|
||||
vector<Unicode> words;
|
||||
vector<Unicode> smallerWords;
|
||||
words.reserve(end - begin);
|
||||
@ -48,6 +49,7 @@ class LevelSegment: public ISegment {
|
||||
|
||||
void cut(const string& sentence,
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
words.clear();
|
||||
Unicode unicode;
|
||||
TransCode::decode(sentence, unicode);
|
||||
vector<pair<Unicode, size_t> > unicodeWords;
|
||||
@ -63,6 +65,7 @@ class LevelSegment: public ISegment {
|
||||
vector<string>& res) const {
|
||||
vector<pair<string, size_t> > words;
|
||||
cut(sentence, words);
|
||||
res.clear();
|
||||
res.reserve(words.size());
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
res.push_back(words[i].first);
|
||||
|
@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) {
|
||||
result << words;
|
||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||
|
||||
app.cut("南京市长江大桥", words, METHOD_LEVEL);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
|
||||
|
||||
vector<pair<string, size_t> > word_levels;
|
||||
app.cut("南京市长江大桥", word_levels);
|
||||
result << word_levels;
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
||||
|
||||
vector<pair<string, string> > tagres;
|
||||
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||
result << tagres;
|
||||
|
Loading…
x
Reference in New Issue
Block a user