集成LevelSegment进Application

This commit is contained in:
yanyiwu 2015-08-11 11:57:58 +08:00
parent 0a6b01c374
commit 0e0318f6ad
3 changed files with 27 additions and 1 deletions

View File

@ -3,6 +3,7 @@
#include "QuerySegment.hpp"
#include "PosTagger.hpp"
#include "LevelSegment.hpp"
#include "KeywordExtractor.hpp"
namespace CppJieba {
@ -12,7 +13,8 @@ enum CutMethod {
METHOD_HMM,
METHOD_MIX,
METHOD_FULL,
METHOD_QUERY
METHOD_QUERY,
METHOD_LEVEL
};
class Application {
@ -29,6 +31,7 @@ class Application {
mixSeg_(&dictTrie_, &model_),
fullSeg_(&dictTrie_),
querySeg_(&dictTrie_, &model_),
levelSeg_(&dictTrie_),
tagger_(&dictTrie_, &model_),
extractor_(&dictTrie_,
&model_,
@ -53,10 +56,17 @@ class Application {
case METHOD_QUERY:
querySeg_.cut(sentence, words);
break;
case METHOD_LEVEL:
levelSeg_.cut(sentence, words);
break;
default:
LogError("argument method is illegal.");
}
}
void cut(const string& sentence,
vector<pair<string, size_t> >& words) const {
levelSeg_.cut(sentence, words);
}
// Adds a user-supplied word (with an optional tag, UNKNOWN_TAG by default)
// to the shared dictionary trie and reports the trie's result.
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
  const bool inserted = dictTrie_.insertUserWord(word, tag);
  return inserted;
}
@ -73,11 +83,15 @@ class Application {
private:
DictTrie dictTrie_;
HMMModel model_;
// They share the same dict trie and model
MPSegment mpSeg_;
HMMSegment hmmSeg_;
MixSegment mixSeg_;
FullSegment fullSeg_;
QuerySegment querySeg_;
LevelSegment levelSeg_;
PosTagger tagger_;
KeywordExtractor extractor_;
}; // class Application

View File

@ -21,6 +21,7 @@ class LevelSegment: public ISegment {
void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<pair<Unicode, size_t> >& res) const {
res.clear();
vector<Unicode> words;
vector<Unicode> smallerWords;
words.reserve(end - begin);
@ -48,6 +49,7 @@ class LevelSegment: public ISegment {
void cut(const string& sentence,
vector<pair<string, size_t> >& words) const {
words.clear();
Unicode unicode;
TransCode::decode(sentence, unicode);
vector<pair<Unicode, size_t> > unicodeWords;
@ -63,6 +65,7 @@ class LevelSegment: public ISegment {
vector<string>& res) const {
vector<pair<string, size_t> > words;
cut(sentence, words);
res.clear();
res.reserve(words.size());
for (size_t i = 0; i < words.size(); i++) {
res.push_back(words[i].first);

View File

@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) {
result << words;
ASSERT_EQ("[\"\", \"来到\", \"\", \"网易\", \"杭研\", \"大厦\"]", result);
app.cut("南京市长江大桥", words, METHOD_LEVEL);
result << words;
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
vector<pair<string, size_t> > word_levels;
app.cut("南京市长江大桥", word_levels);
result << word_levels;
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
vector<pair<string, string> > tagres;
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
result << tagres;