集成LevelSegment进Application

This commit is contained in:
yanyiwu 2015-08-11 11:57:58 +08:00
parent 0a6b01c374
commit 0e0318f6ad
3 changed files with 27 additions and 1 deletions

View File

@ -3,6 +3,7 @@
#include "QuerySegment.hpp" #include "QuerySegment.hpp"
#include "PosTagger.hpp" #include "PosTagger.hpp"
#include "LevelSegment.hpp"
#include "KeywordExtractor.hpp" #include "KeywordExtractor.hpp"
namespace CppJieba { namespace CppJieba {
@ -12,7 +13,8 @@ enum CutMethod {
METHOD_HMM, METHOD_HMM,
METHOD_MIX, METHOD_MIX,
METHOD_FULL, METHOD_FULL,
METHOD_QUERY METHOD_QUERY,
METHOD_LEVEL
}; };
class Application { class Application {
@ -29,6 +31,7 @@ class Application {
mixSeg_(&dictTrie_, &model_), mixSeg_(&dictTrie_, &model_),
fullSeg_(&dictTrie_), fullSeg_(&dictTrie_),
querySeg_(&dictTrie_, &model_), querySeg_(&dictTrie_, &model_),
levelSeg_(&dictTrie_),
tagger_(&dictTrie_, &model_), tagger_(&dictTrie_, &model_),
extractor_(&dictTrie_, extractor_(&dictTrie_,
&model_, &model_,
@ -53,10 +56,17 @@ class Application {
case METHOD_QUERY: case METHOD_QUERY:
querySeg_.cut(sentence, words); querySeg_.cut(sentence, words);
break; break;
case METHOD_LEVEL:
levelSeg_.cut(sentence, words);
break;
default: default:
LogError("argument method is illegal."); LogError("argument method is illegal.");
} }
} }
// Overload of cut() that yields (word, size_t) pairs instead of plain words.
// It simply forwards to levelSeg_.cut(sentence, words).
// NOTE(review): the test added in this commit prints the pairs as
// "word:0" for the first-pass words and "word:1" for their sub-words, so the
// size_t is presumably the segmentation level -- confirm against LevelSegment.
void cut(const string& sentence,
vector<pair<string, size_t> >& words) const {
levelSeg_.cut(sentence, words);
}
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dictTrie_.insertUserWord(word, tag); return dictTrie_.insertUserWord(word, tag);
} }
@ -73,11 +83,15 @@ class Application {
private: private:
DictTrie dictTrie_; DictTrie dictTrie_;
HMMModel model_; HMMModel model_;
// They share the same dict trie and model
MPSegment mpSeg_; MPSegment mpSeg_;
HMMSegment hmmSeg_; HMMSegment hmmSeg_;
MixSegment mixSeg_; MixSegment mixSeg_;
FullSegment fullSeg_; FullSegment fullSeg_;
QuerySegment querySeg_; QuerySegment querySeg_;
LevelSegment levelSeg_;
PosTagger tagger_; PosTagger tagger_;
KeywordExtractor extractor_; KeywordExtractor extractor_;
}; // class Application }; // class Application

View File

@ -21,6 +21,7 @@ class LevelSegment: public ISegment {
void cut(Unicode::const_iterator begin, void cut(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,
vector<pair<Unicode, size_t> >& res) const { vector<pair<Unicode, size_t> >& res) const {
res.clear();
vector<Unicode> words; vector<Unicode> words;
vector<Unicode> smallerWords; vector<Unicode> smallerWords;
words.reserve(end - begin); words.reserve(end - begin);
@ -48,6 +49,7 @@ class LevelSegment: public ISegment {
void cut(const string& sentence, void cut(const string& sentence,
vector<pair<string, size_t> >& words) const { vector<pair<string, size_t> >& words) const {
words.clear();
Unicode unicode; Unicode unicode;
TransCode::decode(sentence, unicode); TransCode::decode(sentence, unicode);
vector<pair<Unicode, size_t> > unicodeWords; vector<pair<Unicode, size_t> > unicodeWords;
@ -63,6 +65,7 @@ class LevelSegment: public ISegment {
vector<string>& res) const { vector<string>& res) const {
vector<pair<string, size_t> > words; vector<pair<string, size_t> > words;
cut(sentence, words); cut(sentence, words);
res.clear();
res.reserve(words.size()); res.reserve(words.size());
for (size_t i = 0; i < words.size(); i++) { for (size_t i = 0; i < words.size(); i++) {
res.push_back(words[i].first); res.push_back(words[i].first);

View File

@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) {
result << words; result << words;
ASSERT_EQ("[\"\", \"来到\", \"\", \"网易\", \"杭研\", \"大厦\"]", result); ASSERT_EQ("[\"\", \"来到\", \"\", \"网易\", \"杭研\", \"大厦\"]", result);
app.cut("南京市长江大桥", words, METHOD_LEVEL);
result << words;
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
vector<pair<string, size_t> > word_levels;
app.cut("南京市长江大桥", word_levels);
result << word_levels;
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
vector<pair<string, string> > tagres; vector<pair<string, string> > tagres;
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres); app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
result << tagres; result << tagres;