mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
集成LevelSegment进Application
This commit is contained in:
parent
0a6b01c374
commit
0e0318f6ad
@ -3,6 +3,7 @@
|
|||||||
|
|
||||||
#include "QuerySegment.hpp"
|
#include "QuerySegment.hpp"
|
||||||
#include "PosTagger.hpp"
|
#include "PosTagger.hpp"
|
||||||
|
#include "LevelSegment.hpp"
|
||||||
#include "KeywordExtractor.hpp"
|
#include "KeywordExtractor.hpp"
|
||||||
|
|
||||||
namespace CppJieba {
|
namespace CppJieba {
|
||||||
@ -12,7 +13,8 @@ enum CutMethod {
|
|||||||
METHOD_HMM,
|
METHOD_HMM,
|
||||||
METHOD_MIX,
|
METHOD_MIX,
|
||||||
METHOD_FULL,
|
METHOD_FULL,
|
||||||
METHOD_QUERY
|
METHOD_QUERY,
|
||||||
|
METHOD_LEVEL
|
||||||
};
|
};
|
||||||
|
|
||||||
class Application {
|
class Application {
|
||||||
@ -29,6 +31,7 @@ class Application {
|
|||||||
mixSeg_(&dictTrie_, &model_),
|
mixSeg_(&dictTrie_, &model_),
|
||||||
fullSeg_(&dictTrie_),
|
fullSeg_(&dictTrie_),
|
||||||
querySeg_(&dictTrie_, &model_),
|
querySeg_(&dictTrie_, &model_),
|
||||||
|
levelSeg_(&dictTrie_),
|
||||||
tagger_(&dictTrie_, &model_),
|
tagger_(&dictTrie_, &model_),
|
||||||
extractor_(&dictTrie_,
|
extractor_(&dictTrie_,
|
||||||
&model_,
|
&model_,
|
||||||
@ -53,10 +56,17 @@ class Application {
|
|||||||
case METHOD_QUERY:
|
case METHOD_QUERY:
|
||||||
querySeg_.cut(sentence, words);
|
querySeg_.cut(sentence, words);
|
||||||
break;
|
break;
|
||||||
|
case METHOD_LEVEL:
|
||||||
|
levelSeg_.cut(sentence, words);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
LogError("argument method is illegal.");
|
LogError("argument method is illegal.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void cut(const string& sentence,
|
||||||
|
vector<pair<string, size_t> >& words) const {
|
||||||
|
levelSeg_.cut(sentence, words);
|
||||||
|
}
|
||||||
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||||
return dictTrie_.insertUserWord(word, tag);
|
return dictTrie_.insertUserWord(word, tag);
|
||||||
}
|
}
|
||||||
@ -73,11 +83,15 @@ class Application {
|
|||||||
private:
|
private:
|
||||||
DictTrie dictTrie_;
|
DictTrie dictTrie_;
|
||||||
HMMModel model_;
|
HMMModel model_;
|
||||||
|
|
||||||
|
// They share the same dict trie and model
|
||||||
MPSegment mpSeg_;
|
MPSegment mpSeg_;
|
||||||
HMMSegment hmmSeg_;
|
HMMSegment hmmSeg_;
|
||||||
MixSegment mixSeg_;
|
MixSegment mixSeg_;
|
||||||
FullSegment fullSeg_;
|
FullSegment fullSeg_;
|
||||||
QuerySegment querySeg_;
|
QuerySegment querySeg_;
|
||||||
|
LevelSegment levelSeg_;
|
||||||
|
|
||||||
PosTagger tagger_;
|
PosTagger tagger_;
|
||||||
KeywordExtractor extractor_;
|
KeywordExtractor extractor_;
|
||||||
}; // class Application
|
}; // class Application
|
||||||
|
@ -21,6 +21,7 @@ class LevelSegment: public ISegment {
|
|||||||
void cut(Unicode::const_iterator begin,
|
void cut(Unicode::const_iterator begin,
|
||||||
Unicode::const_iterator end,
|
Unicode::const_iterator end,
|
||||||
vector<pair<Unicode, size_t> >& res) const {
|
vector<pair<Unicode, size_t> >& res) const {
|
||||||
|
res.clear();
|
||||||
vector<Unicode> words;
|
vector<Unicode> words;
|
||||||
vector<Unicode> smallerWords;
|
vector<Unicode> smallerWords;
|
||||||
words.reserve(end - begin);
|
words.reserve(end - begin);
|
||||||
@ -48,6 +49,7 @@ class LevelSegment: public ISegment {
|
|||||||
|
|
||||||
void cut(const string& sentence,
|
void cut(const string& sentence,
|
||||||
vector<pair<string, size_t> >& words) const {
|
vector<pair<string, size_t> >& words) const {
|
||||||
|
words.clear();
|
||||||
Unicode unicode;
|
Unicode unicode;
|
||||||
TransCode::decode(sentence, unicode);
|
TransCode::decode(sentence, unicode);
|
||||||
vector<pair<Unicode, size_t> > unicodeWords;
|
vector<pair<Unicode, size_t> > unicodeWords;
|
||||||
@ -63,6 +65,7 @@ class LevelSegment: public ISegment {
|
|||||||
vector<string>& res) const {
|
vector<string>& res) const {
|
||||||
vector<pair<string, size_t> > words;
|
vector<pair<string, size_t> > words;
|
||||||
cut(sentence, words);
|
cut(sentence, words);
|
||||||
|
res.clear();
|
||||||
res.reserve(words.size());
|
res.reserve(words.size());
|
||||||
for (size_t i = 0; i < words.size(); i++) {
|
for (size_t i = 0; i < words.size(); i++) {
|
||||||
res.push_back(words[i].first);
|
res.push_back(words[i].first);
|
||||||
|
@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) {
|
|||||||
result << words;
|
result << words;
|
||||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||||
|
|
||||||
|
app.cut("南京市长江大桥", words, METHOD_LEVEL);
|
||||||
|
result << words;
|
||||||
|
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
|
||||||
|
|
||||||
|
vector<pair<string, size_t> > word_levels;
|
||||||
|
app.cut("南京市长江大桥", word_levels);
|
||||||
|
result << word_levels;
|
||||||
|
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
||||||
|
|
||||||
vector<pair<string, string> > tagres;
|
vector<pair<string, string> > tagres;
|
||||||
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||||
result << tagres;
|
result << tagres;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user