From 0e0318f6ad635f0a98deeabb5f5118e18597ea31 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Tue, 11 Aug 2015 11:57:58 +0800 Subject: [PATCH] =?UTF-8?q?=E9=9B=86=E6=88=90LevelSegment=E8=BF=9BApplicat?= =?UTF-8?q?ion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Application.hpp | 16 +++++++++++++++- src/LevelSegment.hpp | 3 +++ test/unittest/TApplication.cpp | 9 +++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/Application.hpp b/src/Application.hpp index d7b7cc9..97001b1 100644 --- a/src/Application.hpp +++ b/src/Application.hpp @@ -3,6 +3,7 @@ #include "QuerySegment.hpp" #include "PosTagger.hpp" +#include "LevelSegment.hpp" #include "KeywordExtractor.hpp" namespace CppJieba { @@ -12,7 +13,8 @@ enum CutMethod { METHOD_HMM, METHOD_MIX, METHOD_FULL, - METHOD_QUERY + METHOD_QUERY, + METHOD_LEVEL }; class Application { @@ -29,6 +31,7 @@ class Application { mixSeg_(&dictTrie_, &model_), fullSeg_(&dictTrie_), querySeg_(&dictTrie_, &model_), + levelSeg_(&dictTrie_), tagger_(&dictTrie_, &model_), extractor_(&dictTrie_, &model_, @@ -53,10 +56,17 @@ class Application { case METHOD_QUERY: querySeg_.cut(sentence, words); break; + case METHOD_LEVEL: + levelSeg_.cut(sentence, words); + break; default: LogError("argument method is illegal."); } } + void cut(const string& sentence, + vector >& words) const { + levelSeg_.cut(sentence, words); + } bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { return dictTrie_.insertUserWord(word, tag); } @@ -73,11 +83,15 @@ class Application { private: DictTrie dictTrie_; HMMModel model_; + + // They share the same dict trie and model MPSegment mpSeg_; HMMSegment hmmSeg_; MixSegment mixSeg_; FullSegment fullSeg_; QuerySegment querySeg_; + LevelSegment levelSeg_; + PosTagger tagger_; KeywordExtractor extractor_; }; // class Application diff --git a/src/LevelSegment.hpp b/src/LevelSegment.hpp index 912da35..a1033d1 100644 --- a/src/LevelSegment.hpp +++ b/src/LevelSegment.hpp @@ -21,6 +21,7 @@ class LevelSegment: public ISegment { void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector >& res) const { + res.clear(); vector words; vector smallerWords; words.reserve(end - begin); @@ -48,6 +49,7 @@ class LevelSegment: public ISegment { void cut(const string& sentence, vector >& words) const { + words.clear(); Unicode unicode; TransCode::decode(sentence, unicode); vector > unicodeWords; @@ -63,6 +65,7 @@ class LevelSegment: public ISegment { vector& res) const { vector > words; cut(sentence, words); + res.clear(); res.reserve(words.size()); for (size_t i = 0; i < words.size(); i++) { res.push_back(words[i].first); diff --git a/test/unittest/TApplication.cpp b/test/unittest/TApplication.cpp index 634cae5..4a66540 100644 --- a/test/unittest/TApplication.cpp +++ b/test/unittest/TApplication.cpp @@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) { result << words; ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result); + app.cut("南京市长江大桥", words, METHOD_LEVEL); + result << words; + ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result); + + vector > word_levels; + app.cut("南京市长江大桥", word_levels); + result << word_levels; + ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result); + vector > tagres; app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres); result << tagres;