集成LevelSegment进Application

2025-07-18 00:00:12 +08:00 · 2015-08-11 11:57:58 +08:00 · 2015-08-11 11:57:58 +08:00 · 0e0318f6ad
commit 0e0318f6ad
parent 0a6b01c374
3 changed files with 27 additions and 1 deletion
--- a/src/Application.hpp
+++ b/src/Application.hpp
@@ -3,6 +3,7 @@

 #include "QuerySegment.hpp"
 #include "PosTagger.hpp"
+#include "LevelSegment.hpp"
 #include "KeywordExtractor.hpp"

 namespace CppJieba {
@@ -12,7 +13,8 @@ enum CutMethod {
  METHOD_HMM,
  METHOD_MIX,
  METHOD_FULL,
-  METHOD_QUERY
+  METHOD_QUERY,
+  METHOD_LEVEL
 };

 class Application {
@@ -29,6 +31,7 @@ class Application {
      mixSeg_(&dictTrie_, &model_),
      fullSeg_(&dictTrie_),
      querySeg_(&dictTrie_, &model_),
+      levelSeg_(&dictTrie_),
      tagger_(&dictTrie_, &model_), 
      extractor_(&dictTrie_, 
                 &model_, 
@@ -53,10 +56,17 @@ class Application {
      case METHOD_QUERY:
        querySeg_.cut(sentence, words);
        break;
+      case METHOD_LEVEL:
+        levelSeg_.cut(sentence, words);
+        break;
      default:
        LogError("argument method is illegal.");
    }
  }
+  void cut(const string& sentence, 
+        vector<pair<string, size_t> >& words) const {
+    levelSeg_.cut(sentence, words);
+  }
  bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
    return dictTrie_.insertUserWord(word, tag);
  }
@@ -73,11 +83,15 @@ class Application {
 private:
  DictTrie dictTrie_;
  HMMModel model_;
+
+  // The segmenters below share the same dict trie and model declared above
  MPSegment mpSeg_;
  HMMSegment hmmSeg_;
  MixSegment mixSeg_;
  FullSegment fullSeg_;
  QuerySegment querySeg_;
+  LevelSegment levelSeg_;
+
  PosTagger tagger_;
  KeywordExtractor extractor_;
 }; // class Application
--- a/src/LevelSegment.hpp
+++ b/src/LevelSegment.hpp
@@ -21,6 +21,7 @@ class LevelSegment: public ISegment {
  void cut(Unicode::const_iterator begin,
        Unicode::const_iterator end, 
        vector<pair<Unicode, size_t> >& res) const {
+    res.clear();
    vector<Unicode> words;
    vector<Unicode> smallerWords;
    words.reserve(end - begin);
@@ -48,6 +49,7 @@ class LevelSegment: public ISegment {

  void cut(const string& sentence, 
        vector<pair<string, size_t> >& words) const {
+    words.clear();
    Unicode unicode;
    TransCode::decode(sentence, unicode);
    vector<pair<Unicode, size_t> > unicodeWords;
@@ -63,6 +65,7 @@ class LevelSegment: public ISegment {
        vector<string>& res) const {
    vector<pair<string, size_t> > words;
    cut(sentence, words);
+    res.clear();
    res.reserve(words.size());
    for (size_t i = 0; i < words.size(); i++) {
      res.push_back(words[i].first);
--- a/test/unittest/TApplication.cpp
+++ b/test/unittest/TApplication.cpp
@@ -40,6 +40,15 @@ TEST(ApplicationTest, Test1) {
  result << words;
  ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);

+  app.cut("南京市长江大桥", words, METHOD_LEVEL);
+  result << words;
+  ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
+
+  vector<pair<string, size_t> > word_levels;
+  app.cut("南京市长江大桥", word_levels);
+  result << word_levels;
+  ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
+
  vector<pair<string, string> > tagres;
  app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
  result << tagres;