增加 Application 这个类，整合了所有CppJieba的功能进去，以后用户只需要使用这个类即可。

2025-07-18 00:00:12 +08:00 · 2015-06-05 16:00:32 +08:00 · 2015-06-05 16:00:32 +08:00 · 45588b75cc
commit 45588b75cc
parent d56bf2cc68
7 changed files with 136 additions and 8 deletions
--- a/ChangeLog.md
+++ b/ChangeLog.md
@ -1,11 +1,12 @@
 # CppJieba ChangeLog

-## v2.5.0 (untagged)
+## v3.0.0 (untagged)

 1. 使得 QuerySegment 支持自定义词典（可选参数）。
 2. 使得 KeywordExtractor 支持自定义词典（可选参数）。
 3. 修改 Code Style ，参照 google code style 。 
 4. 增加更详细的错误日志，在初始化过程中合理使用LogFatal。
+5. 增加 Application 这个类，整合了所有CppJieba的功能进去，以后用户只需要使用这个类即可。

 ## v2.4.4

--- a/src/Application.hpp
+++ b/src/Application.hpp
@ -0,0 +1,80 @@
+#ifndef CPPJIEBA_APPLICATION_H
+#define CPPJIEBA_APPLICATION_H
+
+#include "QuerySegment.hpp"
+#include "PosTagger.hpp"
+#include "KeywordExtractor.hpp"
+
+namespace CppJieba {
+
+// Selects which segmenter member Application::cut() dispatches to.
+enum CutMethod {
+  METHOD_MP = 0,     // routed to the MPSegment member
+  METHOD_HMM = 1,    // routed to the HMMSegment member
+  METHOD_MIX = 2,    // routed to the MixSegment member
+  METHOD_FULL = 3,   // routed to the FullSegment member
+  METHOD_QUERY = 4   // routed to the QuerySegment member
+};
+
+// Facade bundling the CppJieba components behind a single object: five
+// segmenters, a part-of-speech tagger and a keyword extractor, all built on
+// the one dictionary (dictTrie_) and one HMM model (model_) owned here.
+class Application {
+ public:
+  // Builds every component from files located under dictDir:
+  // "jieba.dict.utf8", "hmm_model.utf8", "idf.utf8" and "stop_words.utf8".
+  Application(const string& dictDir)
+    : dictTrie_(pathJoin(dictDir, "jieba.dict.utf8")),
+      model_(pathJoin(dictDir, "hmm_model.utf8")),
+      mpSeg_(&dictTrie_),
+      hmmSeg_(&model_),
+      mixSeg_(&dictTrie_, &model_),
+      fullSeg_(&dictTrie_),
+      querySeg_(&dictTrie_, &model_),
+      tagger_(&dictTrie_, &model_),
+      extractor_(&dictTrie_,
+                 &model_,
+                 pathJoin(dictDir, "idf.utf8"),
+                 pathJoin(dictDir, "stop_words.utf8")) {
+  }
+  // Segments sentence into words using the segmenter selected by method.
+  // An unrecognized method only logs an error; words is not touched then.
+  void cut(const string& sentence, vector<string>& words,
+        CutMethod method) const {
+    switch(method) {
+      case METHOD_MP:
+        mpSeg_.cut(sentence, words);
+        break;
+      case METHOD_HMM:
+        hmmSeg_.cut(sentence, words);
+        break;
+      case METHOD_MIX:
+        mixSeg_.cut(sentence, words);
+        break;
+      case METHOD_FULL:
+        fullSeg_.cut(sentence, words);
+        break;
+      case METHOD_QUERY:
+        querySeg_.cut(sentence, words);
+        break;
+      default:
+        LogError("argument method is illegal.");
+    }
+  }
+  // Forwards to PosTagger::tag; res receives (string, string) pairs.
+  void tag(const string& str, vector<pair<string, string> >& res) const {
+    tagger_.tag(str, res);
+  }
+  // Forwards to KeywordExtractor::extract; keyword receives (string, double)
+  // pairs. topN is passed through unchanged (presumably the maximum number
+  // of keywords returned -- confirm against KeywordExtractor).
+  void extract(const string& str, vector<pair<string, double> >& keyword,
+        size_t topN) const {
+    extractor_.extract(str, keyword, topN);
+  }
+  ~Application() {
+  }
+
+ private:
+  // Not copyable (declared, intentionally never defined). The components
+  // below are constructed from the addresses of this object's own dictTrie_
+  // and model_; assuming they retain those pointers, a member-wise copy would
+  // leave the copy's components referring to the source object's members.
+  Application(const Application&);
+  Application& operator=(const Application&);
+
+  // Declaration order matters: members are initialized in this order, so
+  // dictTrie_ and model_ come before the components that take their address.
+  DictTrie dictTrie_;
+  HMMModel model_;
+  MPSegment mpSeg_;
+  HMMSegment hmmSeg_;
+  MixSegment mixSeg_;
+  FullSegment fullSeg_;
+  QuerySegment querySeg_;
+  PosTagger tagger_;
+  KeywordExtractor extractor_;
+}; // class Application
+
+} // namespace CppJieba
+
+#endif // CPPJIEBA_APPLICATION_H
--- a/src/QuerySegment.hpp
+++ b/src/QuerySegment.hpp
@ -23,8 +23,8 @@ class QuerySegment: public SegmentBase {
      maxWordLen_(maxWordLen) {
    assert(maxWordLen_);
  }
-  QuerySegment(const DictTrie* dictTrie, const HMMModel* model)
-    : mixSeg_(dictTrie, model), fullSeg_(dictTrie) {
+  // Builds the query segmenter on an existing dictionary and HMM model; both
+  // pointers are forwarded to mixSeg_ and dictTrie also to fullSeg_.
+  // maxWordLen is the word length above which a mix-segmented word is cut
+  // again with fullSeg_. It must be non-zero, as in the sibling constructor.
+  QuerySegment(const DictTrie* dictTrie, const HMMModel* model, size_t maxWordLen = 4)
+    : mixSeg_(dictTrie, model), fullSeg_(dictTrie), maxWordLen_(maxWordLen) {
+    assert(maxWordLen_);
  }
  virtual ~QuerySegment() {
  }
@ -39,7 +39,6 @@ class QuerySegment: public SegmentBase {

    vector<Unicode> fullRes;
    for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
-
      // if it's too long, cut with fullSeg_, put fullRes in res
      if (mixResItr->size() > maxWordLen_) {
        if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) {
--- a/src/TransCode.hpp
+++ b/src/TransCode.hpp
@ -43,6 +43,10 @@ inline string encode(Unicode::const_iterator begin, Unicode::const_iterator end)
  return res;
 }

+// Convenience overload: encodes the whole Unicode sequence by delegating to
+// the iterator-range overload of encode().
+inline string encode(const Unicode& unicode) {
+  Unicode::const_iterator first = unicode.begin();
+  Unicode::const_iterator last = unicode.end();
+  return encode(first, last);
+}
+
 // compiler is expected to optimize this function to avoid the return value copy
 inline Unicode decode(const string& str) {
  Unicode unicode;
@ -50,7 +54,8 @@ inline Unicode decode(const string& str) {
  decode(str, unicode);
  return unicode;
 }
-}
-}
+
+} // namespace TransCode
+} // namespace CppJieba

 #endif
--- a/test/unittest/CMakeLists.txt
+++ b/test/unittest/CMakeLists.txt
@ -22,6 +22,7 @@ ADD_EXECUTABLE(test.run
    TTrie.cpp 
    TSegments.cpp 
    TPosTagger.cpp
+    TApplication.cpp
 )
 TARGET_LINK_LIBRARIES(gtest pthread)
 TARGET_LINK_LIBRARIES(test.run gtest pthread)
--- a/test/unittest/TApplication.cpp
+++ b/test/unittest/TApplication.cpp
@ -0,0 +1,44 @@
+#include "src/Application.hpp"
+#include "gtest/gtest.h"
+
+using namespace CppJieba;
+
+// End-to-end check of the Application facade: every CutMethod, then tagging,
+// then keyword extraction, against the dictionaries under "../dict/".
+// After each call the output is rendered into `result` via operator<< (defined
+// outside this file) and compared with the expected text. Every ASSERT_EQ
+// uses the (expected, actual) argument order.
+TEST(ApplicationTest, Test1) {
+  Application app("../dict/");
+  vector<string> words;
+  string result;
+
+  // METHOD_MP
+  app.cut("我来自北京邮电大学。", words, METHOD_MP);
+  result << words;
+  ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
+
+  // METHOD_HMM
+  app.cut("我来自北京邮电大学。。。学号123456", words, METHOD_HMM);
+  result << words;
+  ASSERT_EQ("[\"我来\", \"自北京\", \"邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\"]", result);
+
+  // METHOD_MIX
+  app.cut("我来自北京邮电大学。。。学号123456，用AK47", words, METHOD_MIX);
+  result << words;
+  ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\", \"，\", \"用\", \"AK47\"]", result);
+
+  app.cut("他来到了网易杭研大厦", words, METHOD_MIX);
+  result << words;
+  ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
+
+  // METHOD_FULL
+  app.cut("我来自北京邮电大学", words, METHOD_FULL);
+  result << words;
+  ASSERT_EQ("[\"我\", \"来自\", \"北京\", \"北京邮电\", \"北京邮电大学\", \"邮电\", \"邮电大学\", \"电大\", \"大学\"]", result);
+
+  // METHOD_QUERY
+  app.cut("他来到了网易杭研大厦", words, METHOD_QUERY);
+  result << words;
+  ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
+
+  // Part-of-speech tagging
+  vector<pair<string, string> > tagres;
+  app.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
+  result << tagres;
+  ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
+
+  // Keyword extraction, top 5
+  vector<pair<string, double> > keywordres;
+  app.extract("我是拖拉机学院手扶拖拉机专业的。不用多久，我就会升职加薪，当上CEO，走上人生巅峰。", keywordres, 5);
+  result << keywordres;
+  ASSERT_EQ("[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]", result);
+}
--- a/test/unittest/TKeywordExtractor.cpp
+++ b/test/unittest/TKeywordExtractor.cpp
@ -3,8 +3,6 @@

 using namespace CppJieba;

-
-
 TEST(KeywordExtractorTest, Test1) {
  KeywordExtractor extractor("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");