add optional argument: hmm

2025-07-18 00:00:12 +08:00 · 2015-09-13 17:28:49 +08:00 · 2015-09-13 17:28:49 +08:00 · f98e94869c
commit f98e94869c
parent 14974d51b4
6 changed files with 21 additions and 23 deletions
--- a/ChangeLog.md
+++ b/ChangeLog.md
@@ -6,6 +6,7 @@
 2. 新增层次分词器: LevelSegment 。
 3. 增加MPSegment的细粒度分词功能。
 4. 增加 class Jieba ，提供可读性更好的接口。
+5. 放弃了统一接口ISegment，因为统一的接口限制了分词方式的灵活性，限制了一些功能的增加。

 ## v3.1.0

--- a/src/Application.hpp
+++ b/src/Application.hpp
@@ -34,13 +34,13 @@ class Application {
        CutMethod method = METHOD_MIX) const {
    switch(method) {
      case METHOD_MP:
-        jieba_.Cut(sentence, false, words);
+        jieba_.Cut(sentence, words);
        break;
      case METHOD_HMM:
        jieba_.CutHMM(sentence, words);
        break;
      case METHOD_MIX:
-        jieba_.Cut(sentence, true, words);
+        jieba_.Cut(sentence, words);
        break;
      case METHOD_FULL:
        jieba_.CutAll(sentence, words);
--- a/src/Jieba.hpp
+++ b/src/Jieba.hpp
@@ -21,18 +21,14 @@ class Jieba {
  ~Jieba() {
  }

-  void Cut(const string& sentence, bool hmm, vector<string>& words) const {
-    if (hmm) {
-      mix_seg_.cut(sentence, words);
-    } else {
-      mp_seg_.cut(sentence, words);
-    }
+  void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
+    mix_seg_.cut(sentence, words, hmm);
  }
  void CutAll(const string& sentence, vector<string>& words) const {
    full_seg_.cut(sentence, words);
  }
-  void CutForSearch(const string& sentence, vector<string>& words) const {
-    query_seg_.cut(sentence, words);
+  void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
+    query_seg_.cut(sentence, words, hmm);
  }
  void CutHMM(const string& sentence, vector<string>& words) const {
    hmm_seg_.cut(sentence, words);
--- a/src/KeywordExtractor.hpp
+++ b/src/KeywordExtractor.hpp
@@ -31,9 +31,9 @@ class KeywordExtractor {
  ~KeywordExtractor() {
  }

-  bool extract(const string& str, vector<string>& keywords, size_t topN) const {
+  bool extract(const string& sentence, vector<string>& keywords, size_t topN) const {
    vector<pair<string, double> > topWords;
-    if(!extract(str, topWords, topN)) {
+    if(!extract(sentence, topWords, topN)) {
      return false;
    }
    for(size_t i = 0; i < topWords.size(); i++) {
@@ -42,9 +42,9 @@ class KeywordExtractor {
    return true;
  }

-  bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const {
+  bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
    vector<string> words;
-    segment_.cut(str, words);
+    segment_.cut(sentence, words);

    map<string, double> wordmap;
    for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
--- a/src/MixSegment.hpp
+++ b/src/MixSegment.hpp
@@ -21,23 +21,25 @@ class MixSegment: public SegmentBase {
  ~MixSegment() {
  }

-  void cut(const string& sentence, 
-        vector<string>& words) const {
+  void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<Unicode> uwords;
    uwords.reserve(sentence.size());
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
-      cut(range.begin, range.end, uwords);
+      cut(range.begin, range.end, uwords, hmm);
    }
    TransCode::encode(uwords, words);
  }

-  void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
+  void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
    vector<Unicode> words;
    words.reserve(end - begin);
    mpSeg_.cut(begin, end, words);
+    if (!hmm) {
+      return;
+    }

    vector<Unicode> hmmRes;
    hmmRes.reserve(end - begin);
--- a/src/QuerySegment.hpp
+++ b/src/QuerySegment.hpp
@@ -26,22 +26,21 @@ class QuerySegment: public SegmentBase {
  }
  ~QuerySegment() {
  }
-  void cut(const string& sentence, 
-        vector<string>& words) const {
+  void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
    PreFilter pre_filter(symbols_, sentence);
    PreFilter::Range range;
    vector<Unicode> uwords;
    uwords.reserve(sentence.size());
    while (pre_filter.HasNext()) {
      range = pre_filter.Next();
-      cut(range.begin, range.end, uwords);
+      cut(range.begin, range.end, uwords, hmm);
    }
    TransCode::encode(uwords, words);
  }
-  void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
+  void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
    //use mix cut first
    vector<Unicode> mixRes;
-    mixSeg_.cut(begin, end, mixRes);
+    mixSeg_.cut(begin, end, mixRes, hmm);

    vector<Unicode> fullRes;
    for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {