From 6d105a864df9fc197c884e2a1aecc132cc36f425 Mon Sep 17 00:00:00 2001
From: Yanyi Wu <i@yanyiwu.com>
Date: Tue, 3 May 2016 19:53:40 +0800
Subject: [PATCH 1/4] Update TextRankExtractor.hpp

Remove an unused function that uses the C++11 keyword `auto`.
---
 include/cppjieba/TextRankExtractor.hpp | 71 ++++++++++++--------------
 1 file changed, 32 insertions(+), 39 deletions(-)
diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 82e56f3..948f3ad 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -1,4 +1,4 @@
-﻿#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
+#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
 #define CPPJIEBA_TEXTRANK_EXTRACTOR_H
 
 #include <cmath>
@@ -82,40 +82,40 @@ namespace cppjieba {
       }
     };
 
-  public: 
-  TextRankExtractor(const string& dictPath, 
-        const string& hmmFilePath, 
-        const string& stopWordPath, 
-        const string& userDict = "") 
-    : segment_(dictPath, hmmFilePath, userDict) {
-    LoadStopWordDict(stopWordPath);
-  }
-  TextRankExtractor(const DictTrie* dictTrie, 
-        const HMMModel* model,
-        const string& stopWordPath) 
-    : segment_(dictTrie, model) {
-    LoadStopWordDict(stopWordPath);
+  public: 
+  TextRankExtractor(const string& dictPath, 
+        const string& hmmFilePath, 
+        const string& stopWordPath, 
+        const string& userDict = "") 
+    : segment_(dictPath, hmmFilePath, userDict) {
+    LoadStopWordDict(stopWordPath);
+  }
+  TextRankExtractor(const DictTrie* dictTrie, 
+        const HMMModel* model,
+        const string& stopWordPath) 
+    : segment_(dictTrie, model) {
+    LoadStopWordDict(stopWordPath);
   }
     TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
         LoadStopWordDict(stopWordPath);
     }
     ~TextRankExtractor() {
     }
-
-    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-      vector<Word> topWords;
-      Extract(sentence, topWords, topN);
-      for (size_t i = 0; i < topWords.size(); i++) {
-        keywords.push_back(topWords[i].word);
-      }
-    }
-
-    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-      vector<Word> topWords;
-      Extract(sentence, topWords, topN);
-      for (size_t i = 0; i < topWords.size(); i++) {
-        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
-      }
+
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(topWords[i].word);
+      }
+    }
+
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+      }
     }
 
     void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
@@ -176,13 +176,6 @@ namespace cppjieba {
       return false;
     }
 
-    static void sortMapValue(WordMap &map,vector<Word>& result,size_t topN){
-      for(auto i=map.begin();i!=map.end();i++){
-        result.push_back(i->second);
-      }
-      partial_sort(result.begin(),result.begin()+topN,result.end(),Compare);
-    }
-
     static bool Compare(const Word &x,const Word &y){
       return x.weight > y.weight;
     }
@@ -190,9 +183,9 @@ namespace cppjieba {
     MixSegment segment_;
     unordered_set<string> stopWords_;
   };
-  
-  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
-    return os << word.word << '|' << word.offsets << '|' << word.weight; 
+  
+  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
+    return os << word.word << '|' << word.offsets << '|' << word.weight; 
   }
 } // namespace cppjieba
 

From a1ea1d075778ec049d55e5f47eb749e6a4909ae9 Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 20:01:44 +0800
Subject: [PATCH 2/4] add textrank unittest into cmake

---
 test/unittest/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt
index 2655215..ef19de4 100644
--- a/test/unittest/CMakeLists.txt
+++ b/test/unittest/CMakeLists.txt
@@ -13,6 +13,7 @@ ADD_EXECUTABLE(test.run
     pos_tagger_test.cpp
     jieba_test.cpp
     pre_filter_test.cpp
+    textrank_test.cpp
 )
 TARGET_LINK_LIBRARIES(test.run gtest pthread)
 

From 39316114c526ed55dcb889dc9ab3eb3c1600000b Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 20:49:47 +0800
Subject: [PATCH 3/4] correct unittest case

---
 test/unittest/textrank_test.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index 39b2163..c4ae193 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -42,7 +42,8 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
+    ASSERT_EQ(res, "[\"专业|[\"36\"]|1\", \"CEO|[\"94\"]|0.94764\", \"当上|[\"87\"]|0.79271\", \"手扶拖拉机|[\"21\"]|0.789347\", \"走上|[\"100\"]|0.768261\"]");
+    // ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
   }
 
   {
@@ -70,7 +71,8 @@ TEST(TextRankExtractorTest, Test2) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"\xE4\xBC\x98\xE7\xA7\x80|[\"6\"]|1\", \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F|[\"12\"]|0.996685\", \"\xE8\x93\x9D\xE7\xBF\x94|[\"0\"]|0.992994\"]");
+    ASSERT_EQ(res, "[\"蓝翔|[\"0\"]|1\", \"毕业生|[\"12\"]|0.996685\", \"优秀|[\"6\"]|0.992994\"]");
+    //ASSERT_EQ(res, "[\"\xE4\xBC\x98\xE7\xA7\x80|[\"6\"]|1\", \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F|[\"12\"]|0.996685\", \"\xE8\x93\x9D\xE7\xBF\x94|[\"0\"]|0.992994\"]");
   }
 
   {

From f253db0133a8ba680acad0ad7e1f8e4f64e10059 Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 21:24:40 +0800
Subject: [PATCH 4/4] use map/set instead of unordered_map/unordered_set to
 make result stable

---
 include/cppjieba/TextRankExtractor.hpp | 11 +++++++----
 test/unittest/textrank_test.cpp        | 14 +++++++++-----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 948f3ad..a625695 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -12,15 +12,18 @@ namespace cppjieba {
   public:
     typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
   private:
-    typedef std::unordered_map<string,Word> WordMap;
+    typedef std::map<string,Word> WordMap;
   
     class WordGraph{
     private:
       typedef double Score;
       typedef string Node;
-      typedef std::unordered_set<Node> NodeSet;
-      typedef std::unordered_map<Node,double> Edges;
-      typedef std::unordered_map<Node,Edges> Graph;
+      typedef std::set<Node> NodeSet;
+
+      typedef std::map<Node,double> Edges;
+      typedef std::map<Node,Edges> Graph;
+      //typedef std::unordered_map<Node,double> Edges;
+      //typedef std::unordered_map<Node,Edges> Graph;
 
       double d;
       Graph graph;
diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index c4ae193..70dbc52 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -24,14 +24,16 @@ TEST(TextRankExtractorTest, Test1) {
       vector<pair<string, double> > words;
       Extractor.Extract(s, words, topN);
       res << words;
-      ASSERT_EQ(res, "[\"世界:1\", \"你好:0.514286\"]");
+      //ASSERT_EQ(res, "[\"世界:1\", \"你好:0.514286\"]");
+      ASSERT_EQ(res, "[\"\xE4\xB8\x96\xE7\x95\x8C:1\", \"\xE4\xBD\xA0\xE5\xA5\xBD:0.519787\"]");
     }
 
     {
       vector<TextRankExtractor::Word> words;
       Extractor.Extract(s, words, topN);
       res << words;
-      ASSERT_EQ(res, "[\"世界|[\"6\", \"12\"]|1\", \"你好|[\"0\"]|0.514286\"]");
+      //ASSERT_EQ(res, "[\"世界|[\"6\", \"12\"]|1\", \"你好|[\"0\"]|0.514286\"]");
+      ASSERT_EQ(res, "[\"\xE4\xB8\x96\xE7\x95\x8C|[\"6\", \"12\"]|1\", \"\xE4\xBD\xA0\xE5\xA5\xBD|[\"0\"]|0.519787\"]");
     }
   }
 
@@ -42,7 +44,7 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"专业|[\"36\"]|1\", \"CEO|[\"94\"]|0.94764\", \"当上|[\"87\"]|0.79271\", \"手扶拖拉机|[\"21\"]|0.789347\", \"走上|[\"100\"]|0.768261\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.95375\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.801701\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.798968\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.775505\"]");
     // ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
   }
 
@@ -53,7 +55,8 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|1\", \"iPhone6|[\"6\"]|0.996126\"]");
+    //ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
   }
 }
 
@@ -82,6 +85,7 @@ TEST(TextRankExtractorTest, Test2) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    //ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|1\", \"iPhone6|[\"6\"]|0.996126\"]");
   }
 }