From 669e971e3e4d6b60a36081f0610dfff5f6eca96b Mon Sep 17 00:00:00 2001
From: mayunyun <121999660@qq.com>
Date: Mon, 25 Apr 2016 20:20:50 +0800
Subject: [PATCH 01/10] new file: include/cppjieba/TextRankExtractor.hpp
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a TextRank keyword extractor to CppJieba
新增TextRank关键词提取
---
 include/cppjieba/TextRankExtractor.hpp | 168 +++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 include/cppjieba/TextRankExtractor.hpp
diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
new file mode 100644
index 0000000..a97c537
--- /dev/null
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -0,0 +1,168 @@
+﻿#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
+#define CPPJIEBA_TEXTRANK_EXTRACTOR_H
+
+#include <cmath>
+#include "Jieba.hpp"
+
+namespace cppjieba {
+	using namespace limonp;
+	using namespace std;
+
+	class TextRankExtractor {
+	public:
+		typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
+	private:
+		typedef std::unordered_map<string,Word>                                              WordMap;
+	
+		class WordGraph{
+		private:
+			typedef double                                                                                         Score;
+			typedef string                                                                                           Node;
+			typedef std::unordered_set<Node>                                                          NodeSet;
+			typedef std::unordered_map<Node,double>                                            Edges;
+			typedef std::unordered_map<Node,Edges>                                             Graph;
+
+			double d;
+			Graph graph;
+			NodeSet nodeSet;
+		public:
+			WordGraph(): d(0.85) {};
+			WordGraph(double in_d): d(in_d) {};
+
+			void addEdge(Node start,Node end,double weight){
+				Edges temp;
+				Edges::iterator gotEdges;
+				nodeSet.insert(start);
+				nodeSet.insert(end);
+				graph[start][end]+=weight;
+				graph[end][start]+=weight;
+			}
+
+			void rank(WordMap &ws,size_t rankTime=10){
+				WordMap outSum;
+				Score wsdef, min_rank, max_rank;
+
+				if( graph.size() == 0)
+					return;
+
+				wsdef = 1.0 / graph.size();
+
+				for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
+					// edges->first start节点；edge->first end节点；edge->second 权重
+					ws[edges->first].word=edges->first;
+					ws[edges->first].weight=wsdef;
+					outSum[edges->first].weight=0;
+					for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
+						outSum[edges->first].weight+=edge->second;
+					}
+				}
+				//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
+				for( size_t i=0; i<rankTime; i++ ){
+					for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
+						double s = 0;
+						for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
+							// edge->first end节点；edge->second 权重
+							s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
+						ws[*node].weight = (1 - d) + d * s;
+					}
+				}
+
+				min_rank=max_rank=ws.begin()->second.weight;
+				for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+					if( i->second.weight < min_rank ){
+						min_rank = i->second.weight;
+					}
+					if( i->second.weight > max_rank ){
+						max_rank = i->second.weight;
+					}
+				}
+				for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+					ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
+				}
+			}
+		};
+
+	public: 
+		TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
+				LoadStopWordDict(stopWordPath);
+		}
+		~TextRankExtractor() {
+		}
+
+		void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
+			vector<string> words;
+			segment_.Cut(sentence, words);
+
+			TextRankExtractor::WordGraph graph;
+			WordMap wordmap;
+			size_t offset = 0;
+
+			for(size_t i=0; i < words.size(); i++){
+				size_t t = offset;
+				offset += words[i].size();
+				if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
+					continue;
+				}
+				for(size_t j=i+1;j<i+span && j<words.size();j++){
+					if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
+						continue;
+					}
+					graph.addEdge(words[i],words[j],1);
+				}
+				wordmap[words[i]].offsets.push_back(t);
+			}
+			if (offset != sentence.size()) {
+				XLOG(ERROR) << "words illegal";
+				return;
+			}
+
+			graph.rank(wordmap,rankTime);
+			
+			keywords.clear();
+			keywords.reserve(wordmap.size());
+			for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+				keywords.push_back(itr->second);
+			}
+			
+			topN = min(topN, keywords.size());
+			partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+			keywords.resize(topN);
+		}
+	private:
+		void LoadStopWordDict(const string& filePath) {
+			ifstream ifs(filePath.c_str());
+			XCHECK(ifs.is_open()) << "open " << filePath << " failed";
+			string line ;
+			while (getline(ifs, line)) {
+				stopWords_.insert(line);
+			}
+			assert(stopWords_.size());
+		}
+
+		bool IsSingleWord(const string& str) const {
+			Unicode unicode;
+			TransCode::Decode(str, unicode);
+			if (unicode.size() == 1)
+				return true;
+			return false;
+		}
+
+		static void sortMapValue(WordMap &map,vector<Word>& result,size_t topN){
+			for(auto i=map.begin();i!=map.end();i++){
+				result.push_back(i->second);
+			}
+			partial_sort(result.begin(),result.begin()+topN,result.end(),Compare);
+		}
+
+		static bool Compare(const Word &x,const Word &y){
+			return x.weight > y.weight;
+		}
+
+		MixSegment segment_;
+		unordered_set<string> stopWords_;
+	};
+} // namespace cppjieba
+
+#endif
+
+

From 1aa0a32d900133b732f70f46f23c3e3cc1dc22df Mon Sep 17 00:00:00 2001
From: mayunyun <121999660@qq.com>
Date: Mon, 25 Apr 2016 20:28:47 +0800
Subject: [PATCH 02/10] code format check

---
 include/cppjieba/TextRankExtractor.hpp | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index a97c537..20d295b 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -12,15 +12,15 @@ namespace cppjieba {
 	public:
 		typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
 	private:
-		typedef std::unordered_map<string,Word>                                              WordMap;
+		typedef std::unordered_map<string,Word> WordMap;
 	
 		class WordGraph{
 		private:
-			typedef double                                                                                         Score;
-			typedef string                                                                                           Node;
-			typedef std::unordered_set<Node>                                                          NodeSet;
-			typedef std::unordered_map<Node,double>                                            Edges;
-			typedef std::unordered_map<Node,Edges>                                             Graph;
+			typedef double Score;
+			typedef string Node;
+			typedef std::unordered_set<Node> NodeSet;
+			typedef std::unordered_map<Node,double> Edges;
+			typedef std::unordered_map<Node,Edges> Graph;
 
 			double d;
 			Graph graph;
@@ -117,15 +117,15 @@ namespace cppjieba {
 			}
 
 			graph.rank(wordmap,rankTime);
-			
-			keywords.clear();
+			
+			keywords.clear();
 			keywords.reserve(wordmap.size());
-			for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
-				keywords.push_back(itr->second);
+			for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+				keywords.push_back(itr->second);
 			}
-			
-			topN = min(topN, keywords.size());
-			partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+			
+			topN = min(topN, keywords.size());
+			partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
 			keywords.resize(topN);
 		}
 	private:

From f2de41c15e96c4ec2c4b2a47913705d35c8b323e Mon Sep 17 00:00:00 2001
From: mayunyun <121999660@qq.com>
Date: Tue, 3 May 2016 09:03:16 +0800
Subject: [PATCH 03/10] code layout change: tab -> space

---
 include/cppjieba/TextRankExtractor.hpp | 278 ++++++++++++-------------
 1 file changed, 139 insertions(+), 139 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 20d295b..8e7d18a 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -5,162 +5,162 @@
 #include "Jieba.hpp"
 
 namespace cppjieba {
-	using namespace limonp;
-	using namespace std;
+  using namespace limonp;
+  using namespace std;
 
-	class TextRankExtractor {
-	public:
-		typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
-	private:
-		typedef std::unordered_map<string,Word> WordMap;
-	
-		class WordGraph{
-		private:
-			typedef double Score;
-			typedef string Node;
-			typedef std::unordered_set<Node> NodeSet;
-			typedef std::unordered_map<Node,double> Edges;
-			typedef std::unordered_map<Node,Edges> Graph;
+  class TextRankExtractor {
+  public:
+    typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
+  private:
+    typedef std::unordered_map<string,Word> WordMap;
+  
+    class WordGraph{
+    private:
+      typedef double Score;
+      typedef string Node;
+      typedef std::unordered_set<Node> NodeSet;
+      typedef std::unordered_map<Node,double> Edges;
+      typedef std::unordered_map<Node,Edges> Graph;
 
-			double d;
-			Graph graph;
-			NodeSet nodeSet;
-		public:
-			WordGraph(): d(0.85) {};
-			WordGraph(double in_d): d(in_d) {};
+      double d;
+      Graph graph;
+      NodeSet nodeSet;
+    public:
+      WordGraph(): d(0.85) {};
+      WordGraph(double in_d): d(in_d) {};
 
-			void addEdge(Node start,Node end,double weight){
-				Edges temp;
-				Edges::iterator gotEdges;
-				nodeSet.insert(start);
-				nodeSet.insert(end);
-				graph[start][end]+=weight;
-				graph[end][start]+=weight;
-			}
+      void addEdge(Node start,Node end,double weight){
+        Edges temp;
+        Edges::iterator gotEdges;
+        nodeSet.insert(start);
+        nodeSet.insert(end);
+        graph[start][end]+=weight;
+        graph[end][start]+=weight;
+      }
 
-			void rank(WordMap &ws,size_t rankTime=10){
-				WordMap outSum;
-				Score wsdef, min_rank, max_rank;
+      void rank(WordMap &ws,size_t rankTime=10){
+        WordMap outSum;
+        Score wsdef, min_rank, max_rank;
 
-				if( graph.size() == 0)
-					return;
+        if( graph.size() == 0)
+          return;
 
-				wsdef = 1.0 / graph.size();
+        wsdef = 1.0 / graph.size();
 
-				for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
-					// edges->first start节点；edge->first end节点；edge->second 权重
-					ws[edges->first].word=edges->first;
-					ws[edges->first].weight=wsdef;
-					outSum[edges->first].weight=0;
-					for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
-						outSum[edges->first].weight+=edge->second;
-					}
-				}
-				//sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
-				for( size_t i=0; i<rankTime; i++ ){
-					for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
-						double s = 0;
-						for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
-							// edge->first end节点；edge->second 权重
-							s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
-						ws[*node].weight = (1 - d) + d * s;
-					}
-				}
+        for(Graph::iterator edges=graph.begin();edges!=graph.end();++edges){
+          // edges->first start节点；edge->first end节点；edge->second 权重
+          ws[edges->first].word=edges->first;
+          ws[edges->first].weight=wsdef;
+          outSum[edges->first].weight=0;
+          for(Edges::iterator edge=edges->second.begin();edge!=edges->second.end();++edge){
+            outSum[edges->first].weight+=edge->second;
+          }
+        }
+        //sort(nodeSet.begin(),nodeSet.end()); 是否需要排序?
+        for( size_t i=0; i<rankTime; i++ ){
+          for(NodeSet::iterator node = nodeSet.begin(); node != nodeSet.end(); node++ ){
+            double s = 0;
+            for( Edges::iterator edge= graph[*node].begin(); edge != graph[*node].end(); edge++ )
+              // edge->first end节点；edge->second 权重
+              s += edge->second / outSum[edge->first].weight * ws[edge->first].weight;
+            ws[*node].weight = (1 - d) + d * s;
+          }
+        }
 
-				min_rank=max_rank=ws.begin()->second.weight;
-				for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
-					if( i->second.weight < min_rank ){
-						min_rank = i->second.weight;
-					}
-					if( i->second.weight > max_rank ){
-						max_rank = i->second.weight;
-					}
-				}
-				for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
-					ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
-				}
-			}
-		};
+        min_rank=max_rank=ws.begin()->second.weight;
+        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+          if( i->second.weight < min_rank ){
+            min_rank = i->second.weight;
+          }
+          if( i->second.weight > max_rank ){
+            max_rank = i->second.weight;
+          }
+        }
+        for(WordMap::iterator i = ws.begin(); i != ws.end(); i ++){
+          ws[i->first].weight = (i->second.weight - min_rank / 10.0) / (max_rank - min_rank / 10.0);
+        }
+      }
+    };
 
-	public: 
-		TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
-				LoadStopWordDict(stopWordPath);
-		}
-		~TextRankExtractor() {
-		}
+  public: 
+    TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
+        LoadStopWordDict(stopWordPath);
+    }
+    ~TextRankExtractor() {
+    }
 
-		void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
-			vector<string> words;
-			segment_.Cut(sentence, words);
+    void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
+      vector<string> words;
+      segment_.Cut(sentence, words);
 
-			TextRankExtractor::WordGraph graph;
-			WordMap wordmap;
-			size_t offset = 0;
+      TextRankExtractor::WordGraph graph;
+      WordMap wordmap;
+      size_t offset = 0;
 
-			for(size_t i=0; i < words.size(); i++){
-				size_t t = offset;
-				offset += words[i].size();
-				if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
-					continue;
-				}
-				for(size_t j=i+1;j<i+span && j<words.size();j++){
-					if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
-						continue;
-					}
-					graph.addEdge(words[i],words[j],1);
-				}
-				wordmap[words[i]].offsets.push_back(t);
-			}
-			if (offset != sentence.size()) {
-				XLOG(ERROR) << "words illegal";
-				return;
-			}
+      for(size_t i=0; i < words.size(); i++){
+        size_t t = offset;
+        offset += words[i].size();
+        if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
+          continue;
+        }
+        for(size_t j=i+1;j<i+span && j<words.size();j++){
+          if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
+            continue;
+          }
+          graph.addEdge(words[i],words[j],1);
+        }
+        wordmap[words[i]].offsets.push_back(t);
+      }
+      if (offset != sentence.size()) {
+        XLOG(ERROR) << "words illegal";
+        return;
+      }
 
-			graph.rank(wordmap,rankTime);
-			
-			keywords.clear();
-			keywords.reserve(wordmap.size());
-			for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
-				keywords.push_back(itr->second);
-			}
-			
-			topN = min(topN, keywords.size());
-			partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
-			keywords.resize(topN);
-		}
-	private:
-		void LoadStopWordDict(const string& filePath) {
-			ifstream ifs(filePath.c_str());
-			XCHECK(ifs.is_open()) << "open " << filePath << " failed";
-			string line ;
-			while (getline(ifs, line)) {
-				stopWords_.insert(line);
-			}
-			assert(stopWords_.size());
-		}
+      graph.rank(wordmap,rankTime);
+      
+      keywords.clear();
+      keywords.reserve(wordmap.size());
+      for (WordMap::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+        keywords.push_back(itr->second);
+      }
+      
+      topN = min(topN, keywords.size());
+      partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+      keywords.resize(topN);
+    }
+  private:
+    void LoadStopWordDict(const string& filePath) {
+      ifstream ifs(filePath.c_str());
+      XCHECK(ifs.is_open()) << "open " << filePath << " failed";
+      string line ;
+      while (getline(ifs, line)) {
+        stopWords_.insert(line);
+      }
+      assert(stopWords_.size());
+    }
 
-		bool IsSingleWord(const string& str) const {
-			Unicode unicode;
-			TransCode::Decode(str, unicode);
-			if (unicode.size() == 1)
-				return true;
-			return false;
-		}
+    bool IsSingleWord(const string& str) const {
+      Unicode unicode;
+      TransCode::Decode(str, unicode);
+      if (unicode.size() == 1)
+        return true;
+      return false;
+    }
 
-		static void sortMapValue(WordMap &map,vector<Word>& result,size_t topN){
-			for(auto i=map.begin();i!=map.end();i++){
-				result.push_back(i->second);
-			}
-			partial_sort(result.begin(),result.begin()+topN,result.end(),Compare);
-		}
+    static void sortMapValue(WordMap &map,vector<Word>& result,size_t topN){
+      for(auto i=map.begin();i!=map.end();i++){
+        result.push_back(i->second);
+      }
+      partial_sort(result.begin(),result.begin()+topN,result.end(),Compare);
+    }
 
-		static bool Compare(const Word &x,const Word &y){
-			return x.weight > y.weight;
-		}
+    static bool Compare(const Word &x,const Word &y){
+      return x.weight > y.weight;
+    }
 
-		MixSegment segment_;
-		unordered_set<string> stopWords_;
-	};
+    MixSegment segment_;
+    unordered_set<string> stopWords_;
+  };
 } // namespace cppjieba
 
 #endif

From 0f66a923b368cbb4f81b49fb12e45f48aba7469c Mon Sep 17 00:00:00 2001
From: mayunyun <121999660@qq.com>
Date: Tue, 3 May 2016 18:06:14 +0800
Subject: [PATCH 04/10] =?UTF-8?q?1.=E5=A2=9E=E5=8A=A0=E5=8D=95=E5=85=83?=
 =?UTF-8?q?=E6=B5=8B=E8=AF=95=202.=E5=A2=9E=E5=8A=A0=E4=BA=86=E6=9E=84?=
 =?UTF-8?q?=E9=80=A0=E5=87=BD=E6=95=B0=E7=9A=84=E9=87=8D=E8=BD=BD=EF=BC=8C?=
 =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E4=BA=86=E6=8F=90=E5=8F=96=E5=87=BD=E6=95=B0?=
 =?UTF-8?q?=E7=9A=84=E9=87=8D=E8=BD=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 include/cppjieba/TextRankExtractor.hpp | 35 ++++++++++-
 test/unittest/textrank_test.cpp        | 85 ++++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 1 deletion(-)
 create mode 100644 test/unittest/textrank_test.cpp

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 8e7d18a..82e56f3 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -82,12 +82,41 @@ namespace cppjieba {
       }
     };
 
-  public: 
+  public: 
+  TextRankExtractor(const string& dictPath, 
+        const string& hmmFilePath, 
+        const string& stopWordPath, 
+        const string& userDict = "") 
+    : segment_(dictPath, hmmFilePath, userDict) {
+    LoadStopWordDict(stopWordPath);
+  }
+  TextRankExtractor(const DictTrie* dictTrie, 
+        const HMMModel* model,
+        const string& stopWordPath) 
+    : segment_(dictTrie, model) {
+    LoadStopWordDict(stopWordPath);
+  }
     TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
         LoadStopWordDict(stopWordPath);
     }
     ~TextRankExtractor() {
     }
+
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(topWords[i].word);
+      }
+    }
+
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+      }
+    }
 
     void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
       vector<string> words;
@@ -161,6 +190,10 @@ namespace cppjieba {
     MixSegment segment_;
     unordered_set<string> stopWords_;
   };
+  
+  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
+    return os << word.word << '|' << word.offsets << '|' << word.weight; 
+  }
 } // namespace cppjieba
 
 #endif
diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
new file mode 100644
index 0000000..39b2163
--- /dev/null
+++ b/test/unittest/textrank_test.cpp
@@ -0,0 +1,85 @@
+#include "cppjieba/TextRankExtractor.hpp"
+#include "gtest/gtest.h"
+
+using namespace cppjieba;
+
+TEST(TextRankExtractorTest, Test1) {
+  TextRankExtractor Extractor(
+    "../test/testdata/extra_dict/jieba.dict.small.utf8",
+    "../dict/hmm_model.utf8", 
+    "../dict/stop_words.utf8");
+  {
+    string s("你好世界世界而且而且");
+    string res;
+    size_t topN = 5;
+
+    {
+      vector<string> words;
+      Extractor.Extract(s, words, topN);
+      res << words;
+      ASSERT_EQ(res, "[\"世界\", \"你好\"]");
+    }
+
+    {
+      vector<pair<string, double> > words;
+      Extractor.Extract(s, words, topN);
+      res << words;
+      ASSERT_EQ(res, "[\"世界:1\", \"你好:0.514286\"]");
+    }
+
+    {
+      vector<TextRankExtractor::Word> words;
+      Extractor.Extract(s, words, topN);
+      res << words;
+      ASSERT_EQ(res, "[\"世界|[\"6\", \"12\"]|1\", \"你好|[\"0\"]|0.514286\"]");
+    }
+  }
+
+  { 
+    string s("\xe6\x88\x91\xe6\x98\xaf\xe6\x8b\x96\xe6\x8b\x89\xe6\x9c\xba\xe5\xad\xa6\xe9\x99\xa2\xe6\x89\x8b\xe6\x89\xb6\xe6\x8b\x96\xe6\x8b\x89\xe6\x9c\xba\xe4\xb8\x93\xe4\xb8\x9a\xe7\x9a\x84\xe3\x80\x82\xe4\xb8\x8d\xe7\x94\xa8\xe5\xa4\x9a\xe4\xb9\x85\xef\xbc\x8c\xe6\x88\x91\xe5\xb0\xb1\xe4\xbc\x9a\xe5\x8d\x87\xe8\x81\x8c\xe5\x8a\xa0\xe8\x96\xaa\xef\xbc\x8c\xe5\xbd\x93\xe4\xb8\x8a CEO\xef\xbc\x8c\xe8\xb5\xb0\xe4\xb8\x8a\xe4\xba\xba\xe7\x94\x9f\xe5\xb7\x85\xe5\xb3\xb0");
+    string res;
+    vector<TextRankExtractor::Word> wordweights;
+    size_t topN = 5;
+    Extractor.Extract(s, wordweights, topN);
+    res << wordweights;
+    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
+  }
+
+  {
+    string s("一部iPhone6");
+    string res;
+    vector<TextRankExtractor::Word> wordweights;
+    size_t topN = 5;
+    Extractor.Extract(s, wordweights, topN);
+    res << wordweights;
+    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+  }
+}
+
+TEST(TextRankExtractorTest, Test2) {
+  TextRankExtractor Extractor(
+    "../test/testdata/extra_dict/jieba.dict.small.utf8",
+    "../dict/hmm_model.utf8",
+    "../dict/stop_words.utf8",
+    "../test/testdata/userdict.utf8");
+
+  {
+    string s("\xe8\x93\x9d\xe7\xbf\x94\xe4\xbc\x98\xe7\xa7\x80\xe6\xaf\x95\xe4\xb8\x9a\xe7\x94\x9f");
+    string res;
+    vector<TextRankExtractor::Word> wordweights;
+    size_t topN = 5;
+    Extractor.Extract(s, wordweights, topN);
+    res << wordweights;
+    ASSERT_EQ(res, "[\"\xE4\xBC\x98\xE7\xA7\x80|[\"6\"]|1\", \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F|[\"12\"]|0.996685\", \"\xE8\x93\x9D\xE7\xBF\x94|[\"0\"]|0.992994\"]");
+  }
+
+  {
+    string s("一部iPhone6");
+    string res;
+    vector<TextRankExtractor::Word> wordweights;
+    size_t topN = 5;
+    Extractor.Extract(s, wordweights, topN);
+    res << wordweights;
+    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+  }
+}

From 6d105a864df9fc197c884e2a1aecc132cc36f425 Mon Sep 17 00:00:00 2001
From: Yanyi Wu <i@yanyiwu.com>
Date: Tue, 3 May 2016 19:53:40 +0800
Subject: [PATCH 05/10] Update TextRankExtractor.hpp

Remove the unused function that relied on the C++11 keyword `auto`.
---
 include/cppjieba/TextRankExtractor.hpp | 71 ++++++++++++--------------
 1 file changed, 32 insertions(+), 39 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 82e56f3..948f3ad 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -1,4 +1,4 @@
-﻿#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
+#ifndef CPPJIEBA_TEXTRANK_EXTRACTOR_H
 #define CPPJIEBA_TEXTRANK_EXTRACTOR_H
 
 #include <cmath>
@@ -82,40 +82,40 @@ namespace cppjieba {
       }
     };
 
-  public: 
-  TextRankExtractor(const string& dictPath, 
-        const string& hmmFilePath, 
-        const string& stopWordPath, 
-        const string& userDict = "") 
-    : segment_(dictPath, hmmFilePath, userDict) {
-    LoadStopWordDict(stopWordPath);
-  }
-  TextRankExtractor(const DictTrie* dictTrie, 
-        const HMMModel* model,
-        const string& stopWordPath) 
-    : segment_(dictTrie, model) {
-    LoadStopWordDict(stopWordPath);
+  public: 
+  TextRankExtractor(const string& dictPath, 
+        const string& hmmFilePath, 
+        const string& stopWordPath, 
+        const string& userDict = "") 
+    : segment_(dictPath, hmmFilePath, userDict) {
+    LoadStopWordDict(stopWordPath);
+  }
+  TextRankExtractor(const DictTrie* dictTrie, 
+        const HMMModel* model,
+        const string& stopWordPath) 
+    : segment_(dictTrie, model) {
+    LoadStopWordDict(stopWordPath);
   }
     TextRankExtractor(const Jieba& jieba, const string& stopWordPath) : segment_(jieba.GetDictTrie(), jieba.GetHMMModel()) {
         LoadStopWordDict(stopWordPath);
     }
     ~TextRankExtractor() {
     }
-
-    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-      vector<Word> topWords;
-      Extract(sentence, topWords, topN);
-      for (size_t i = 0; i < topWords.size(); i++) {
-        keywords.push_back(topWords[i].word);
-      }
-    }
-
-    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-      vector<Word> topWords;
-      Extract(sentence, topWords, topN);
-      for (size_t i = 0; i < topWords.size(); i++) {
-        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
-      }
+
+    void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(topWords[i].word);
+      }
+    }
+
+    void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
+      vector<Word> topWords;
+      Extract(sentence, topWords, topN);
+      for (size_t i = 0; i < topWords.size(); i++) {
+        keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+      }
     }
 
     void Extract(const string& sentence, vector<Word>& keywords, size_t topN, size_t span=5,size_t rankTime=10) const {
@@ -176,13 +176,6 @@ namespace cppjieba {
       return false;
     }
 
-    static void sortMapValue(WordMap &map,vector<Word>& result,size_t topN){
-      for(auto i=map.begin();i!=map.end();i++){
-        result.push_back(i->second);
-      }
-      partial_sort(result.begin(),result.begin()+topN,result.end(),Compare);
-    }
-
     static bool Compare(const Word &x,const Word &y){
       return x.weight > y.weight;
     }
@@ -190,9 +183,9 @@ namespace cppjieba {
     MixSegment segment_;
     unordered_set<string> stopWords_;
   };
-  
-  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
-    return os << word.word << '|' << word.offsets << '|' << word.weight; 
+  
+  inline ostream& operator << (ostream& os, const TextRankExtractor::Word& word) {
+    return os << word.word << '|' << word.offsets << '|' << word.weight; 
   }
 } // namespace cppjieba
 

From a1ea1d075778ec049d55e5f47eb749e6a4909ae9 Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 20:01:44 +0800
Subject: [PATCH 06/10] add textrank unittest into cmake

---
 test/unittest/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt
index 2655215..ef19de4 100644
--- a/test/unittest/CMakeLists.txt
+++ b/test/unittest/CMakeLists.txt
@@ -13,6 +13,7 @@ ADD_EXECUTABLE(test.run
     pos_tagger_test.cpp
     jieba_test.cpp
     pre_filter_test.cpp
+    textrank_test.cpp
 )
 TARGET_LINK_LIBRARIES(test.run gtest pthread)
 

From 39316114c526ed55dcb889dc9ab3eb3c1600000b Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 20:49:47 +0800
Subject: [PATCH 07/10] correct unittest case

---
 test/unittest/textrank_test.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index 39b2163..c4ae193 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -42,7 +42,8 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
+    ASSERT_EQ(res, "[\"专业|[\"36\"]|1\", \"CEO|[\"94\"]|0.94764\", \"当上|[\"87\"]|0.79271\", \"手扶拖拉机|[\"21\"]|0.789347\", \"走上|[\"100\"]|0.768261\"]");
+    // ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
   }
 
   {
@@ -70,7 +71,8 @@ TEST(TextRankExtractorTest, Test2) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"\xE4\xBC\x98\xE7\xA7\x80|[\"6\"]|1\", \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F|[\"12\"]|0.996685\", \"\xE8\x93\x9D\xE7\xBF\x94|[\"0\"]|0.992994\"]");
+    ASSERT_EQ(res, "[\"蓝翔|[\"0\"]|1\", \"毕业生|[\"12\"]|0.996685\", \"优秀|[\"6\"]|0.992994\"]");
+    //ASSERT_EQ(res, "[\"\xE4\xBC\x98\xE7\xA7\x80|[\"6\"]|1\", \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F|[\"12\"]|0.996685\", \"\xE8\x93\x9D\xE7\xBF\x94|[\"0\"]|0.992994\"]");
   }
 
   {

From f253db0133a8ba680acad0ad7e1f8e4f64e10059 Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Tue, 3 May 2016 21:24:40 +0800
Subject: [PATCH 08/10] use map/set instead of unordered_map/unordered_set to
 make result stable

---
 include/cppjieba/TextRankExtractor.hpp | 11 +++++++----
 test/unittest/textrank_test.cpp        | 14 +++++++++-----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index 948f3ad..a625695 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -12,15 +12,18 @@ namespace cppjieba {
   public:
     typedef struct _Word {string word;vector<size_t> offsets;double weight;}    Word; // struct Word
   private:
-    typedef std::unordered_map<string,Word> WordMap;
+    typedef std::map<string,Word> WordMap;
   
     class WordGraph{
     private:
       typedef double Score;
       typedef string Node;
-      typedef std::unordered_set<Node> NodeSet;
-      typedef std::unordered_map<Node,double> Edges;
-      typedef std::unordered_map<Node,Edges> Graph;
+      typedef std::set<Node> NodeSet;
+
+      typedef std::map<Node,double> Edges;
+      typedef std::map<Node,Edges> Graph;
+      //typedef std::unordered_map<Node,double> Edges;
+      //typedef std::unordered_map<Node,Edges> Graph;
 
       double d;
       Graph graph;
diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index c4ae193..70dbc52 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -24,14 +24,16 @@ TEST(TextRankExtractorTest, Test1) {
       vector<pair<string, double> > words;
       Extractor.Extract(s, words, topN);
       res << words;
-      ASSERT_EQ(res, "[\"世界:1\", \"你好:0.514286\"]");
+      //ASSERT_EQ(res, "[\"世界:1\", \"你好:0.514286\"]");
+      ASSERT_EQ(res, "[\"\xE4\xB8\x96\xE7\x95\x8C:1\", \"\xE4\xBD\xA0\xE5\xA5\xBD:0.519787\"]");
     }
 
     {
       vector<TextRankExtractor::Word> words;
       Extractor.Extract(s, words, topN);
       res << words;
-      ASSERT_EQ(res, "[\"世界|[\"6\", \"12\"]|1\", \"你好|[\"0\"]|0.514286\"]");
+      //ASSERT_EQ(res, "[\"世界|[\"6\", \"12\"]|1\", \"你好|[\"0\"]|0.514286\"]");
+      ASSERT_EQ(res, "[\"\xE4\xB8\x96\xE7\x95\x8C|[\"6\", \"12\"]|1\", \"\xE4\xBD\xA0\xE5\xA5\xBD|[\"0\"]|0.519787\"]");
     }
   }
 
@@ -42,7 +44,7 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"专业|[\"36\"]|1\", \"CEO|[\"94\"]|0.94764\", \"当上|[\"87\"]|0.79271\", \"手扶拖拉机|[\"21\"]|0.789347\", \"走上|[\"100\"]|0.768261\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.95375\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.801701\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.798968\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.775505\"]");
     // ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
   }
 
@@ -53,7 +55,8 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|1\", \"iPhone6|[\"6\"]|0.996126\"]");
+    //ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
   }
 }
 
@@ -82,6 +85,7 @@ TEST(TextRankExtractorTest, Test2) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    //ASSERT_EQ(res, "[\"iPhone6|[\"6\"]|1\", \"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|0.996126\"]");
+    ASSERT_EQ(res, "[\"\xE4\xB8\x80\xE9\x83\xA8|[\"0\"]|1\", \"iPhone6|[\"6\"]|0.996126\"]");
   }
 }

From d5a52a8e7b69ff70a16fb60bcf23c44a0649dd06 Mon Sep 17 00:00:00 2001
From: mayunyun <121999660@qq.com>
Date: Wed, 4 May 2016 17:52:30 +0800
Subject: [PATCH 09/10] 1. exclude stopwords from the span window 2. update unittest

---
 include/cppjieba/TextRankExtractor.hpp | 3 ++-
 test/unittest/textrank_test.cpp        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/cppjieba/TextRankExtractor.hpp b/include/cppjieba/TextRankExtractor.hpp
index a625695..34c6aae 100644
--- a/include/cppjieba/TextRankExtractor.hpp
+++ b/include/cppjieba/TextRankExtractor.hpp
@@ -135,8 +135,9 @@ namespace cppjieba {
         if (IsSingleWord(words[i]) || stopWords_.find(words[i]) != stopWords_.end()) {
           continue;
         }
-        for(size_t j=i+1;j<i+span && j<words.size();j++){
+        for(size_t j=i+1,skip=0;j<i+span+skip && j<words.size();j++){
           if (IsSingleWord(words[j]) || stopWords_.find(words[j]) != stopWords_.end()) {
+            skip++;
             continue;
           }
           graph.addEdge(words[i],words[j],1);
diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index 70dbc52..a40f8a8 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -44,7 +44,7 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.95375\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.801701\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.798968\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.775505\"]");
+    ASSERT_EQ(res, "[\"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|1\", \"\xE4\xB8\x8D\xE7\x94\xA8|[\"48\"]|0.989848\", \"\xE5\xA4\x9A\xE4\xB9\x85|[\"54\"]|0.985126\", \"\xE5\x8A\xA0\xE8\x96\xAA|[\"78\"]|0.983046\", \"\xE5\x8D\x87\xE8\x81\x8C|[\"72\"]|0.980278\"]");
     // ASSERT_EQ(res, "[\"\xE4\xB8\x93\xE4\xB8\x9A|[\"36\"]|1\", \"CEO|[\"94\"]|0.953149\", \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA|[\"21\"]|0.794203\", \"\xE5\xBD\x93\xE4\xB8\x8A|[\"87\"]|0.78716\", \"\xE8\xB5\xB0\xE4\xB8\x8A|[\"100\"]|0.767636\"]");
   }
 

From b355e9f4878c88f5db564aa5fd1ba986bb2e967e Mon Sep 17 00:00:00 2001
From: yanyiwu <i@yanyiwu.com>
Date: Wed, 4 May 2016 19:33:05 +0800
Subject: [PATCH 10/10] update unittest to pass 'make test'

---
 test/unittest/textrank_test.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp
index d62b2e5..ef7ac27 100644
--- a/test/unittest/textrank_test.cpp
+++ b/test/unittest/textrank_test.cpp
@@ -42,7 +42,8 @@ TEST(TextRankExtractorTest, Test1) {
     size_t topN = 5;
     Extractor.Extract(s, wordweights, topN);
     res << wordweights;
-    ASSERT_EQ(res, "[{\"word\": \"专业\", \"offset\": [36], \"weight\": 1}, {\"word\": \"CEO\", \"offset\": [94], \"weight\": 0.95375}, {\"word\": \"手扶拖拉机\", \"offset\": [21], \"weight\": 0.801701}, {\"word\": \"当上\", \"offset\": [87], \"weight\": 0.798968}, {\"word\": \"走上\", \"offset\": [100], \"weight\": 0.775505}]");
+    ASSERT_EQ(res, "[{\"word\": \"当上\", \"offset\": [87], \"weight\": 1}, {\"word\": \"不用\", \"offset\": [48], \"weight\": 0.989848}, {\"word\": \"多久\", \"offset\": [54], \"weight\": 0.985126}, {\"word\": \"加薪\", \"offset\": [78], \"weight\": 0.983046}, {\"word\": \"升职\", \"offset\": [72], \"weight\": 0.980278}]");
+    //ASSERT_EQ(res, "[{\"word\": \"专业\", \"offset\": [36], \"weight\": 1}, {\"word\": \"CEO\", \"offset\": [94], \"weight\": 0.95375}, {\"word\": \"手扶拖拉机\", \"offset\": [21], \"weight\": 0.801701}, {\"word\": \"当上\", \"offset\": [87], \"weight\": 0.798968}, {\"word\": \"走上\", \"offset\": [100], \"weight\": 0.775505}]");
   }
 
   {