From e85a3ef8d35416fd38b273951e79cde510e5b9ac Mon Sep 17 00:00:00 2001
From: aholic <ruochen.xu@gmail.com>
Date: Sat, 25 Oct 2014 18:29:04 +0800
Subject: [PATCH 1/6] fix bug for map.erase

---
 src/KeywordExtractor.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp
index 36c7da0..c148145 100644
--- a/src/KeywordExtractor.hpp
+++ b/src/KeywordExtractor.hpp
@@ -74,7 +74,7 @@ namespace CppJieba
                 {
                     if(_stopWords.end() != _stopWords.find(itr->first))
                     {
-                        wordmap.erase(itr++);
+                        itr = wordmap.erase(itr);
                         continue;
                     }
 

From 283c65db0a3b909b8792e71e948709e3c6cced3b Mon Sep 17 00:00:00 2001
From: aholic <ruochen.xu@gmail.com>
Date: Wed, 5 Nov 2014 11:13:00 +0800
Subject: [PATCH 2/6] fetch ahead

---
 src/KeywordExtractor.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp
index c148145..e24fd40 100644
--- a/src/KeywordExtractor.hpp
+++ b/src/KeywordExtractor.hpp
@@ -74,7 +74,7 @@ namespace CppJieba
                 {
                     if(_stopWords.end() != _stopWords.find(itr->first))
                     {
-                        itr = wordmap.erase(itr);
+                        wordmap.erase(itr);
                         continue;
                     }
 

From d1a112c0c4ef3404d09c32086046a7464fbd6e5d Mon Sep 17 00:00:00 2001
From: aholic <ruochen.xu@gmail.com>
Date: Sun, 19 Apr 2015 21:44:50 +0800
Subject: [PATCH 3/6] improve efficiency for trie tree in ugly way

---
 src/DictTrie.hpp |   9 +-
 src/UglyTrie.hpp | 253 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 258 insertions(+), 4 deletions(-)
 create mode 100644 src/UglyTrie.hpp

diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp
index 15b04fb..350fcd4 100644
--- a/src/DictTrie.hpp
+++ b/src/DictTrie.hpp
@@ -11,7 +11,8 @@
 #include "Limonp/StringUtil.hpp"
 #include "Limonp/Logger.hpp"
 #include "TransCode.hpp"
-#include "Trie.hpp"
+//#include "Trie.hpp"
+#include "UglyTrie.hpp"
 
 
 
@@ -87,7 +88,7 @@ namespace CppJieba
 
 
         private:
-            Trie * _createTrie(const vector<DictUnit>& dictUnits)
+            UglyTrie * _createTrie(const vector<DictUnit>& dictUnits)
             {
                 assert(dictUnits.size());
                 vector<Unicode> words;
@@ -98,7 +99,7 @@ namespace CppJieba
                     valuePointers.push_back(&dictUnits[i]);
                 }
 
-                Trie * trie = new Trie(words, valuePointers);
+                UglyTrie * trie = new UglyTrie(words, valuePointers);
                 return trie;
             }
             void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag)
@@ -195,7 +196,7 @@ namespace CppJieba
 
         private:
             vector<DictUnit> _nodeInfos;
-            Trie * _trie;
+            UglyTrie * _trie;
 
             double _minWeight;
             unordered_set<Unicode::value_type> _userDictSingleChineseWord;
diff --git a/src/UglyTrie.hpp b/src/UglyTrie.hpp
new file mode 100644
index 0000000..9b6bab6
--- /dev/null
+++ b/src/UglyTrie.hpp
@@ -0,0 +1,253 @@
+#ifndef CPPJIEBA_UGLY_TRIE_HPP
+#define CPPJIEBA_UGLY_TRIE_HPP
+
+#include "Limonp/StdExtension.hpp"
+#include <vector>
+#include <queue>
+
+namespace CppJieba 
+{
+    using namespace std;
+
+    struct DictUnit // one dictionary entry; TrieNode::ptValue points at objects of this type
+    {
+        Unicode word;   // the entry's key as a Unicode sequence
+        double weight;  // numeric weight attached to the entry
+        string tag;     // tag string attached to the entry
+    };
+
+    inline ostream & operator << (ostream& os, const DictUnit& unit) // debugging aid: prints "<word> <tag> <weight>"
+    {
+        string encodedWord;
+        encodedWord << unit.word; // relies on a `string << Unicode` operator declared elsewhere
+        os << string_format("%s %s %.3lf", encodedWord.c_str(), unit.tag.c_str(), unit.weight);
+        return os;
+    }
+
+    typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType; // list of (position index, dictionary entry) pairs
+
+    struct SegmentChar // per-character record filled by UglyTrie::find(begin, end, vector<SegmentChar>&)
+    {
+        uint16_t uniCh;          // the character at this position
+        DagType dag;             // (index of last character, entry) for each stored word starting here
+        const DictUnit * pInfo;  // not written by the trie code in this header; starts as NULL
+        double weight;           // not written by the trie code in this header; starts at 0.0
+        size_t nextPos;          // not written by the trie code in this header; starts at 0
+        SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0){}
+        ~SegmentChar() {}
+    };
+
+    typedef Unicode::value_type TrieKey; // one element of a Unicode key
+
+    class TrieNode // trie node: an optional child map plus an optional value
+    {
+        public :
+            TrieNode(): next(NULL), ptValue(NULL) {} // starts with no children and no value
+        public:
+            typedef unordered_map<TrieKey, TrieNode*> NextMap;
+            NextMap *next;            // children keyed by the next key element; NULL until a child is inserted
+            const DictUnit *ptValue;  // entry for the key ending at this node; NULL if no key ends here
+    };
+
+    // Trie over Unicode keys: the first level is a flat array indexed by the key's first
+    // element, deeper levels are hash maps created on demand. Stored DictUnit pointers are
+    // caller-owned (never freed here). NOTE: copying is not disabled but would double-delete.
+    class UglyTrie
+    {
+        public:
+            static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey)))); // 2^(bits in TrieKey) first-level slots
+            // Builds the trie from parallel vectors: keys[i] maps to valuePointers[i].
+            UglyTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
+            {
+                _createTrie(keys, valuePointers);
+            }
+            // Exact lookup of [begin, end). Returns the stored value, or NULL when the range
+            // is empty, the path does not exist, or the final node holds no value.
+            const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
+            {
+                if (begin == end)
+                {
+                    return NULL;
+                }
+
+                const TrieNode* ptNode = _base + (*(begin++));
+                for (Unicode::const_iterator it = begin; it != end; it++)
+                {
+                    if (NULL == ptNode->next)
+                    {
+                        return NULL;
+                    }
+                    TrieNode::NextMap::const_iterator citer = ptNode->next->find(*it);
+                    if (ptNode->next->end() == citer)
+                    {
+                        return NULL;
+                    }
+                    ptNode = citer->second;
+                }
+                return ptNode->ptValue;
+            }
+
+            // For each start i in [begin, end): sets res[i].uniCh, pushes (i, single-character value) even
+            // when that value is NULL, then (j, value) for every longer stored key spanning positions i..j.
+            void find(
+                    Unicode::const_iterator begin,
+                    Unicode::const_iterator end,
+                    vector<struct SegmentChar>& res
+                    ) const
+            {
+                res.resize(end - begin);
+
+                const TrieNode *ptNode = NULL;
+                for (size_t i = 0; i < size_t(end - begin); i++)
+                {
+                    Unicode::value_type ch = *(begin + i);
+                    ptNode = _base + ch;
+                    res[i].uniCh = ch;
+                    assert(res[i].dag.empty());
+                    res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue));
+                    for (size_t j = i + 1; j < size_t(end - begin); j++)
+                    {
+                        if (ptNode->next == NULL)
+                        {
+                            break;
+                        }
+                        TrieNode::NextMap::const_iterator citer = ptNode->next->find(*(begin + j));
+                        if (ptNode->next->end() == citer)
+                        {
+                            break;
+                        }
+                        ptNode = citer->second;
+                        if (NULL != ptNode->ptValue)
+                        {
+                            res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue));
+                        }
+                    }
+                }
+            }
+            // Appends to res one (offset + index of the key's last character, value) pair per stored key
+            // that is a prefix of [begin, end). If res already holds exactly one entry and the first character
+            // alone is a key, that entry's value is overwritten instead. Returns true if res is non-empty.
+            bool find(
+                    Unicode::const_iterator begin,
+                    Unicode::const_iterator end,
+                    DagType & res,
+                    size_t offset = 0) const
+            {
+                if (begin == end)
+                {
+                    return !res.empty();
+                }
+
+                const TrieNode* ptNode = _base + (*(begin++));
+                if (ptNode->ptValue != NULL && res.size() == 1)
+                {
+                    res[0].second = ptNode->ptValue;
+                }
+                else if (ptNode->ptValue != NULL)
+                {
+                    res.push_back(DagType::value_type(offset, ptNode->ptValue));
+                }
+
+                for (Unicode::const_iterator itr = begin; itr != end; itr++)
+                {
+                    if (NULL == ptNode->next)
+                    {
+                        break;
+                    }
+                    TrieNode::NextMap::const_iterator citer = ptNode->next->find(*itr);
+                    if (citer == ptNode->next->end())
+                    {
+                        break;
+                    }
+                    ptNode = citer->second;
+                    if (NULL != ptNode->ptValue)
+                    {
+                        // begin already points at the second character, so add 1 to index from the first one.
+                        res.push_back(DagType::value_type(itr - begin + 1 + offset, ptNode->ptValue));
+                    }
+                }
+                return !res.empty();
+            }
+            // Frees every heap-allocated node and child map hanging off the first-level array.
+            ~UglyTrie()
+            {
+                for (size_t i = 0; i < BASE_SIZE; i++)
+                {
+                    _deleteChildren(_base + i);
+                }
+            }
+
+        private:
+            // Stores ptValue under key, creating missing maps/nodes; empty keys are ignored, existing keys overwritten.
+            void _insertNode(const Unicode& key, const DictUnit* ptValue)
+            {
+                if (key.begin() == key.end())
+                {
+                    return;
+                }
+
+                Unicode::const_iterator citer= key.begin();
+                TrieNode *ptNode = _base + (*(citer++));
+                for (; citer != key.end(); citer++)
+                {
+                    if (NULL == ptNode->next)
+                    {
+                        ptNode->next = new TrieNode::NextMap;
+                    }
+                    TrieNode::NextMap::const_iterator kmIter = ptNode->next->find(*citer);
+                    if (ptNode->next->end() == kmIter)
+                    {
+                        TrieNode *nextNode = new TrieNode;
+                        (*(ptNode->next))[*citer] = nextNode;
+                        ptNode = nextNode;
+                    }
+                    else
+                    {
+                        ptNode = kmIter->second;
+                    }
+                }
+                ptNode->ptValue = ptValue;
+            }
+
+            // Inserts every keys[i] -> valuePointers[i]; does nothing if either vector is empty.
+            void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
+            {
+                if (valuePointers.empty() || keys.empty())
+                {
+                    return;
+                }
+                assert(keys.size() == valuePointers.size());
+                for (size_t i = 0; i < keys.size(); i++)
+                {
+                    _insertNode(keys[i], valuePointers[i]);
+                }
+            }
+
+            // Deletes node's child map and every node beneath it; node itself is kept (it may live in _base).
+            void _deleteChildren(TrieNode* node)
+            {
+                if (NULL != node->next)
+                {
+                    for (TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); it++)
+                    {
+                        _deleteNode(it->second);
+                    }
+                    delete node->next;
+                    node->next = NULL;
+                }
+            }
+            // Recursively deletes a heap-allocated node and its subtree; NULL is accepted and ignored.
+            void _deleteNode(TrieNode* node)
+            {
+                if (NULL != node)
+                {
+                    _deleteChildren(node);
+                    delete node;
+                }
+            }
+
+            TrieNode _base[BASE_SIZE];
+    };
+}
+
+#endif

From 931db7d1e5dbe6b613db7170e463ccd88b3a943a Mon Sep 17 00:00:00 2001
From: xuangong <zhangxian1124@gmail.com>
Date: Mon, 20 Jul 2015 23:54:20 +0800
Subject: [PATCH 4/6] astyle

---
 src/DictTrie.hpp         | 321 +++++++++----------
 src/FullSegment.hpp      | 220 ++++++-------
 src/HMMSegment.hpp       | 646 +++++++++++++++++----------------------
 src/ISegment.hpp         |  16 +-
 src/KeywordExtractor.hpp | 260 +++++++---------
 5 files changed, 656 insertions(+), 807 deletions(-)

diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp
index 350fcd4..40602bf 100644
--- a/src/DictTrie.hpp
+++ b/src/DictTrie.hpp
@@ -16,191 +16,164 @@
 
 
 
-namespace CppJieba
-{
-    using namespace Limonp;
-    const double MIN_DOUBLE = -3.14e+100;
-    const double MAX_DOUBLE = 3.14e+100;
-    const size_t DICT_COLUMN_NUM = 3;
-    const char* const UNKNOWN_TAG = "";
+namespace CppJieba {
+using namespace Limonp;
+const double MIN_DOUBLE = -3.14e+100;
+const double MAX_DOUBLE = 3.14e+100;
+const size_t DICT_COLUMN_NUM = 3;
+const char* const UNKNOWN_TAG = "";
 
-    class DictTrie
-    {
-        public:
+class DictTrie {
+ public:
 
-            DictTrie()
-            {
-                _trie = NULL;
-                _minWeight = MAX_DOUBLE;
-            }
-            DictTrie(const string& dictPath, const string& userDictPath = "")
-            {
-                new (this) DictTrie();
-                init(dictPath, userDictPath);
-            }
-            ~DictTrie()
-            {
-                if(_trie)
-                {
-                    delete _trie;
-                }
-            }
-            
-            bool init(const string& dictPath, const string& userDictPath = "")
-            {
-                assert(!_trie);
-                _loadDict(dictPath);
-                _calculateWeight(_nodeInfos);
-                _minWeight = _findMinWeight(_nodeInfos);
-                
-                if(userDictPath.size())
-                {
-                    double maxWeight = _findMaxWeight(_nodeInfos);
-                    _loadUserDict(userDictPath, maxWeight, UNKNOWN_TAG);
-                }
-                _shrink(_nodeInfos);
-                _trie = _createTrie(_nodeInfos);
-                assert(_trie);
-                return true;
-            }
+  DictTrie() {
+    _trie = NULL;
+    _minWeight = MAX_DOUBLE;
+  }
+  DictTrie(const string& dictPath, const string& userDictPath = "") {
+    new (this) DictTrie();
+    init(dictPath, userDictPath);
+  }
+  ~DictTrie() {
+    if(_trie) {
+      delete _trie;
+    }
+  }
 
-            const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
-            {
-                return _trie->find(begin, end);
-            }
-            bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const
-            {
-                return _trie->find(begin, end, dag, offset);
-            }
-            void find(
-                        Unicode::const_iterator begin, 
-                        Unicode::const_iterator end, 
-                        vector<SegmentChar>& res
-                        ) const
-            {
-                _trie->find(begin, end, res);
-            }
-            bool isUserDictSingleChineseWord(const Unicode::value_type& word) const
-            {
-                return isIn(_userDictSingleChineseWord, word);
-            }
-            double getMinWeight() const {return _minWeight;};
+  bool init(const string& dictPath, const string& userDictPath = "") {
+    assert(!_trie);
+    _loadDict(dictPath);
+    _calculateWeight(_nodeInfos);
+    _minWeight = _findMinWeight(_nodeInfos);
+
+    if(userDictPath.size()) {
+      double maxWeight = _findMaxWeight(_nodeInfos);
+      _loadUserDict(userDictPath, maxWeight, UNKNOWN_TAG);
+    }
+    _shrink(_nodeInfos);
+    _trie = _createTrie(_nodeInfos);
+    assert(_trie);
+    return true;
+  }
+
+  const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
+    return _trie->find(begin, end);
+  }
+  bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const {
+    return _trie->find(begin, end, dag, offset);
+  }
+  void find(
+    Unicode::const_iterator begin,
+    Unicode::const_iterator end,
+    vector<SegmentChar>& res
+  ) const {
+    _trie->find(begin, end, res);
+  }
+  bool isUserDictSingleChineseWord(const Unicode::value_type& word) const {
+    return isIn(_userDictSingleChineseWord, word);
+  }
+  double getMinWeight() const {
+    return _minWeight;
+  };
 
 
-        private:
-            UglyTrie * _createTrie(const vector<DictUnit>& dictUnits)
-            {
-                assert(dictUnits.size());
-                vector<Unicode> words;
-                vector<const DictUnit*> valuePointers;
-                for(size_t i = 0 ; i < dictUnits.size(); i ++)
-                {
-                    words.push_back(dictUnits[i].word);
-                    valuePointers.push_back(&dictUnits[i]);
-                }
+ private:
+  UglyTrie * _createTrie(const vector<DictUnit>& dictUnits) {
+    assert(dictUnits.size());
+    vector<Unicode> words;
+    vector<const DictUnit*> valuePointers;
+    for(size_t i = 0 ; i < dictUnits.size(); i ++) {
+      words.push_back(dictUnits[i].word);
+      valuePointers.push_back(&dictUnits[i]);
+    }
 
-                UglyTrie * trie = new UglyTrie(words, valuePointers);
-                return trie;
-            }
-            void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag)
-            {
-                ifstream ifs(filePath.c_str());
-                assert(ifs.is_open());
-                string line;
-                DictUnit nodeInfo;
-                vector<string> buf;
-                size_t lineno;
-                for(lineno = 0; getline(ifs, line); lineno++)
-                {
-                    buf.clear();
-                    split(line, buf, " ");
-                    assert(buf.size() >= 1);
-                    if(!TransCode::decode(buf[0], nodeInfo.word))
-                    {
-                        LogError("line[%u:%s] illegal.", lineno, line.c_str());
-                        continue;
-                    }
-                    if(nodeInfo.word.size() == 1)
-                    {
-                        _userDictSingleChineseWord.insert(nodeInfo.word[0]);
-                    }
-                    nodeInfo.weight = defaultWeight;
-                    nodeInfo.tag = (buf.size() == 2 ? buf[1] : defaultTag);
-                    _nodeInfos.push_back(nodeInfo);
-                }
-                LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno);
-            }
-            void _loadDict(const string& filePath) 
-            {
-                ifstream ifs(filePath.c_str());
-                assert(ifs.is_open());
-                string line;
-                vector<string> buf;
+    UglyTrie * trie = new UglyTrie(words, valuePointers);
+    return trie;
+  }
+  void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag) {
+    ifstream ifs(filePath.c_str());
+    assert(ifs.is_open());
+    string line;
+    DictUnit nodeInfo;
+    vector<string> buf;
+    size_t lineno;
+    for(lineno = 0; getline(ifs, line); lineno++) {
+      buf.clear();
+      split(line, buf, " ");
+      assert(buf.size() >= 1);
+      if(!TransCode::decode(buf[0], nodeInfo.word)) {
+        LogError("line[%u:%s] illegal.", lineno, line.c_str());
+        continue;
+      }
+      if(nodeInfo.word.size() == 1) {
+        _userDictSingleChineseWord.insert(nodeInfo.word[0]);
+      }
+      nodeInfo.weight = defaultWeight;
+      nodeInfo.tag = (buf.size() == 2 ? buf[1] : defaultTag);
+      _nodeInfos.push_back(nodeInfo);
+    }
+    LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno);
+  }
+  void _loadDict(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    assert(ifs.is_open());
+    string line;
+    vector<string> buf;
 
-                DictUnit nodeInfo;
-                for(size_t lineno = 0 ; getline(ifs, line); lineno++)
-                {
-                    split(line, buf, " ");
-                    assert(buf.size() == DICT_COLUMN_NUM);
-                    
-                    if(!TransCode::decode(buf[0], nodeInfo.word))
-                    {
-                        LogError("line[%u:%s] illegal.", lineno, line.c_str());
-                        continue;
-                    }
-                    nodeInfo.weight = atof(buf[1].c_str());
-                    nodeInfo.tag = buf[2];
-                    
-                    _nodeInfos.push_back(nodeInfo);
-                }
-            }
-            double _findMinWeight(const vector<DictUnit>& nodeInfos) const
-            {
-                double ret = MAX_DOUBLE;
-                for(size_t i = 0; i < nodeInfos.size(); i++)
-                {
-                    ret = min(nodeInfos[i].weight, ret);
-                }
-                return ret;
-            }
-            double _findMaxWeight(const vector<DictUnit>& nodeInfos) const
-            {
-                double ret = MIN_DOUBLE;
-                for(size_t i = 0; i < nodeInfos.size(); i++)
-                {
-                    ret = max(nodeInfos[i].weight, ret);
-                }
-                return ret;
-            }
+    DictUnit nodeInfo;
+    for(size_t lineno = 0 ; getline(ifs, line); lineno++) {
+      split(line, buf, " ");
+      assert(buf.size() == DICT_COLUMN_NUM);
 
-            void _calculateWeight(vector<DictUnit>& nodeInfos) const
-            {
-                double sum = 0.0;
-                for(size_t i = 0; i < nodeInfos.size(); i++)
-                {
-                    sum += nodeInfos[i].weight;
-                }
-                assert(sum);
-                for(size_t i = 0; i < nodeInfos.size(); i++)
-                {
-                    DictUnit& nodeInfo = nodeInfos[i];
-                    assert(nodeInfo.weight);
-                    nodeInfo.weight = log(double(nodeInfo.weight)/double(sum));
-                }
-            }
+      if(!TransCode::decode(buf[0], nodeInfo.word)) {
+        LogError("line[%u:%s] illegal.", lineno, line.c_str());
+        continue;
+      }
+      nodeInfo.weight = atof(buf[1].c_str());
+      nodeInfo.tag = buf[2];
 
-            void _shrink(vector<DictUnit>& units) const
-            {
-                vector<DictUnit>(units.begin(), units.end()).swap(units);
-            }
+      _nodeInfos.push_back(nodeInfo);
+    }
+  }
+  double _findMinWeight(const vector<DictUnit>& nodeInfos) const {
+    double ret = MAX_DOUBLE;
+    for(size_t i = 0; i < nodeInfos.size(); i++) {
+      ret = min(nodeInfos[i].weight, ret);
+    }
+    return ret;
+  }
+  double _findMaxWeight(const vector<DictUnit>& nodeInfos) const {
+    double ret = MIN_DOUBLE;
+    for(size_t i = 0; i < nodeInfos.size(); i++) {
+      ret = max(nodeInfos[i].weight, ret);
+    }
+    return ret;
+  }
 
-        private:
-            vector<DictUnit> _nodeInfos;
-            UglyTrie * _trie;
+  void _calculateWeight(vector<DictUnit>& nodeInfos) const {
+    double sum = 0.0;
+    for(size_t i = 0; i < nodeInfos.size(); i++) {
+      sum += nodeInfos[i].weight;
+    }
+    assert(sum);
+    for(size_t i = 0; i < nodeInfos.size(); i++) {
+      DictUnit& nodeInfo = nodeInfos[i];
+      assert(nodeInfo.weight);
+      nodeInfo.weight = log(double(nodeInfo.weight)/double(sum));
+    }
+  }
 
-            double _minWeight;
-            unordered_set<Unicode::value_type> _userDictSingleChineseWord;
-    };
+  void _shrink(vector<DictUnit>& units) const {
+    vector<DictUnit>(units.begin(), units.end()).swap(units);
+  }
+
+ private:
+  vector<DictUnit> _nodeInfos;
+  UglyTrie * _trie;
+
+  double _minWeight;
+  unordered_set<Unicode::value_type> _userDictSingleChineseWord;
+};
 }
 
 #endif
diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp
index 0a3e747..a8b60a1 100644
--- a/src/FullSegment.hpp
+++ b/src/FullSegment.hpp
@@ -10,140 +10,116 @@
 #include "SegmentBase.hpp"
 #include "TransCode.hpp"
 
-namespace CppJieba
-{
-    class FullSegment: public SegmentBase
-    {
-        public:
-            FullSegment()
-            {
-                _dictTrie = NULL;
-                _isBorrowed = false;
-            }
-            explicit FullSegment(const string& dictPath)
-            {
-                _dictTrie = NULL;
-                init(dictPath);
-            }
-            explicit FullSegment(const DictTrie* dictTrie) 
-            {
-                _dictTrie = NULL;
-                init(dictTrie);
-            }
-            virtual ~FullSegment()
-            {
-                if(_dictTrie && ! _isBorrowed) 
-                {
-                    delete _dictTrie;
-                }
+namespace CppJieba {
+class FullSegment: public SegmentBase {
+ public:
+  FullSegment() {
+    _dictTrie = NULL;
+    _isBorrowed = false;
+  }
+  explicit FullSegment(const string& dictPath) {
+    _dictTrie = NULL;
+    init(dictPath);
+  }
+  explicit FullSegment(const DictTrie* dictTrie) {
+    _dictTrie = NULL;
+    init(dictTrie);
+  }
+  virtual ~FullSegment() {
+    if(_dictTrie && ! _isBorrowed) {
+      delete _dictTrie;
+    }
 
-            };
-            bool init(const string& dictPath)
-            {
-                assert(_dictTrie == NULL);
-                _dictTrie = new DictTrie(dictPath);
-                _isBorrowed = false;
-                return true;
-            }
-            bool init(const DictTrie* dictTrie) 
-            {
-                assert(_dictTrie == NULL);
-                assert(dictTrie);
-                _dictTrie = dictTrie;
-                _isBorrowed = true;
-                return true;
-            }
+  };
+  bool init(const string& dictPath) {
+    assert(_dictTrie == NULL);
+    _dictTrie = new DictTrie(dictPath);
+    _isBorrowed = false;
+    return true;
+  }
+  bool init(const DictTrie* dictTrie) {
+    assert(_dictTrie == NULL);
+    assert(dictTrie);
+    _dictTrie = dictTrie;
+    _isBorrowed = true;
+    return true;
+  }
 
-            using SegmentBase::cut;
-            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
-            {
-                assert(_dictTrie);
-                if (begin >= end)
-                {
-                    LogError("begin >= end");
-                    return false;
-                }
+  using SegmentBase::cut;
+  bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
+    assert(_dictTrie);
+    if (begin >= end) {
+      LogError("begin >= end");
+      return false;
+    }
 
-                //resut of searching in trie tree
-                DagType tRes;
+    //resut of searching in trie tree
+    DagType tRes;
 
-                //max index of res's words
-                int maxIdx = 0;
+    //max index of res's words
+    int maxIdx = 0;
 
-                // always equals to (uItr - begin)
-                int uIdx = 0;
+    // always equals to (uItr - begin)
+    int uIdx = 0;
 
-                //tmp variables
-                int wordLen = 0;
-                for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
-                {
-                    //find word start from uItr
-                    if (_dictTrie->find(uItr, end, tRes, 0))
-                    {
-                        for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
-                        //for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
-                        {
-                            wordLen = itr->second->word.size();
-                            if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx))
-                            {
-                                res.push_back(itr->second->word);
-                            }
-                            maxIdx = uIdx+wordLen > maxIdx ? uIdx+wordLen : maxIdx;
-                        }
-                        tRes.clear();
-                    }
-                    else // not found word start from uItr
-                    {
-                        if (maxIdx <= uIdx) // never exist in prev results
-                        {
-                            //put itr itself in res
-                            res.push_back(Unicode(1, *uItr));
+    //tmp variables
+    int wordLen = 0;
+    for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) {
+      //find word start from uItr
+      if (_dictTrie->find(uItr, end, tRes, 0)) {
+        for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
+          //for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
+        {
+          wordLen = itr->second->word.size();
+          if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) {
+            res.push_back(itr->second->word);
+          }
+          maxIdx = uIdx+wordLen > maxIdx ? uIdx+wordLen : maxIdx;
+        }
+        tRes.clear();
+      } else { // not found word start from uItr
+        if (maxIdx <= uIdx) { // never exist in prev results
+          //put itr itself in res
+          res.push_back(Unicode(1, *uItr));
 
-                            //mark it exits
-                            ++maxIdx;
-                        }
-                    }
-                    ++uIdx;
-                }
+          //mark it exits
+          ++maxIdx;
+        }
+      }
+      ++uIdx;
+    }
 
-                return true;
-            }
+    return true;
+  }
 
-            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
-            {
-                assert(_dictTrie);
-                if (begin >= end)
-                {
-                    LogError("begin >= end");
-                    return false;
-                }
+  bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const {
+    assert(_dictTrie);
+    if (begin >= end) {
+      LogError("begin >= end");
+      return false;
+    }
 
-                vector<Unicode> uRes;
-                if (!cut(begin, end, uRes))
-                {
-                    LogError("get unicode cut result error.");
-                    return false;
-                }
+    vector<Unicode> uRes;
+    if (!cut(begin, end, uRes)) {
+      LogError("get unicode cut result error.");
+      return false;
+    }
 
-                string tmp;
-                for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
-                {
-                    if (TransCode::encode(*uItr, tmp))
-                    {
-                        res.push_back(tmp);
-                    }
-                    else
-                    {
-                        LogError("encode failed.");
-                    }
-                }
+    string tmp;
+    for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++) {
+      if (TransCode::encode(*uItr, tmp)) {
+        res.push_back(tmp);
+      } else {
+        LogError("encode failed.");
+      }
+    }
 
-                return true;
-            }
-        private:
-            const DictTrie* _dictTrie;
-            bool _isBorrowed;
-    };
+    return true;
+  }
+ private:
+  const DictTrie* _dictTrie;
+  bool _isBorrowed;
+};
 }
 
 #endif
diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp
index d7c8c89..d000bce 100644
--- a/src/HMMSegment.hpp
+++ b/src/HMMSegment.hpp
@@ -12,387 +12,315 @@
 #include "SegmentBase.hpp"
 #include "DictTrie.hpp"
 
-namespace CppJieba
-{
-    using namespace Limonp;
-    typedef unordered_map<uint16_t, double> EmitProbMap;
-    class HMMSegment: public SegmentBase
-    {
-        public:
-            /*
-             * STATUS:
-             * 0:B, 1:E, 2:M, 3:S
-             * */
-            enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
+namespace CppJieba {
+using namespace Limonp;
+typedef unordered_map<uint16_t, double> EmitProbMap;
+class HMMSegment: public SegmentBase {
+ public:
+  /*
+   * STATUS:
+   * 0:B, 1:E, 2:M, 3:S
+   * */
+  enum {B = 0, E = 1, M = 2, S = 3, STATUS_SUM = 4};
 
-        public:
-            HMMSegment(){}
-            explicit HMMSegment(const string& filePath)
-            {
-                LIMONP_CHECK(init(filePath));
-            }
-            virtual ~HMMSegment(){}
-        public:
-            bool init(const string& filePath)
-            {
-                memset(_startProb, 0, sizeof(_startProb));
-                memset(_transProb, 0, sizeof(_transProb));
-                _statMap[0] = 'B';
-                _statMap[1] = 'E';
-                _statMap[2] = 'M';
-                _statMap[3] = 'S';
-                _emitProbVec.push_back(&_emitProbB);
-                _emitProbVec.push_back(&_emitProbE);
-                _emitProbVec.push_back(&_emitProbM);
-                _emitProbVec.push_back(&_emitProbS);
-                LIMONP_CHECK(_loadModel(filePath.c_str()));
-                LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
-                return true;
-            }
-        public:
-            using SegmentBase::cut;
-        public:
-            bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const 
-            {
-                Unicode::const_iterator left = begin;
-                Unicode::const_iterator right = begin;
-                while(right != end)
-                {
-                    if(*right < 0x80) 
-                    {
-                        if(left != right && !_cut(left, right, res))
-                        {
-                            return false;
-                        }
-                        left = right;
-                        do {
-                            right = _sequentialLetterRule(left, end);
-                            if(right != left)
-                            {
-                                break;
-                            }
-                            right = _numbersRule(left, end);
-                            if(right != left)
-                            {
-                                break;
-                            }
-                            right ++;
-                        } while(false);
-                        res.push_back(Unicode(left, right));
-                        left = right;
-                    }
-                    else
-                    {
-                        right++;
-                    }
-                }
-                if(left != right && !_cut(left, right, res))
-                {
-                    return false;
-                }
-                return true;
-            }
-        public:
-            virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
-            {
-                if(begin == end)
-                {
-                    return false;
-                }
-                vector<Unicode> words;
-                words.reserve(end - begin);
-                if(!cut(begin, end, words))
-                {
-                    return false;
-                }
-                size_t offset = res.size();
-                res.resize(res.size() + words.size());
-                for(size_t i = 0; i < words.size(); i++)
-                {
-                    if(!TransCode::encode(words[i], res[offset + i]))
-                    {
-                        LogError("encode failed.");
-                    }
-                }
-                return true;
-            }
-        private:
-            // sequential letters rule
-            Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const
-            {
-                Unicode::value_type x = *begin;
-                if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z'))
-                {
-                    begin ++;
-                }
-                else
-                {
-                    return begin;
-                }
-                while(begin != end)
-                {
-                    x = *begin;
-                    if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9'))
-                    {
-                        begin ++;
-                    }
-                    else
-                    {
-                        break;
-                    }
-                }
-                return begin;
-            }
-            // 
-            Unicode::const_iterator _numbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const
-            {
-                Unicode::value_type x = *begin;
-                if('0' <= x && x <= '9')
-                {
-                    begin ++;
-                }
-                else
-                {
-                    return begin;
-                }
-                while(begin != end)
-                {
-                    x = *begin;
-                    if( ('0' <= x && x <= '9') || x == '.')
-                    {
-                        begin++;
-                    }
-                    else
-                    {
-                        break;
-                    }
-                }
-                return begin;
-            }
-            bool _cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const 
-            {
-                vector<size_t> status; 
-                if(!_viterbi(begin, end, status))
-                {
-                    LogError("_viterbi failed.");
-                    return false;
-                }
+ public:
+  HMMSegment() {}
+  explicit HMMSegment(const string& filePath) {
+    LIMONP_CHECK(init(filePath));
+  }
+  virtual ~HMMSegment() {}
+ public:
+  bool init(const string& filePath) {
+    memset(_startProb, 0, sizeof(_startProb));
+    memset(_transProb, 0, sizeof(_transProb));
+    _statMap[0] = 'B';
+    _statMap[1] = 'E';
+    _statMap[2] = 'M';
+    _statMap[3] = 'S';
+    _emitProbVec.push_back(&_emitProbB);
+    _emitProbVec.push_back(&_emitProbE);
+    _emitProbVec.push_back(&_emitProbM);
+    _emitProbVec.push_back(&_emitProbS);
+    LIMONP_CHECK(_loadModel(filePath.c_str()));
+    LogInfo("HMMSegment init(%s) ok.", filePath.c_str());
+    return true;
+  }
+ public:
+  using SegmentBase::cut;
+ public:
+  bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
+    Unicode::const_iterator left = begin;
+    Unicode::const_iterator right = begin;
+    while(right != end) {
+      if(*right < 0x80) {
+        if(left != right && !_cut(left, right, res)) {
+          return false;
+        }
+        left = right;
+        do {
+          right = _sequentialLetterRule(left, end);
+          if(right != left) {
+            break;
+          }
+          right = _numbersRule(left, end);
+          if(right != left) {
+            break;
+          }
+          right ++;
+        } while(false);
+        res.push_back(Unicode(left, right));
+        left = right;
+      } else {
+        right++;
+      }
+    }
+    if(left != right && !_cut(left, right, res)) {
+      return false;
+    }
+    return true;
+  }
+ public:
+  virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const {
+    if(begin == end) {
+      return false;
+    }
+    vector<Unicode> words;
+    words.reserve(end - begin);
+    if(!cut(begin, end, words)) {
+      return false;
+    }
+    size_t offset = res.size();
+    res.resize(res.size() + words.size());
+    for(size_t i = 0; i < words.size(); i++) {
+      if(!TransCode::encode(words[i], res[offset + i])) {
+        LogError("encode failed.");
+      }
+    }
+    return true;
+  }
+ private:
+  // sequential letters rule
+  Unicode::const_iterator _sequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const {
+    Unicode::value_type x = *begin;
+    if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
+      begin ++;
+    } else {
+      return begin;
+    }
+    while(begin != end) {
+      x = *begin;
+      if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
+        begin ++;
+      } else {
+        break;
+      }
+    }
+    return begin;
+  }
+  //
+  Unicode::const_iterator _numbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const {
+    Unicode::value_type x = *begin;
+    if('0' <= x && x <= '9') {
+      begin ++;
+    } else {
+      return begin;
+    }
+    while(begin != end) {
+      x = *begin;
+      if( ('0' <= x && x <= '9') || x == '.') {
+        begin++;
+      } else {
+        break;
+      }
+    }
+    return begin;
+  }
+  bool _cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
+    vector<size_t> status;
+    if(!_viterbi(begin, end, status)) {
+      LogError("_viterbi failed.");
+      return false;
+    }
 
-                Unicode::const_iterator left = begin;
-                Unicode::const_iterator right;
-                for(size_t i = 0; i < status.size(); i++)
-                {
-                    if(status[i] % 2) //if(E == status[i] || S == status[i])
-                    {
-                        right = begin + i + 1;
-                        res.push_back(Unicode(left, right));
-                        left = right;
-                    }
-                }
-                return true;
-            }
+    Unicode::const_iterator left = begin;
+    Unicode::const_iterator right;
+    for(size_t i = 0; i < status.size(); i++) {
+      if(status[i] % 2) { //if(E == status[i] || S == status[i])
+        right = begin + i + 1;
+        res.push_back(Unicode(left, right));
+        left = right;
+      }
+    }
+    return true;
+  }
 
-            bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector<size_t>& status)const
-            {
-                if(begin == end)
-                {
-                    return false;
-                }
+  bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector<size_t>& status)const {
+    if(begin == end) {
+      return false;
+    }
 
-                size_t Y = STATUS_SUM;
-                size_t X = end - begin;
+    size_t Y = STATUS_SUM;
+    size_t X = end - begin;
 
-                size_t XYSize = X * Y;
-                size_t now, old, stat;
-                double tmp, endE, endS;
+    size_t XYSize = X * Y;
+    size_t now, old, stat;
+    double tmp, endE, endS;
 
-                vector<int> path(XYSize);
-                vector<double> weight(XYSize);
+    vector<int> path(XYSize);
+    vector<double> weight(XYSize);
 
-                //start
-                for(size_t y = 0; y < Y; y++)
-                {
-                    weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], *begin, MIN_DOUBLE);
-                    path[0 + y * X] = -1;
-                }
+    //start
+    for(size_t y = 0; y < Y; y++) {
+      weight[0 + y * X] = _startProb[y] + _getEmitProb(_emitProbVec[y], *begin, MIN_DOUBLE);
+      path[0 + y * X] = -1;
+    }
 
 
-                double emitProb;
+    double emitProb;
 
-                for(size_t x = 1; x < X; x++)
-                {
-                    for(size_t y = 0; y < Y; y++)
-                    {
-                        now = x + y*X;
-                        weight[now] = MIN_DOUBLE;
-                        path[now] = E; // warning
-                        emitProb = _getEmitProb(_emitProbVec[y], *(begin+x), MIN_DOUBLE);
-                        for(size_t preY = 0; preY < Y; preY++)
-                        {
-                            old = x - 1 + preY * X;
-                            tmp = weight[old] + _transProb[preY][y] + emitProb;
-                            if(tmp > weight[now])
-                            {
-                                weight[now] = tmp;
-                                path[now] = preY;
-                            }
-                        }
-                    }
-                }
+    for(size_t x = 1; x < X; x++) {
+      for(size_t y = 0; y < Y; y++) {
+        now = x + y*X;
+        weight[now] = MIN_DOUBLE;
+        path[now] = E; // warning
+        emitProb = _getEmitProb(_emitProbVec[y], *(begin+x), MIN_DOUBLE);
+        for(size_t preY = 0; preY < Y; preY++) {
+          old = x - 1 + preY * X;
+          tmp = weight[old] + _transProb[preY][y] + emitProb;
+          if(tmp > weight[now]) {
+            weight[now] = tmp;
+            path[now] = preY;
+          }
+        }
+      }
+    }
 
-                endE = weight[X-1+E*X];
-                endS = weight[X-1+S*X];
-                stat = 0;
-                if(endE >= endS)
-                {
-                    stat = E;
-                }
-                else
-                {
-                    stat = S;
-                }
+    endE = weight[X-1+E*X];
+    endS = weight[X-1+S*X];
+    stat = 0;
+    if(endE >= endS) {
+      stat = E;
+    } else {
+      stat = S;
+    }
 
-                status.resize(X);
-                for(int x = X -1 ; x >= 0; x--)
-                {
-                    status[x] = stat;
-                    stat = path[x + stat*X];
-                }
+    status.resize(X);
+    for(int x = X -1 ; x >= 0; x--) {
+      status[x] = stat;
+      stat = path[x + stat*X];
+    }
 
-                return true;
-            }
-            bool _loadModel(const char* const filePath)
-            {
-                ifstream ifile(filePath);
-                string line;
-                vector<string> tmp;
-                vector<string> tmp2;
-                //load _startProb
-                if(!_getLine(ifile, line))
-                {
-                    return false;
-                }
-                split(line, tmp, " ");
-                if(tmp.size() != STATUS_SUM)
-                {
-                    LogError("start_p illegal");
-                    return false;
-                }
-                for(size_t j = 0; j< tmp.size(); j++)
-                {
-                    _startProb[j] = atof(tmp[j].c_str());
-                }
+    return true;
+  }
+  bool _loadModel(const char* const filePath) {
+    ifstream ifile(filePath);
+    string line;
+    vector<string> tmp;
+    vector<string> tmp2;
+    //load _startProb
+    if(!_getLine(ifile, line)) {
+      return false;
+    }
+    split(line, tmp, " ");
+    if(tmp.size() != STATUS_SUM) {
+      LogError("start_p illegal");
+      return false;
+    }
+    for(size_t j = 0; j< tmp.size(); j++) {
+      _startProb[j] = atof(tmp[j].c_str());
+    }
 
-                //load _transProb
-                for(size_t i = 0; i < STATUS_SUM; i++)
-                {
-                    if(!_getLine(ifile, line))
-                    {
-                        return false;
-                    }
-                    split(line, tmp, " ");
-                    if(tmp.size() != STATUS_SUM)
-                    {
-                        LogError("trans_p illegal");
-                        return false;
-                    }
-                    for(size_t j =0; j < STATUS_SUM; j++)
-                    {
-                        _transProb[i][j] = atof(tmp[j].c_str());
-                    }
-                }
+    //load _transProb
+    for(size_t i = 0; i < STATUS_SUM; i++) {
+      if(!_getLine(ifile, line)) {
+        return false;
+      }
+      split(line, tmp, " ");
+      if(tmp.size() != STATUS_SUM) {
+        LogError("trans_p illegal");
+        return false;
+      }
+      for(size_t j =0; j < STATUS_SUM; j++) {
+        _transProb[i][j] = atof(tmp[j].c_str());
+      }
+    }
 
-                //load _emitProbB
-                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbB))
-                {
-                    return false;
-                }
+    //load _emitProbB
+    if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbB)) {
+      return false;
+    }
 
-                //load _emitProbE
-                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbE))
-                {
-                    return false;
-                }
+    //load _emitProbE
+    if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbE)) {
+      return false;
+    }
 
-                //load _emitProbM
-                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbM))
-                {
-                    return false;
-                }
+    //load _emitProbM
+    if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbM)) {
+      return false;
+    }
 
-                //load _emitProbS
-                if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbS))
-                {
-                    return false;
-                }
+    //load _emitProbS
+    if(!_getLine(ifile, line) || !_loadEmitProb(line, _emitProbS)) {
+      return false;
+    }
 
-                return true;
-            }
-            bool _getLine(ifstream& ifile, string& line)
-            {
-                while(getline(ifile, line))
-                {
-                    trim(line);
-                    if(line.empty())
-                    {
-                        continue;
-                    }
-                    if(startsWith(line, "#"))
-                    {
-                        continue;
-                    }
-                    return true;
-                }
-                return false;
-            }
-            bool _loadEmitProb(const string& line, EmitProbMap& mp)
-            {
-                if(line.empty())
-                {
-                    return false;
-                }
-                vector<string> tmp, tmp2;
-                Unicode unicode;
-                split(line, tmp, ",");
-                for(size_t i = 0; i < tmp.size(); i++)
-                {
-                    split(tmp[i], tmp2, ":");
-                    if(2 != tmp2.size())
-                    {
-                        LogError("_emitProb illegal.");
-                        return false;
-                    }
-                    if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1)
-                    {
-                        LogError("TransCode failed.");
-                        return false;
-                    }
-                    mp[unicode[0]] = atof(tmp2[1].c_str());
-                }
-                return true;
-            }
-            double _getEmitProb(const EmitProbMap* ptMp, uint16_t key, double defVal)const 
-            {
-                EmitProbMap::const_iterator cit = ptMp->find(key);
-                if(cit == ptMp->end())
-                {
-                    return defVal;
-                }
-                return cit->second;
+    return true;
+  }
+  bool _getLine(ifstream& ifile, string& line) {
+    while(getline(ifile, line)) {
+      trim(line);
+      if(line.empty()) {
+        continue;
+      }
+      if(startsWith(line, "#")) {
+        continue;
+      }
+      return true;
+    }
+    return false;
+  }
+  bool _loadEmitProb(const string& line, EmitProbMap& mp) {
+    if(line.empty()) {
+      return false;
+    }
+    vector<string> tmp, tmp2;
+    Unicode unicode;
+    split(line, tmp, ",");
+    for(size_t i = 0; i < tmp.size(); i++) {
+      split(tmp[i], tmp2, ":");
+      if(2 != tmp2.size()) {
+        LogError("_emitProb illegal.");
+        return false;
+      }
+      if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
+        LogError("TransCode failed.");
+        return false;
+      }
+      mp[unicode[0]] = atof(tmp2[1].c_str());
+    }
+    return true;
+  }
+  double _getEmitProb(const EmitProbMap* ptMp, uint16_t key, double defVal)const {
+    EmitProbMap::const_iterator cit = ptMp->find(key);
+    if(cit == ptMp->end()) {
+      return defVal;
+    }
+    return cit->second;
 
-            }
+  }
 
-        private:
-            char _statMap[STATUS_SUM];
-            double _startProb[STATUS_SUM];
-            double _transProb[STATUS_SUM][STATUS_SUM];
-            EmitProbMap _emitProbB;
-            EmitProbMap _emitProbE;
-            EmitProbMap _emitProbM;
-            EmitProbMap _emitProbS;
-            vector<EmitProbMap* > _emitProbVec;
+ private:
+  char _statMap[STATUS_SUM];
+  double _startProb[STATUS_SUM];
+  double _transProb[STATUS_SUM][STATUS_SUM];
+  EmitProbMap _emitProbB;
+  EmitProbMap _emitProbE;
+  EmitProbMap _emitProbM;
+  EmitProbMap _emitProbS;
+  vector<EmitProbMap* > _emitProbVec;
 
-    };
+};
 }
 
 #endif
diff --git a/src/ISegment.hpp b/src/ISegment.hpp
index 167e2f9..4faded5 100644
--- a/src/ISegment.hpp
+++ b/src/ISegment.hpp
@@ -2,15 +2,13 @@
 #define CPPJIEBA_SEGMENTINTERFACE_H
 
 
-namespace CppJieba
-{
-    class ISegment
-    {
-        public:
-            virtual ~ISegment(){};
-            virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0;
-            virtual bool cut(const string& str, vector<string>& res) const = 0;
-    };
+namespace CppJieba {
+class ISegment {
+ public:
+  virtual ~ISegment() {};
+  virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0;
+  virtual bool cut(const string& str, vector<string>& res) const = 0;
+};
 }
 
 #endif
diff --git a/src/KeywordExtractor.hpp b/src/KeywordExtractor.hpp
index e3726d9..10f900e 100644
--- a/src/KeywordExtractor.hpp
+++ b/src/KeywordExtractor.hpp
@@ -5,162 +5,136 @@
 #include <cmath>
 #include <set>
 
-namespace CppJieba
-{
-    using namespace Limonp;
+namespace CppJieba {
+using namespace Limonp;
 
-    /*utf8*/
-    class KeywordExtractor
-    {
-        public:
-            KeywordExtractor(){};
-            KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "")
-            {
-                init(dictPath, hmmFilePath, idfPath, stopWordPath, userDict);
-            };
-            ~KeywordExtractor(){};
+/*utf8*/
+class KeywordExtractor {
+ public:
+  KeywordExtractor() {};
+  KeywordExtractor(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
+    init(dictPath, hmmFilePath, idfPath, stopWordPath, userDict);
+  };
+  ~KeywordExtractor() {};
 
-            void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "")
-            {
-                _loadIdfDict(idfPath);
-                _loadStopWordDict(stopWordPath);
-                LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDict));
-            };
+  void init(const string& dictPath, const string& hmmFilePath, const string& idfPath, const string& stopWordPath, const string& userDict = "") {
+    _loadIdfDict(idfPath);
+    _loadStopWordDict(stopWordPath);
+    LIMONP_CHECK(_segment.init(dictPath, hmmFilePath, userDict));
+  };
 
-            bool extract(const string& str, vector<string>& keywords, size_t topN) const
-            {
-                vector<pair<string, double> > topWords;
-                if(!extract(str, topWords, topN))
-                {
-                    return false;
-                }
-                for(size_t i = 0; i < topWords.size(); i++)
-                {
-                    keywords.push_back(topWords[i].first);
-                }
-                return true;
-            }
+  bool extract(const string& str, vector<string>& keywords, size_t topN) const {
+    vector<pair<string, double> > topWords;
+    if(!extract(str, topWords, topN)) {
+      return false;
+    }
+    for(size_t i = 0; i < topWords.size(); i++) {
+      keywords.push_back(topWords[i].first);
+    }
+    return true;
+  }
 
-            bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const
-            {
-                vector<string> words;
-                if(!_segment.cut(str, words))
-                {
-                    LogError("segment cut(%s) failed.", str.c_str());
-                    return false;
-                }
+  bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const {
+    vector<string> words;
+    if(!_segment.cut(str, words)) {
+      LogError("segment cut(%s) failed.", str.c_str());
+      return false;
+    }
 
-                map<string, double> wordmap;
-                for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++)
-                {
-                    if(_isSingleWord(*iter))
-                    {
-                        continue;
-                    }
-                    wordmap[*iter] += 1.0;
-                }
+    map<string, double> wordmap;
+    for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
+      if(_isSingleWord(*iter)) {
+        continue;
+      }
+      wordmap[*iter] += 1.0;
+    }
 
-                for(map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); )
-                {
-                    if(_stopWords.end() != _stopWords.find(itr->first))
-                    {
-                        wordmap.erase(itr);
-                        continue;
-                    }
+    for(map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); ) {
+      if(_stopWords.end() != _stopWords.find(itr->first)) {
+        wordmap.erase(itr++); // advance before erasing: erase() invalidates the erased iterator, and the loop has no increment clause
+        continue;
+      }
 
-                    unordered_map<string, double>::const_iterator cit = _idfMap.find(itr->first);
-                    if(cit != _idfMap.end())
-                    {
-                        itr->second *= cit->second;
-                    }
-                    else
-                    {
-                        itr->second *= _idfAverage;
-                    }
-                    itr ++;
-                }
+      unordered_map<string, double>::const_iterator cit = _idfMap.find(itr->first);
+      if(cit != _idfMap.end()) {
+        itr->second *= cit->second;
+      } else {
+        itr->second *= _idfAverage;
+      }
+      itr ++;
+    }
 
-                keywords.clear();
-                std::copy(wordmap.begin(), wordmap.end(), std::inserter(keywords, keywords.begin()));
-                topN = min(topN, keywords.size());
-                partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), _cmp);
-                keywords.resize(topN);
-                return true;
-            }
-        private:
-            void _loadIdfDict(const string& idfPath)
-            {
-                ifstream ifs(idfPath.c_str());
-                if(!ifs)
-                {
-                    LogError("open %s failed.", idfPath.c_str());
-                    assert(false);
-                }
-                string line ;
-                vector<string> buf;
-                double idf = 0.0;
-                double idfSum = 0.0;
-                size_t lineno = 0;
-                for(;getline(ifs, line); lineno++)
-                {
-                    buf.clear();
-                    if(line.empty())
-                    {
-                        LogError("line[%d] empty. skipped.", lineno);
-                        continue;
-                    }
-                    if(!split(line, buf, " ") || buf.size() != 2)
-                    {
-                        LogError("line %d [%s] illegal. skipped.", lineno, line.c_str());
-                        continue;
-                    }
-                    idf = atof(buf[1].c_str());
-                    _idfMap[buf[0]] = idf;
-                    idfSum += idf;
+    keywords.clear();
+    std::copy(wordmap.begin(), wordmap.end(), std::inserter(keywords, keywords.begin()));
+    topN = min(topN, keywords.size());
+    partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), _cmp);
+    keywords.resize(topN);
+    return true;
+  }
+ private:
+  void _loadIdfDict(const string& idfPath) {
+    ifstream ifs(idfPath.c_str());
+    if(!ifs) {
+      LogError("open %s failed.", idfPath.c_str());
+      assert(false);
+    }
+    string line ;
+    vector<string> buf;
+    double idf = 0.0;
+    double idfSum = 0.0;
+    size_t lineno = 0;
+    for(; getline(ifs, line); lineno++) {
+      buf.clear();
+      if(line.empty()) {
+        LogError("line[%lu] empty. skipped.", static_cast<unsigned long>(lineno)); // lineno is size_t; %d would mismatch the argument width
+        continue;
+      }
+      if(!split(line, buf, " ") || buf.size() != 2) {
+        LogError("line %lu [%s] illegal. skipped.", static_cast<unsigned long>(lineno), line.c_str());
+        continue;
+      }
+      idf = atof(buf[1].c_str());
+      _idfMap[buf[0]] = idf;
+      idfSum += idf;
 
-                } 
+    }
 
-                assert(lineno);
-                _idfAverage = idfSum / lineno;
-                assert(_idfAverage > 0.0);
-            }
-            void _loadStopWordDict(const string& filePath)
-            {
-                ifstream ifs(filePath.c_str());
-                if(!ifs)
-                {
-                    LogError("open %s failed.", filePath.c_str());
-                    assert(false);
-                }
-                string line ;
-                while(getline(ifs, line))
-                {
-                    _stopWords.insert(line);
-                }
-                assert(_stopWords.size());
-            }
+    assert(lineno);
+    _idfAverage = idfSum / lineno;
+    assert(_idfAverage > 0.0);
+  }
+  void _loadStopWordDict(const string& filePath) {
+    ifstream ifs(filePath.c_str());
+    if(!ifs) {
+      LogError("open %s failed.", filePath.c_str());
+      assert(false);
+    }
+    string line ;
+    while(getline(ifs, line)) {
+      _stopWords.insert(line);
+    }
+    assert(_stopWords.size());
+  }
 
-            bool _isSingleWord(const string& str) const
-            {
-                Unicode unicode;
-                TransCode::decode(str, unicode);
-                if(unicode.size() == 1)
-                  return true;
-                return false;
-            }
+  bool _isSingleWord(const string& str) const {
+    Unicode unicode;
+    TransCode::decode(str, unicode);
+    if(unicode.size() == 1)
+      return true;
+    return false;
+  }
 
-            static bool _cmp(const pair<string, double>& lhs, const pair<string, double>& rhs)
-            {
-                return lhs.second > rhs.second;
-            }
-            
-        private:
-            MixSegment _segment;
-            unordered_map<string, double> _idfMap;
-            double _idfAverage;
+  static bool _cmp(const pair<string, double>& lhs, const pair<string, double>& rhs) {
+    return lhs.second > rhs.second;
+  }
 
-            unordered_set<string> _stopWords;
-    };
+ private:
+  MixSegment _segment;
+  unordered_map<string, double> _idfMap;
+  double _idfAverage;
+
+  unordered_set<string> _stopWords;
+};
 }
 
 #endif

From cf9cc45c198c7e31c0e5b9dc350beba7bbbca957 Mon Sep 17 00:00:00 2001
From: xuangong <zhangxian1124@gmail.com>
Date: Tue, 21 Jul 2015 00:11:13 +0800
Subject: [PATCH 5/6] astyle

---
 src/UglyTrie.hpp | 407 +++++++++++++++++++++--------------------------
 1 file changed, 181 insertions(+), 226 deletions(-)

diff --git a/src/UglyTrie.hpp b/src/UglyTrie.hpp
index 9b6bab6..39a3e89 100644
--- a/src/UglyTrie.hpp
+++ b/src/UglyTrie.hpp
@@ -5,249 +5,204 @@
 #include <vector>
 #include <queue>
 
-namespace CppJieba 
-{
-    using namespace std;
+namespace CppJieba {
+using namespace std;
 
-    struct DictUnit
-    {
-        Unicode word;
-        double weight;
-        string tag;
-    };
+struct DictUnit {
+  Unicode word;
+  double weight;
+  string tag;
+};
 
-    // for debugging
-    inline ostream & operator << (ostream& os, const DictUnit& unit)
-    {
-        string s;
-        s << unit.word;
-        return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
+// for debugging
+inline ostream & operator << (ostream& os, const DictUnit& unit) {
+  string s;
+  s << unit.word;
+  return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
+}
+
+typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
+
+struct SegmentChar {
+  uint16_t uniCh;
+  DagType dag;
+  const DictUnit * pInfo;
+  double weight;
+  size_t nextPos;
+  SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {}
+  ~SegmentChar() {}
+};
+
+typedef Unicode::value_type TrieKey;
+
+class TrieNode {
+ public :
+  TrieNode(): next(NULL), ptValue(NULL) {}
+ public:
+  typedef unordered_map<TrieKey, TrieNode*> NextMap;
+  NextMap *next;
+  const DictUnit *ptValue;
+};
+
+class UglyTrie {
+ public:
+  static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
+ public:
+  UglyTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
+    _createTrie(keys, valuePointers);
+  }
+  const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
+    if (begin == end) {
+      return NULL;
     }
 
-    typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
+    const TrieNode* ptNode = _base + (*(begin++));
+    TrieNode::NextMap::const_iterator citer;
+    for (Unicode::const_iterator it = begin; it != end; it++) {
+      if (NULL == ptNode->next) {
+        return NULL;
+      }
+      citer = ptNode->next->find(*it);
+      if (ptNode->next->end() == citer) {
+        return NULL;
+      }
+      ptNode = citer->second;
+    }
+    return ptNode->ptValue;
+  }
 
-    struct SegmentChar
-    {
-        uint16_t uniCh;
-        DagType dag;
-        const DictUnit * pInfo;
-        double weight;
-        size_t nextPos;
-        SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0){}
-        ~SegmentChar() {}
-    };
+  void find(
+    Unicode::const_iterator begin,
+    Unicode::const_iterator end,
+    vector<struct SegmentChar>& res
+  ) const {
+    res.resize(end - begin);
 
-    typedef Unicode::value_type TrieKey;
+    const TrieNode *ptNode = NULL;
+    TrieNode::NextMap::const_iterator citer;
+    for (size_t i = 0; i < size_t(end - begin); i++) {
+      Unicode::value_type ch = *(begin + i);
+      ptNode = _base + ch;
+      res[i].uniCh = ch;
+      assert(res[i].dag.empty());
 
-    class TrieNode
-    {
-        public :
-            TrieNode(): next(NULL), ptValue(NULL) {}
-        public:
-            typedef unordered_map<TrieKey, TrieNode*> NextMap;
-            NextMap *next;
-            const DictUnit *ptValue;
-    };
+      res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue));
 
-    class UglyTrie
-    {
-        public:
-            static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
-        public:
-            UglyTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
-            {
-                _createTrie(keys, valuePointers);
-            }
-            const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
-            {
-                if (begin == end)
-                {
-                    return NULL;
-                }
+      for (size_t j = i + 1; j < size_t(end - begin); j++) {
+        if (ptNode->next == NULL) {
+          break;
+        }
+        citer = ptNode->next->find(*(begin + j));
+        if (ptNode->next->end() == citer) {
+          break;
+        }
+        ptNode = citer->second;
+        if (NULL != ptNode->ptValue) {
+          res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue));
+        }
+      }
+    }
+  }
+  bool find(
+    Unicode::const_iterator begin,
+    Unicode::const_iterator end,
+    DagType & res,
+    size_t offset = 0) const {
+    if (begin == end) {
+      return !res.empty();
+    }
 
-                const TrieNode* ptNode = _base + (*(begin++));
-                TrieNode::NextMap::const_iterator citer;
-                for (Unicode::const_iterator it = begin; it != end; it++)
-                {
-                    if (NULL == ptNode->next)
-                    {
-                        return NULL;
-                    }
-                    citer = ptNode->next->find(*it);
-                    if (ptNode->next->end() == citer)
-                    {
-                        return NULL;
-                    }
-                    ptNode = citer->second;
-                }
-                return ptNode->ptValue;
-            }
+    const TrieNode* ptNode = _base + (*(begin++));
+    if (ptNode->ptValue != NULL && res.size() == 1) {
+      res[0].second = ptNode->ptValue;
+    } else if (ptNode->ptValue != NULL) {
+      res.push_back(DagType::value_type(offset, ptNode->ptValue));
+    }
 
-            void find(
-                    Unicode::const_iterator begin,
-                    Unicode::const_iterator end,
-                    vector<struct SegmentChar>& res
-                    ) const
-            {
-                res.resize(end - begin);
+    TrieNode::NextMap::const_iterator citer;
+    for (Unicode::const_iterator itr = begin; itr != end; itr++) {
+      if (NULL == ptNode->next) {
+        break;
+      }
+      citer = ptNode->next->find(*itr);
+      if (citer == ptNode->next->end()) {
+        break;
+      }
+      ptNode = citer->second;
+      if (NULL != ptNode->ptValue) {
+        res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue));
+      }
+    }
+    return !res.empty();
+  }
+  ~UglyTrie() {
+    for (size_t i = 0; i < BASE_SIZE; i++) {
+      if (_base[i].next == NULL) {
+        continue;
+      }
+      for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
+        _deleteNode(it->second);
+        it->second = NULL;
+      }
+      delete _base[i].next;
+      _base[i].next = NULL;
+    }
+  }
 
-                const TrieNode *ptNode = NULL;
-                TrieNode::NextMap::const_iterator citer;
-                for (size_t i = 0; i < size_t(end - begin); i++)
-                {
-                    Unicode::value_type ch = *(begin + i);
-                    ptNode = _base + ch;
-                    res[i].uniCh = ch;
-                    assert(res[i].dag.empty());
+ private:
+  void _insertNode(const Unicode& key, const DictUnit* ptValue) {
+    if (key.begin() == key.end()) {
+      return;
+    }
 
-                    res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue));
-                    
-                    for (size_t j = i + 1; j < size_t(end - begin); j++)
-                    {
-                        if (ptNode->next == NULL)
-                        {
-                            break;
-                        }
-                        citer = ptNode->next->find(*(begin + j));
-                        if (ptNode->next->end() == citer)
-                        {
-                            break;
-                        }
-                        ptNode = citer->second;
-                        if (NULL != ptNode->ptValue)
-                        {
-                            res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue));
-                        }
-                    }
-                }
-            }
-            bool find(
-                    Unicode::const_iterator begin,
-                    Unicode::const_iterator end,
-                    DagType & res,
-                    size_t offset = 0) const
-            {
-                if (begin == end)
-                {
-                    return !res.empty();
-                }
+    TrieNode::NextMap::const_iterator kmIter;
+    Unicode::const_iterator citer= key.begin();
+    TrieNode *ptNode = _base + (*(citer++));
+    for (; citer != key.end(); citer++) {
+      if (NULL == ptNode->next) {
+        ptNode->next = new TrieNode::NextMap;
+      }
+      kmIter = ptNode->next->find(*citer);
+      if (ptNode->next->end() == kmIter) {
+        TrieNode *nextNode = new TrieNode;
 
-                const TrieNode* ptNode = _base + (*(begin++));
-                if (ptNode->ptValue != NULL && res.size() == 1)
-                {
-                    res[0].second = ptNode->ptValue;
-                }
-                else if (ptNode->ptValue != NULL)
-                {
-                    res.push_back(DagType::value_type(offset, ptNode->ptValue));
-                }
+        (*(ptNode->next))[*citer] = nextNode;
+        ptNode = nextNode;
+      } else {
+        ptNode = kmIter->second;
+      }
+    }
+    ptNode->ptValue = ptValue;
+  }
 
-                TrieNode::NextMap::const_iterator citer;
-                for (Unicode::const_iterator itr = begin; itr != end; itr++)
-                {
-                    if (NULL == ptNode->next)
-                    {
-                        break;
-                    }
-                    citer = ptNode->next->find(*itr);
-                    if (citer == ptNode->next->end())
-                    {
-                        break;
-                    }
-                    ptNode = citer->second;
-                    if (NULL != ptNode->ptValue)
-                    {
-                        res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue));
-                    }
-                }
-                return !res.empty();
-            }
-            ~UglyTrie()
-            {
-                for (size_t i = 0; i < BASE_SIZE; i++)
-                {
-                    if (_base[i].next == NULL) 
-                    {
-                        continue;
-                    }
-                    for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++)
-                    {
-                        _deleteNode(it->second);
-                        it->second = NULL;
-                    }
-                    delete _base[i].next;
-                    _base[i].next = NULL;
-                }
-            }
+  void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
+    if (valuePointers.empty() || keys.empty()) {
+      return;
+    }
+    assert(keys.size() == valuePointers.size());
 
-        private:
-            void _insertNode(const Unicode& key, const DictUnit* ptValue)
-            {
-                if (key.begin() == key.end())
-                {
-                    return;
-                }
+    for (size_t i = 0; i < keys.size(); i++) {
+      _insertNode(keys[i], valuePointers[i]);
+    }
+  }
 
-                TrieNode::NextMap::const_iterator kmIter;
-                Unicode::const_iterator citer= key.begin();
-                TrieNode *ptNode = _base + (*(citer++));
-                for (; citer != key.end(); citer++)
-                {
-                    if (NULL == ptNode->next)
-                    {
-                        ptNode->next = new TrieNode::NextMap;
-                    }
-                    kmIter = ptNode->next->find(*citer);
-                    if (ptNode->next->end() == kmIter)
-                    {
-                        TrieNode *nextNode = new TrieNode;
+  void _deleteNode(TrieNode* node) {
+    if (NULL == node) {
+      return;
+    }
+    if (NULL != node->next) {
+      TrieNode::NextMap::iterator it;
+      for (it = node->next->begin(); it != node->next->end(); it++) {
+        _deleteNode(it->second);
+      }
+      delete node->next;
+      node->next = NULL;
+    }
+    delete node;
+  }
 
-                        (*(ptNode->next))[*citer] = nextNode;
-                        ptNode = nextNode;
-                    }
-                    else
-                    {
-                        ptNode = kmIter->second;
-                    }
-                }
-                ptNode->ptValue = ptValue;
-            }
-
-            void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers)
-            {
-                if (valuePointers.empty() || keys.empty())
-                {
-                    return;
-                }
-                assert(keys.size() == valuePointers.size());
-
-                for (size_t i = 0; i < keys.size(); i++)
-                {
-                    _insertNode(keys[i], valuePointers[i]);
-                }
-            }
-
-            void _deleteNode(TrieNode* node)
-            {
-                if (NULL == node)
-                {
-                    return;
-                }
-                if (NULL != node->next)
-                {
-                    TrieNode::NextMap::iterator it;
-                    for (it = node->next->begin(); it != node->next->end(); it++)
-                    {
-                        _deleteNode(it->second);
-                    }
-                    delete node->next;
-                    node->next = NULL;
-                }
-                delete node;
-            }
-
-            TrieNode _base[BASE_SIZE];
-    };
+  TrieNode _base[BASE_SIZE];
+};
 }
 
 #endif

From f5e74a3f4615885d355724adf44b53849e7ac1b2 Mon Sep 17 00:00:00 2001
From: aholic <ruochen.xu@gmail.com>
Date: Tue, 21 Jul 2015 00:29:49 +0800
Subject: [PATCH 6/6] replace old trie

---
 src/Trie.hpp     | 253 +++++++++++++++++++----------------------------
 src/UglyTrie.hpp | 208 --------------------------------------
 2 files changed, 102 insertions(+), 359 deletions(-)
 delete mode 100644 src/UglyTrie.hpp

diff --git a/src/Trie.hpp b/src/Trie.hpp
index eb20b36..3afda9b 100644
--- a/src/Trie.hpp
+++ b/src/Trie.hpp
@@ -29,228 +29,179 @@ struct SegmentChar {
   const DictUnit * pInfo;
   double weight;
   size_t nextPos;
-  SegmentChar():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) {
-  }
-  ~SegmentChar() {
-  }
+  SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {}
+  ~SegmentChar() {}
 };
 
 typedef Unicode::value_type TrieKey;
 
 class TrieNode {
+ public :
+  TrieNode(): next(NULL), ptValue(NULL) {}
  public:
-  TrieNode(): fail(NULL), next(NULL), ptValue(NULL) {
-  }
-  const TrieNode * findNext(TrieKey key) const {
-    if(next == NULL) {
-      return NULL;
-    }
-    NextMap::const_iterator iter = next->find(key);
-    if(iter == next->end()) {
-      return NULL;
-    }
-    return iter->second;
-  }
- public:
-  typedef unordered_map<TrieKey,  TrieNode*> NextMap;
-  TrieNode * fail;
-  NextMap * next;
-  const DictUnit * ptValue;
+  typedef unordered_map<TrieKey, TrieNode*> NextMap;
+  NextMap *next;
+  const DictUnit *ptValue;
 };
 
 class Trie {
  public:
-  Trie(const vector<Unicode>& keys, const vector<const DictUnit*> & valuePointers) {
-    root_ = new TrieNode;
-    createTrie_(keys, valuePointers);
-    build_();// build automation
-  }
-  ~Trie() {
-    if(root_) {
-      deleteNode_(root_);
-    }
-  }
+  static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
  public:
+  Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
+    _createTrie(keys, valuePointers);
+  }
   const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
+    if (begin == end) {
+      return NULL;
+    }
+
+    const TrieNode* ptNode = _base + (*(begin++));
     TrieNode::NextMap::const_iterator citer;
-    const TrieNode* ptNode = root_;
-    for(Unicode::const_iterator it = begin; it != end; it++) {
-      // build automation
-      assert(ptNode);
-      if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*it))) {
+    for (Unicode::const_iterator it = begin; it != end; it++) {
+      if (NULL == ptNode->next) {
+        return NULL;
+      }
+      citer = ptNode->next->find(*it);
+      if (ptNode->next->end() == citer) {
         return NULL;
       }
       ptNode = citer->second;
     }
     return ptNode->ptValue;
   }
-  // aho-corasick-automation
-  void find(Unicode::const_iterator begin,
+
+  void find(
+    Unicode::const_iterator begin,
     Unicode::const_iterator end,
-    vector<struct SegmentChar>& res) const {
+    vector<struct SegmentChar>& res
+  ) const {
     res.resize(end - begin);
-    const TrieNode* now = root_;
-    const TrieNode* node;
-    // compiler will complain warnings if only "i < end - begin" .
+
+    const TrieNode *ptNode = NULL;
+    TrieNode::NextMap::const_iterator citer;
     for (size_t i = 0; i < size_t(end - begin); i++) {
       Unicode::value_type ch = *(begin + i);
+      ptNode = _base + ch;
       res[i].uniCh = ch;
       assert(res[i].dag.empty());
-      res[i].dag.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(i, (const DictUnit*)NULL));
-      bool flag = false;
 
-      // rollback
-      while( now != root_ ) {
-        node = now->findNext(ch);
-        if (node != NULL) {
-          flag = true;
+      res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue));
+
+      for (size_t j = i + 1; j < size_t(end - begin); j++) {
+        if (ptNode->next == NULL) {
           break;
-        } else {
-          now = now->fail;
         }
-      }
-
-      if(!flag) {
-        node = now->findNext(ch);
-      }
-      if(node == NULL) {
-        now = root_;
-      } else {
-        now = node;
-        const TrieNode * temp = now;
-        while(temp != root_) {
-          if (temp->ptValue) {
-            size_t pos = i - temp->ptValue->word.size() + 1;
-            res[pos].dag.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(i, temp->ptValue));
-            if(pos == i) {
-              res[pos].dag[0].second = temp->ptValue;
-            }
-          }
-          temp = temp->fail;
-          assert(temp);
+        citer = ptNode->next->find(*(begin + j));
+        if (ptNode->next->end() == citer) {
+          break;
+        }
+        ptNode = citer->second;
+        if (NULL != ptNode->ptValue) {
+          res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue));
         }
       }
     }
   }
-  bool find(Unicode::const_iterator begin,
+  bool find(
+    Unicode::const_iterator begin,
     Unicode::const_iterator end,
     DagType & res,
     size_t offset = 0) const {
-    const TrieNode * ptNode = root_;
+    if (begin == end) {
+      return !res.empty();
+    }
+
+    const TrieNode* ptNode = _base + (*(begin++));
+    if (ptNode->ptValue != NULL && res.size() == 1) {
+      res[0].second = ptNode->ptValue;
+    } else if (ptNode->ptValue != NULL) {
+      res.push_back(DagType::value_type(offset, ptNode->ptValue));
+    }
+
     TrieNode::NextMap::const_iterator citer;
-    for(Unicode::const_iterator itr = begin; itr != end ; itr++) {
-      assert(ptNode);
-      if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*itr))) {
+    for (Unicode::const_iterator itr = begin; itr != end; itr++) {
+      if (NULL == ptNode->next) {
+        break;
+      }
+      citer = ptNode->next->find(*itr);
+      if (citer == ptNode->next->end()) {
         break;
       }
       ptNode = citer->second;
-      if(ptNode->ptValue) {
-        if(itr == begin && res.size() == 1) { // first singleword
-          res[0].second = ptNode->ptValue;
-        } else {
-          res.push_back(pair<vector<Unicode >::size_type, const DictUnit* >(itr - begin + offset, ptNode->ptValue));
-        }
+      if (NULL != ptNode->ptValue) {
+        res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue));
       }
     }
     return !res.empty();
   }
-  void insertNode(const Unicode& key, const DictUnit* ptValue) {
-    TrieNode* newAddedNode = insertNode_(key, ptValue);
-    if (newAddedNode) {
-      build_(newAddedNode);
-    }
-  }
- private:
-  void build_() {
-    assert(root_->ptValue == NULL);
-    assert(root_->next);
-    root_->fail = NULL;
-    for(TrieNode::NextMap::iterator iter = root_->next->begin(); iter != root_->next->end(); iter++) {
-      build_(iter->second);
-    }
-  }
-  void build_(TrieNode* node) {
-    node->fail = root_;
-    queue<TrieNode*> que;
-    que.push(node);
-    TrieNode* back = NULL;
-    TrieNode::NextMap::iterator backiter;
-    while(!que.empty()) {
-      TrieNode * now = que.front();
-      que.pop();
-      if(now->next == NULL) {
+  ~Trie() {
+    for (size_t i = 0; i < BASE_SIZE; i++) {
+      if (_base[i].next == NULL) {
         continue;
       }
-      for(TrieNode::NextMap::iterator iter = now->next->begin(); iter != now->next->end(); iter++) {
-        back = now->fail;
-        while(back != NULL) {
-          if(back->next && (backiter = back->next->find(iter->first)) != back->next->end()) {
-            iter->second->fail = backiter->second;
-            break;
-          }
-          back = back->fail;
-        }
-        if(back == NULL) {
-          iter->second->fail = root_;
-        }
-        que.push(iter->second);
+      for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
+        _deleteNode(it->second);
+        it->second = NULL;
       }
+      delete _base[i].next;
+      _base[i].next = NULL;
     }
   }
-  void createTrie_(const vector<Unicode>& keys, 
-        const vector<const DictUnit*> & valuePointers) {
-    if(valuePointers.empty() || keys.empty()) {
+
+  void insertNode(const Unicode& key, const DictUnit* ptValue) {
+    if (key.begin() == key.end()) {
       return;
     }
-    assert(keys.size() == valuePointers.size());
-
-    for(size_t i = 0; i < keys.size(); i++) {
-      insertNode_(keys[i], valuePointers[i]);
-    }
-  }
-  TrieNode* insertNode_(const Unicode& key, const DictUnit* ptValue) {
-    TrieNode* ptNode  = root_;
-    TrieNode* newAddedNode = NULL;
 
     TrieNode::NextMap::const_iterator kmIter;
-
-    for(Unicode::const_iterator citer = key.begin(); citer != key.end(); citer++) {
-      if(NULL == ptNode->next) {
+    Unicode::const_iterator citer= key.begin();
+    TrieNode *ptNode = _base + (*(citer++));
+    for (; citer != key.end(); citer++) {
+      if (NULL == ptNode->next) {
         ptNode->next = new TrieNode::NextMap;
       }
       kmIter = ptNode->next->find(*citer);
-      if(ptNode->next->end() == kmIter) {
-        TrieNode * nextNode = new TrieNode;
-        nextNode->next = NULL;
-        nextNode->ptValue = NULL;
+      if (ptNode->next->end() == kmIter) {
+        TrieNode *nextNode = new TrieNode;
 
-        if(newAddedNode == NULL) {
-          newAddedNode = nextNode;
-        }
-        (*ptNode->next)[*citer] = nextNode;
+        (*(ptNode->next))[*citer] = nextNode;
         ptNode = nextNode;
       } else {
         ptNode = kmIter->second;
       }
     }
     ptNode->ptValue = ptValue;
-    return newAddedNode;
   }
-  void deleteNode_(TrieNode* node) {
-    if(!node) {
+
+ private:
+  void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
+    if (valuePointers.empty() || keys.empty()) {
       return;
     }
-    if(node->next) {
+    assert(keys.size() == valuePointers.size());
+
+    for (size_t i = 0; i < keys.size(); i++) {
+      insertNode(keys[i], valuePointers[i]);
+    }
+  }
+
+  void _deleteNode(TrieNode* node) {
+    if (NULL == node) {
+      return;
+    }
+    if (NULL != node->next) {
       TrieNode::NextMap::iterator it;
-      for(it = node->next->begin(); it != node->next->end(); it++) {
-        deleteNode_(it->second);
+      for (it = node->next->begin(); it != node->next->end(); it++) {
+        _deleteNode(it->second);
       }
       delete node->next;
+      node->next = NULL;
     }
     delete node;
   }
- private:
-  TrieNode* root_;
+
+  TrieNode _base[BASE_SIZE];
 };
 }
 
diff --git a/src/UglyTrie.hpp b/src/UglyTrie.hpp
deleted file mode 100644
index 39a3e89..0000000
--- a/src/UglyTrie.hpp
+++ /dev/null
@@ -1,208 +0,0 @@
-#ifndef CPPJIEBA_UGLY_TRIE_HPP
-#define CPPJIEBA_UGLY_TRIE_HPP
-
-#include "Limonp/StdExtension.hpp"
-#include <vector>
-#include <queue>
-
-namespace CppJieba {
-using namespace std;
-
-struct DictUnit {
-  Unicode word;
-  double weight;
-  string tag;
-};
-
-// for debugging
-inline ostream & operator << (ostream& os, const DictUnit& unit) {
-  string s;
-  s << unit.word;
-  return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
-}
-
-typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
-
-struct SegmentChar {
-  uint16_t uniCh;
-  DagType dag;
-  const DictUnit * pInfo;
-  double weight;
-  size_t nextPos;
-  SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {}
-  ~SegmentChar() {}
-};
-
-typedef Unicode::value_type TrieKey;
-
-class TrieNode {
- public :
-  TrieNode(): next(NULL), ptValue(NULL) {}
- public:
-  typedef unordered_map<TrieKey, TrieNode*> NextMap;
-  NextMap *next;
-  const DictUnit *ptValue;
-};
-
-class UglyTrie {
- public:
-  static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
- public:
-  UglyTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
-    _createTrie(keys, valuePointers);
-  }
-  const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
-    if (begin == end) {
-      return NULL;
-    }
-
-    const TrieNode* ptNode = _base + (*(begin++));
-    TrieNode::NextMap::const_iterator citer;
-    for (Unicode::const_iterator it = begin; it != end; it++) {
-      if (NULL == ptNode->next) {
-        return NULL;
-      }
-      citer = ptNode->next->find(*it);
-      if (ptNode->next->end() == citer) {
-        return NULL;
-      }
-      ptNode = citer->second;
-    }
-    return ptNode->ptValue;
-  }
-
-  void find(
-    Unicode::const_iterator begin,
-    Unicode::const_iterator end,
-    vector<struct SegmentChar>& res
-  ) const {
-    res.resize(end - begin);
-
-    const TrieNode *ptNode = NULL;
-    TrieNode::NextMap::const_iterator citer;
-    for (size_t i = 0; i < size_t(end - begin); i++) {
-      Unicode::value_type ch = *(begin + i);
-      ptNode = _base + ch;
-      res[i].uniCh = ch;
-      assert(res[i].dag.empty());
-
-      res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue));
-
-      for (size_t j = i + 1; j < size_t(end - begin); j++) {
-        if (ptNode->next == NULL) {
-          break;
-        }
-        citer = ptNode->next->find(*(begin + j));
-        if (ptNode->next->end() == citer) {
-          break;
-        }
-        ptNode = citer->second;
-        if (NULL != ptNode->ptValue) {
-          res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue));
-        }
-      }
-    }
-  }
-  bool find(
-    Unicode::const_iterator begin,
-    Unicode::const_iterator end,
-    DagType & res,
-    size_t offset = 0) const {
-    if (begin == end) {
-      return !res.empty();
-    }
-
-    const TrieNode* ptNode = _base + (*(begin++));
-    if (ptNode->ptValue != NULL && res.size() == 1) {
-      res[0].second = ptNode->ptValue;
-    } else if (ptNode->ptValue != NULL) {
-      res.push_back(DagType::value_type(offset, ptNode->ptValue));
-    }
-
-    TrieNode::NextMap::const_iterator citer;
-    for (Unicode::const_iterator itr = begin; itr != end; itr++) {
-      if (NULL == ptNode->next) {
-        break;
-      }
-      citer = ptNode->next->find(*itr);
-      if (citer == ptNode->next->end()) {
-        break;
-      }
-      ptNode = citer->second;
-      if (NULL != ptNode->ptValue) {
-        res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue));
-      }
-    }
-    return !res.empty();
-  }
-  ~UglyTrie() {
-    for (size_t i = 0; i < BASE_SIZE; i++) {
-      if (_base[i].next == NULL) {
-        continue;
-      }
-      for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
-        _deleteNode(it->second);
-        it->second = NULL;
-      }
-      delete _base[i].next;
-      _base[i].next = NULL;
-    }
-  }
-
- private:
-  void _insertNode(const Unicode& key, const DictUnit* ptValue) {
-    if (key.begin() == key.end()) {
-      return;
-    }
-
-    TrieNode::NextMap::const_iterator kmIter;
-    Unicode::const_iterator citer= key.begin();
-    TrieNode *ptNode = _base + (*(citer++));
-    for (; citer != key.end(); citer++) {
-      if (NULL == ptNode->next) {
-        ptNode->next = new TrieNode::NextMap;
-      }
-      kmIter = ptNode->next->find(*citer);
-      if (ptNode->next->end() == kmIter) {
-        TrieNode *nextNode = new TrieNode;
-
-        (*(ptNode->next))[*citer] = nextNode;
-        ptNode = nextNode;
-      } else {
-        ptNode = kmIter->second;
-      }
-    }
-    ptNode->ptValue = ptValue;
-  }
-
-  void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
-    if (valuePointers.empty() || keys.empty()) {
-      return;
-    }
-    assert(keys.size() == valuePointers.size());
-
-    for (size_t i = 0; i < keys.size(); i++) {
-      _insertNode(keys[i], valuePointers[i]);
-    }
-  }
-
-  void _deleteNode(TrieNode* node) {
-    if (NULL == node) {
-      return;
-    }
-    if (NULL != node->next) {
-      TrieNode::NextMap::iterator it;
-      for (it = node->next->begin(); it != node->next->end(); it++) {
-        _deleteNode(it->second);
-      }
-      delete node->next;
-      node->next = NULL;
-    }
-    delete node;
-  }
-
-  TrieNode _base[BASE_SIZE];
-};
-}
-
-#endif