From 2d3c51dba77f446cfc5cebec019e7d4c90d3847e Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Thu, 4 Feb 2016 23:43:26 +0800 Subject: [PATCH] upgrade limonp and use limonp::HashMap in Trie --- ChangeLog.md | 4 + deps/limonp/ForcePublic.hpp | 7 ++ deps/limonp/HashMap.hpp | 229 ++++++++++++++++++++++++++++++++++++ deps/limonp/NonCopyable.hpp | 2 - include/cppjieba/Trie.hpp | 28 ++--- 5 files changed, 253 insertions(+), 17 deletions(-) create mode 100644 deps/limonp/ForcePublic.hpp create mode 100644 deps/limonp/HashMap.hpp diff --git a/ChangeLog.md b/ChangeLog.md index 2678cb2..b7e16b7 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,9 @@ # CppJieba ChangeLog +## next version + ++ 升级 [limonp] 并使用定制化的 limonp::HashMap 。 + ## v4.4.1 + 使用 valgrind 检查内存泄露的问题,定位出一个HMM模型初始化的问题导致内存泄露的bug,不过此内存泄露不是致命问题, diff --git a/deps/limonp/ForcePublic.hpp b/deps/limonp/ForcePublic.hpp new file mode 100644 index 0000000..2076682 --- /dev/null +++ b/deps/limonp/ForcePublic.hpp @@ -0,0 +1,7 @@ +#ifndef LIMONP_FORCE_PUBLIC_H +#define LIMONP_FORCE_PUBLIC_H + +#define private public +#define protected public + +#endif // LIMONP_FORCE_PUBLIC_H diff --git a/deps/limonp/HashMap.hpp b/deps/limonp/HashMap.hpp new file mode 100644 index 0000000..187903d --- /dev/null +++ b/deps/limonp/HashMap.hpp @@ -0,0 +1,229 @@ +#ifndef LIMONP_HASH_MAP_HPP +#define LIMONP_HASH_MAP_HPP + +#include +#include +#include +#include +#include +#include + +namespace limonp { + +static size_t PRIME_NUMBERS[] = {3, 7, 17, 37, 79, 163, 331, + 673, 1361, 2729, 5471, 10949, // must stay ascending: Insert() searches this table with std::lower_bound + 21911, 43853, 87719, 175447, 350899, + 701819, 1403641, 2807303, 5614657, 11229331, + 22458671, 44917381, 89834777, 179669557, 359339171, + 718678369, 1437356741, 2147483647 +}; + +template +class HashMap { + private: + typedef std::pair ValueT; + + class LightList { + public: + struct Node { + ValueT value; + struct Node* next; + }; // struct Node + + LightList() + : head_(NULL) { + } + LightList(const LightList& ll) + : head_(NULL) { + struct Node* 
node = ll.head_; + while (node != NULL) { + UniqAppend(node->value); + node = node->next; + } + } + ~LightList() { + while (head_ != NULL) { + Node* x = head_; + head_ = head_->next; + delete x; + } + } + + //O(n) + std::pair UniqAppend(const ValueT& x) { + struct Node** pp = &head_; + while ((*pp) != NULL) { + if ((*pp)->value.first == x.first) { + (*pp)->value.second = x.second; + return std::pair((*pp)->value, false); + } + pp = &(*pp)->next; + } + struct Node* node = new Node; + node->value = x; + node->next = NULL; + (*pp) = node; + return std::pair(node->value, true); + } + + //O(1) + void PushFront(const ValueT& x) { + Node* node = new Node; + node->value = x; + node->next = head_; + head_ = node; + } + private: + LightList& operator = (const LightList& ll); + + friend class HashMap; + friend class Iterator; + + Node* head_; + }; // class LightList + + template + class Iterator { + public: + Iterator() : bucketid_(0), buckets_(NULL), node_(NULL) { // keep a default-constructed iterator in a defined state + } + + Iterator& operator ++() { + assert(buckets_ != NULL); + assert(node_ != NULL); + node_ = node_->next; + while (node_ == NULL) { + ++bucketid_; + if (bucketid_ >= buckets_->size()) { + break; + } + node_ = (*buckets_)[bucketid_].head_; + } + return *this; + } + + bool operator == (const Iterator& iter) const { + return node_ == iter.node_; + } + + bool operator != (const Iterator& iter) const { + return node_ != iter.node_; + } + + ValueT* operator -> () { + assert(node_ != NULL); + assert(bucketid_ < buckets_->size()); + return &node_->value; + } + ValueT& operator * () { + assert(node_ != NULL); + assert(bucketid_ < buckets_->size()); + return node_->value; + } + private: + friend class HashMap; + Iterator(size_t bucketid, BucketsT buckets, NodeT node) + : bucketid_(bucketid), buckets_(buckets), node_(node) { + } + size_t bucketid_; + BucketsT buckets_; + NodeT node_; + }; // class Iterator + + public: + typedef Iterator*, const struct LightList::Node*, const ValueT> const_iterator; + + HashMap() + : size_(0) { + } + ~HashMap() { + } + + size_t 
Size() const { + return size_; + } + + size_t BucketSize() const { + return buckets_.size(); + } + + bool Insert(const ValueT& v) { + size_ ++; + if (size_ > buckets_.size()) { + const size_t* begin = PRIME_NUMBERS; + const size_t* end = PRIME_NUMBERS + sizeof(PRIME_NUMBERS)/sizeof(*PRIME_NUMBERS); + const size_t* cur = std::lower_bound(begin, end, size_); + if (end != cur) { + Rehash(*cur); + } else { + Rehash(size_); + } + } + + assert(buckets_.size() >= size_ && size_ > 0); + size_t bucketid = Hash(v.first) % buckets_.size(); + bool ok = buckets_[bucketid].UniqAppend(v).second; + if (!ok) { + size_ --; + } + return ok; + } + + const_iterator Find(const KeyT& key) const { + if (size_ == 0) { + return End(); + } + assert(buckets_.size() > 0); + size_t bucketid = Hash(key) % buckets_.size(); + const struct LightList::Node* node = buckets_[bucketid].head_; + while (node != NULL) { + if (node->value.first == key) { + return const_iterator(bucketid, &buckets_, node); + } + node = node->next; + } + return End(); + } + + const_iterator Begin() const { + if (buckets_.empty()) { + return End(); + } + size_t bucketid = 0; + assert(bucketid < buckets_.size()); + const struct LightList::Node* node = buckets_[bucketid].head_; + while (node == NULL && bucketid + 1 < buckets_.size()) { // never index one past the last bucket when every bucket is empty + bucketid ++; + node = buckets_[bucketid].head_; + } + return const_iterator(bucketid, &buckets_, node); + } + const_iterator End() const { + return const_iterator(buckets_.size(), &buckets_, NULL); + } + + void Rehash(size_t maxsize) { + assert(maxsize > 0); + std::vector newbuckets(maxsize); + for (size_t i = 0; i < buckets_.size(); ++i) { + struct LightList::Node* oldnode = buckets_[i].head_; + while (oldnode != NULL) { + size_t bucketid = Hash(oldnode->value.first) % maxsize; //TODO + newbuckets[bucketid].PushFront(oldnode->value); + oldnode = oldnode->next; + } + } + buckets_.swap(newbuckets); + } + private: + size_t Hash(KeyT key) const { + return key; + } + + std::vector buckets_; + size_t 
size_; +}; // class HashMap + +} // namespace limonp + +#endif // LIMONP_HASH_MAP_HPP diff --git a/deps/limonp/NonCopyable.hpp b/deps/limonp/NonCopyable.hpp index 145400f..5cdcf37 100644 --- a/deps/limonp/NonCopyable.hpp +++ b/deps/limonp/NonCopyable.hpp @@ -1,5 +1,3 @@ -/************************************ - ************************************/ #ifndef LIMONP_NONCOPYABLE_H #define LIMONP_NONCOPYABLE_H diff --git a/include/cppjieba/Trie.hpp b/include/cppjieba/Trie.hpp index a44b9de..c2c0fea 100644 --- a/include/cppjieba/Trie.hpp +++ b/include/cppjieba/Trie.hpp @@ -1,9 +1,10 @@ #ifndef CPPJIEBA_TRIE_HPP #define CPPJIEBA_TRIE_HPP -#include "limonp/StdExtension.hpp" #include #include +#include "limonp/StdExtension.hpp" +#include "limonp/HashMap.hpp" namespace cppjieba { using namespace std; @@ -40,7 +41,7 @@ class TrieNode { TrieNode(): next(NULL), ptValue(NULL) { } public: - typedef unordered_map NextMap; + typedef limonp::HashMap NextMap; NextMap *next; const DictUnit *ptValue; }; @@ -56,9 +57,8 @@ class Trie { if (_base[i].next == NULL) { continue; } - for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) { + for (TrieNode::NextMap::const_iterator it = _base[i].next->Begin(); it != _base[i].next->End(); ++it) { DeleteNode(it->second); - it->second = NULL; } delete _base[i].next; _base[i].next = NULL; @@ -76,8 +76,8 @@ class Trie { if (NULL == ptNode->next) { return NULL; } - citer = ptNode->next->find(*it); - if (ptNode->next->end() == citer) { + citer = ptNode->next->Find(*it); + if (ptNode->next->End() == citer) { return NULL; } ptNode = citer->second; @@ -105,8 +105,8 @@ class Trie { if (ptNode->next == NULL) { break; } - citer = ptNode->next->find(*(begin + j)); - if (ptNode->next->end() == citer) { + citer = ptNode->next->Find(*(begin + j)); + if (ptNode->next->End() == citer) { break; } ptNode = citer->second; @@ -129,11 +129,11 @@ class Trie { if (NULL == ptNode->next) { ptNode->next = new TrieNode::NextMap; } - 
kmIter = ptNode->next->find(*citer); - if (ptNode->next->end() == kmIter) { + kmIter = ptNode->next->Find(*citer); + if (ptNode->next->End() == kmIter) { TrieNode *nextNode = new TrieNode; - (*(ptNode->next))[*citer] = nextNode; + ptNode->next->Insert(make_pair(*citer, nextNode)); ptNode = nextNode; } else { ptNode = kmIter->second; @@ -154,17 +154,15 @@ class Trie { } } - void DeleteNode(TrieNode* node) { + void DeleteNode(const TrieNode* node) { if (NULL == node) { return; } if (NULL != node->next) { - TrieNode::NextMap::iterator it; - for (it = node->next->begin(); it != node->next->end(); it++) { + for (TrieNode::NextMap::const_iterator it = node->next->Begin(); it != node->next->End(); ++it) { DeleteNode(it->second); } delete node->next; - node->next = NULL; } delete node; }