From 239d025cd88e2fd68291f75b7249b9eb1fe1e0d5 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Tue, 16 Feb 2016 20:24:28 +0800 Subject: [PATCH] delete HashMap, use unordered_map instead --- ChangeLog.md | 2 +- include/cppjieba/HashMap.hpp | 229 ----------------------------------- include/cppjieba/Trie.hpp | 25 ++-- 3 files changed, 14 insertions(+), 242 deletions(-) delete mode 100644 include/cppjieba/HashMap.hpp diff --git a/ChangeLog.md b/ChangeLog.md index bdd44d3..094f747 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -2,7 +2,7 @@ ## next version -+ 开始在 Trie 中使用定制化的 cppjieba::HashMap ,并去除之前糟糕的针对 uint16 优化的用数组代替 map 的设计, ++ 在 Trie 中去除之前糟糕的针对 uint16 优化的用数组代替 map 的设计, 该设计的主要问题是前提 unicode 每个字符必须是 uint16 ,则无法更全面得支持 unicode 多国字符。 ## v4.4.1 diff --git a/include/cppjieba/HashMap.hpp b/include/cppjieba/HashMap.hpp deleted file mode 100644 index 9aaced8..0000000 --- a/include/cppjieba/HashMap.hpp +++ /dev/null @@ -1,229 +0,0 @@ -#ifndef LIMONP_HASH_MAP_HPP -#define LIMONP_HASH_MAP_HPP - -#include -#include -#include -#include -#include -#include - -namespace cppjieba { - -static size_t PRIME_NUMBERS[] = {3, 7, 17, 37, 79, 163, 331, - 673, 1361, 2729, 471, 10949, - 21911, 43853, 87719, 175447, 350899, - 701819, 1403641, 2807303, 5614657, 11229331, - 22458671, 44917381, 89834777, 179669557, 359339171, - 718678369, 1437356741, 2147483647 -}; - -template -class HashMap { - private: - typedef std::pair ValueT; - - class LightList { - public: - struct Node { - ValueT value; - struct Node* next; - }; // struct Node - - LightList() - : head_(NULL) { - } - LightList(const LightList& ll) - : head_(NULL) { - struct Node* node = ll.head_; - while (node != NULL) { - UniqAppend(node->value); - node = node->next; - } - } - ~LightList() { - while (head_ != NULL) { - Node* x = head_; - head_ = head_->next; - delete x; - } - } - - //O(n) - std::pair UniqAppend(const ValueT& x) { - struct Node** pp = &head_; - while ((*pp) != NULL) { - if ((*pp)->value.first == x.first) { - (*pp)->value.second = x.second; - return std::pair((*pp)->value, false); - } - pp = &(*pp)->next; - } - struct Node* node = new Node; - node->value = x; - node->next = NULL; - (*pp) = node; - return std::pair(node->value, true); - } - - //O(1) - void PushFront(const ValueT& x) { - Node* node = new Node; - node->value = x; - node->next = head_; - head_ = node; - } - private: - LightList& operator = (const LightList& ll); - - friend class HashMap; - friend class Iterator; - - Node* head_; - }; // class LightList - - template - class Iterator { - public: - Iterator() { - } - - Iterator& operator ++() { - assert(buckets_ != NULL); - assert(node_ != NULL); - node_ = node_->next; - while (node_ == NULL) { - ++bucketid_; - if (bucketid_ >= buckets_->size()) { - break; - } - node_ = (*buckets_)[bucketid_].head_; - } - return *this; - } - - bool operator == (const Iterator& iter) const { - return node_ == iter.node_; - } - - bool operator != (const Iterator& iter) const { - return node_ != iter.node_; - } - - ValueT* operator -> () { - assert(node_ != NULL); - assert(bucketid_ < buckets_->size()); - return &node_->value; - } - ValueT& operator * () { - assert(node_ != NULL); - assert(bucketid_ < buckets_->size()); - return node_->value; - } - private: - friend class HashMap; - Iterator(size_t bucketid, BucketsT buckets, NodeT node) - : bucketid_(bucketid), buckets_(buckets), node_(node) { - } - size_t bucketid_; - BucketsT buckets_; - NodeT node_; - }; // class Iterator - - public: - typedef Iterator*, const struct LightList::Node*, const ValueT> const_iterator; - - HashMap() - : size_(0) { - } - ~HashMap() { - } - - size_t Size() const { - return size_; - } - - size_t BucketSize() const { - return buckets_.size(); - } - - bool Insert(const ValueT& v) { - size_ ++; - if (size_ > buckets_.size()) { - const size_t* begin = PRIME_NUMBERS; - const size_t* end = PRIME_NUMBERS + sizeof(PRIME_NUMBERS)/sizeof(*PRIME_NUMBERS); - const size_t* cur = std::lower_bound(begin, end, size_); - if (end != cur) { - Rehash(*cur); - } else { - Rehash(size_); - } - } - - assert(buckets_.size() >= size_ && size_ > 0); - size_t bucketid = Hash(v.first) % buckets_.size(); - bool ok = buckets_[bucketid].UniqAppend(v).second; - if (!ok) { - size_ --; - } - return ok; - } - - const_iterator Find(const KeyT& key) const { - if (size_ == 0) { - return End(); - } - assert(buckets_.size() > 0); - size_t bucketid = Hash(key) % buckets_.size(); - const struct LightList::Node* node = buckets_[bucketid].head_; - while (node != NULL) { - if (node->value.first == key) { - return const_iterator(bucketid, &buckets_, node); - } - node = node->next; - } - return End(); - } - - const_iterator Begin() const { - if (buckets_.empty()) { - return End(); - } - size_t bucketid = 0; - assert(bucketid < buckets_.size()); - const struct LightList::Node* node = buckets_[bucketid].head_; - while (node == NULL && bucketid < buckets_.size()) { - bucketid ++; - node = buckets_[bucketid].head_; - } - return const_iterator(bucketid, &buckets_, node); - } - const_iterator End() const { - return const_iterator(buckets_.size(), &buckets_, NULL); - } - - void Rehash(size_t maxsize) { - assert(maxsize > 0); - std::vector newbuckets(maxsize); - for (size_t i = 0; i < buckets_.size(); ++i) { - struct LightList::Node* oldnode = buckets_[i].head_; - while (oldnode != NULL) { - size_t bucketid = Hash(oldnode->value.first) % maxsize; //TODO - newbuckets[bucketid].PushFront(oldnode->value); - oldnode = oldnode->next; - } - } - buckets_.swap(newbuckets); - } - private: - size_t Hash(KeyT key) const { - return key; - } - - std::vector buckets_; - size_t size_; -}; // class HashMap - -} // namespace cppjieba - -#endif // LIMONP_HASH_MAP_HPP diff --git a/include/cppjieba/Trie.hpp b/include/cppjieba/Trie.hpp index 7e1c49c..fb5b9ff 100644 --- a/include/cppjieba/Trie.hpp +++ b/include/cppjieba/Trie.hpp @@ -3,9 +3,10 @@ #include #include -#include "HashMap.hpp" +#include "limonp/StdExtension.hpp" namespace cppjieba { + using namespace std; const size_t MAX_WORD_LENGTH = 512; @@ -40,7 +41,7 @@ class TrieNode { TrieNode(): next(NULL), ptValue(NULL) { } public: - typedef HashMap NextMap; + typedef unordered_map NextMap; NextMap *next; const DictUnit *ptValue; }; @@ -66,8 +67,8 @@ class Trie { if (NULL == ptNode->next) { return NULL; } - citer = ptNode->next->Find(*it); - if (ptNode->next->End() == citer) { + citer = ptNode->next->find(*it); + if (ptNode->next->end() == citer) { return NULL; } ptNode = citer->second; @@ -88,7 +89,7 @@ class Trie { Rune rune = *(begin + i); res[i].rune = rune; - if (root_->next != NULL && root_->next->End() != (citer = root_->next->Find(rune))) { + if (root_->next != NULL && root_->next->end() != (citer = root_->next->find(rune))) { ptNode = citer->second; } else { ptNode = NULL; @@ -103,8 +104,8 @@ class Trie { if (ptNode == NULL || ptNode->next == NULL) { break; } - citer = ptNode->next->Find(*(begin + j)); - if (ptNode->next->End() == citer) { + citer = ptNode->next->find(*(begin + j)); + if (ptNode->next->end() == citer) { break; } ptNode = citer->second; @@ -126,11 +127,11 @@ class Trie { if (NULL == ptNode->next) { ptNode->next = new TrieNode::NextMap; } - kmIter = ptNode->next->Find(*citer); - if (ptNode->next->End() == kmIter) { + kmIter = ptNode->next->find(*citer); + if (ptNode->next->end() == kmIter) { TrieNode *nextNode = new TrieNode; - ptNode->next->Insert(make_pair(*citer, nextNode)); + ptNode->next->insert(make_pair(*citer, nextNode)); ptNode = nextNode; } else { ptNode = kmIter->second; @@ -152,12 +153,12 @@ class Trie { } } - void DeleteNode(const TrieNode* node) { + void DeleteNode(TrieNode* node) { if (NULL == node) { return; } if (NULL != node->next) { - for (TrieNode::NextMap::const_iterator it = node->next->Begin(); it != node->next->End(); ++it) { + for (TrieNode::NextMap::iterator it = node->next->begin(); it != node->next->end(); ++it) { DeleteNode(it->second); } delete node->next;