From cf9cc45c198c7e31c0e5b9dc350beba7bbbca957 Mon Sep 17 00:00:00 2001 From: xuangong Date: Tue, 21 Jul 2015 00:11:13 +0800 Subject: [PATCH] astyle --- src/UglyTrie.hpp | 407 +++++++++++++++++++++-------------------------- 1 file changed, 181 insertions(+), 226 deletions(-) diff --git a/src/UglyTrie.hpp b/src/UglyTrie.hpp index 9b6bab6..39a3e89 100644 --- a/src/UglyTrie.hpp +++ b/src/UglyTrie.hpp @@ -5,249 +5,204 @@ #include #include -namespace CppJieba -{ - using namespace std; +namespace CppJieba { +using namespace std; - struct DictUnit - { - Unicode word; - double weight; - string tag; - }; +struct DictUnit { + Unicode word; + double weight; + string tag; +}; - // for debugging - inline ostream & operator << (ostream& os, const DictUnit& unit) - { - string s; - s << unit.word; - return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); +// for debugging +inline ostream & operator << (ostream& os, const DictUnit& unit) { + string s; + s << unit.word; + return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight); +} + +typedef LocalVector > DagType; + +struct SegmentChar { + uint16_t uniCh; + DagType dag; + const DictUnit * pInfo; + double weight; + size_t nextPos; + SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {} + ~SegmentChar() {} +}; + +typedef Unicode::value_type TrieKey; + +class TrieNode { + public : + TrieNode(): next(NULL), ptValue(NULL) {} + public: + typedef unordered_map NextMap; + NextMap *next; + const DictUnit *ptValue; +}; + +class UglyTrie { + public: + static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey)))); + public: + UglyTrie(const vector& keys, const vector& valuePointers) { + _createTrie(keys, valuePointers); + } + const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const { + if (begin == end) { + return NULL; } - typedef LocalVector > DagType; + const TrieNode* ptNode = _base + (*(begin++)); + TrieNode::NextMap::const_iterator citer; + for (Unicode::const_iterator it = begin; it != end; it++) { + if (NULL == ptNode->next) { + return NULL; + } + citer = ptNode->next->find(*it); + if (ptNode->next->end() == citer) { + return NULL; + } + ptNode = citer->second; + } + return ptNode->ptValue; + } - struct SegmentChar - { - uint16_t uniCh; - DagType dag; - const DictUnit * pInfo; - double weight; - size_t nextPos; - SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0){} - ~SegmentChar() {} - }; + void find( + Unicode::const_iterator begin, + Unicode::const_iterator end, + vector& res + ) const { + res.resize(end - begin); - typedef Unicode::value_type TrieKey; + const TrieNode *ptNode = NULL; + TrieNode::NextMap::const_iterator citer; + for (size_t i = 0; i < size_t(end - begin); i++) { + Unicode::value_type ch = *(begin + i); + ptNode = _base + ch; + res[i].uniCh = ch; + assert(res[i].dag.empty()); - class TrieNode - { - public : - TrieNode(): next(NULL), ptValue(NULL) {} - public: - typedef unordered_map NextMap; - NextMap *next; - const DictUnit *ptValue; - }; + res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue)); - class UglyTrie - { - public: - static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey)))); - public: - UglyTrie(const vector& keys, const vector& valuePointers) - { - _createTrie(keys, valuePointers); - } - const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const - { - if (begin == end) - { - return NULL; - } + for (size_t j = i + 1; j < size_t(end - begin); j++) { + if (ptNode->next == NULL) { + break; + } + citer = ptNode->next->find(*(begin + j)); + if (ptNode->next->end() == citer) { + break; + } + ptNode = citer->second; + if (NULL != ptNode->ptValue) { + res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue)); + } + } + } + } + bool find( + Unicode::const_iterator begin, + Unicode::const_iterator end, + DagType & res, + size_t offset = 0) const { + if (begin == end) { + return !res.empty(); + } - const TrieNode* ptNode = _base + (*(begin++)); - TrieNode::NextMap::const_iterator citer; - for (Unicode::const_iterator it = begin; it != end; it++) - { - if (NULL == ptNode->next) - { - return NULL; - } - citer = ptNode->next->find(*it); - if (ptNode->next->end() == citer) - { - return NULL; - } - ptNode = citer->second; - } - return ptNode->ptValue; - } + const TrieNode* ptNode = _base + (*(begin++)); + if (ptNode->ptValue != NULL && res.size() == 1) { + res[0].second = ptNode->ptValue; + } else if (ptNode->ptValue != NULL) { + res.push_back(DagType::value_type(offset, ptNode->ptValue)); + } - void find( - Unicode::const_iterator begin, - Unicode::const_iterator end, - vector& res - ) const - { - res.resize(end - begin); + TrieNode::NextMap::const_iterator citer; + for (Unicode::const_iterator itr = begin; itr != end; itr++) { + if (NULL == ptNode->next) { + break; + } + citer = ptNode->next->find(*itr); + if (citer == ptNode->next->end()) { + break; + } + ptNode = citer->second; + if (NULL != ptNode->ptValue) { + res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue)); + } + } + return !res.empty(); + } + ~UglyTrie() { + for (size_t i = 0; i < BASE_SIZE; i++) { + if (_base[i].next == NULL) { + continue; + } + for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) { + _deleteNode(it->second); + it->second = NULL; + } + delete _base[i].next; + _base[i].next = NULL; + } + } - const TrieNode *ptNode = NULL; - TrieNode::NextMap::const_iterator citer; - for (size_t i = 0; i < size_t(end - begin); i++) - { - Unicode::value_type ch = *(begin + i); - ptNode = _base + ch; - res[i].uniCh = ch; - assert(res[i].dag.empty()); + private: + void _insertNode(const Unicode& key, const DictUnit* ptValue) { + if (key.begin() == key.end()) { + return; + } - res[i].dag.push_back(DagType::value_type(i, ptNode->ptValue)); - - for (size_t j = i + 1; j < size_t(end - begin); j++) - { - if (ptNode->next == NULL) - { - break; - } - citer = ptNode->next->find(*(begin + j)); - if (ptNode->next->end() == citer) - { - break; - } - ptNode = citer->second; - if (NULL != ptNode->ptValue) - { - res[i].dag.push_back(DagType::value_type(j, ptNode->ptValue)); - } - } - } - } - bool find( - Unicode::const_iterator begin, - Unicode::const_iterator end, - DagType & res, - size_t offset = 0) const - { - if (begin == end) - { - return !res.empty(); - } + TrieNode::NextMap::const_iterator kmIter; + Unicode::const_iterator citer= key.begin(); + TrieNode *ptNode = _base + (*(citer++)); + for (; citer != key.end(); citer++) { + if (NULL == ptNode->next) { + ptNode->next = new TrieNode::NextMap; + } + kmIter = ptNode->next->find(*citer); + if (ptNode->next->end() == kmIter) { + TrieNode *nextNode = new TrieNode; - const TrieNode* ptNode = _base + (*(begin++)); - if (ptNode->ptValue != NULL && res.size() == 1) - { - res[0].second = ptNode->ptValue; - } - else if (ptNode->ptValue != NULL) - { - res.push_back(DagType::value_type(offset, ptNode->ptValue)); - } + (*(ptNode->next))[*citer] = nextNode; + ptNode = nextNode; + } else { + ptNode = kmIter->second; + } + } + ptNode->ptValue = ptValue; + } - TrieNode::NextMap::const_iterator citer; - for (Unicode::const_iterator itr = begin; itr != end; itr++) - { - if (NULL == ptNode->next) - { - break; - } - citer = ptNode->next->find(*itr); - if (citer == ptNode->next->end()) - { - break; - } - ptNode = citer->second; - if (NULL != ptNode->ptValue) - { - res.push_back(DagType::value_type(itr - begin + offset, ptNode->ptValue)); - } - } - return !res.empty(); - } - ~UglyTrie() - { - for (size_t i = 0; i < BASE_SIZE; i++) - { - if (_base[i].next == NULL) - { - continue; - } - for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) - { - _deleteNode(it->second); - it->second = NULL; - } - delete _base[i].next; - _base[i].next = NULL; - } - } + void _createTrie(const vector& keys, const vector& valuePointers) { + if (valuePointers.empty() || keys.empty()) { + return; + } + assert(keys.size() == valuePointers.size()); - private: - void _insertNode(const Unicode& key, const DictUnit* ptValue) - { - if (key.begin() == key.end()) - { - return; - } + for (size_t i = 0; i < keys.size(); i++) { + _insertNode(keys[i], valuePointers[i]); + } + } - TrieNode::NextMap::const_iterator kmIter; - Unicode::const_iterator citer= key.begin(); - TrieNode *ptNode = _base + (*(citer++)); - for (; citer != key.end(); citer++) - { - if (NULL == ptNode->next) - { - ptNode->next = new TrieNode::NextMap; - } - kmIter = ptNode->next->find(*citer); - if (ptNode->next->end() == kmIter) - { - TrieNode *nextNode = new TrieNode; + void _deleteNode(TrieNode* node) { + if (NULL == node) { + return; + } + if (NULL != node->next) { + TrieNode::NextMap::iterator it; + for (it = node->next->begin(); it != node->next->end(); it++) { + _deleteNode(it->second); + } + delete node->next; + node->next = NULL; + } + delete node; + } - (*(ptNode->next))[*citer] = nextNode; - ptNode = nextNode; - } - else - { - ptNode = kmIter->second; - } - } - ptNode->ptValue = ptValue; - } - - void _createTrie(const vector& keys, const vector& valuePointers) - { - if (valuePointers.empty() || keys.empty()) - { - return; - } - assert(keys.size() == valuePointers.size()); - - for (size_t i = 0; i < keys.size(); i++) - { - _insertNode(keys[i], valuePointers[i]); - } - } - - void _deleteNode(TrieNode* node) - { - if (NULL == node) - { - return; - } - if (NULL != node->next) - { - TrieNode::NextMap::iterator it; - for (it = node->next->begin(); it != node->next->end(); it++) - { - _deleteNode(it->second); - } - delete node->next; - node->next = NULL; - } - delete node; - } - - TrieNode _base[BASE_SIZE]; - }; + TrieNode _base[BASE_SIZE]; +}; } #endif