From 75367a20c91ad4cb0f852fc059d8893e834a5537 Mon Sep 17 00:00:00 2001 From: wyy Date: Wed, 12 Nov 2014 19:45:20 +0800 Subject: [PATCH] little modification --- src/MPSegment.hpp | 9 ++------- src/Trie.hpp | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 60c76bd..333fe65 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -76,11 +76,7 @@ namespace CppJieba _calcDP(segmentChars); - if(!_cut(segmentChars, res)) - { - LogError("_cut failed."); - return false; - } + _cut(segmentChars, res); return true; } @@ -127,7 +123,7 @@ namespace CppJieba } } } - bool _cut(const vector& segmentChars, vector& res)const + void _cut(const vector& segmentChars, vector& res) const { size_t i = 0; while(i < segmentChars.size()) @@ -144,7 +140,6 @@ namespace CppJieba i++; } } - return true; } diff --git a/src/Trie.hpp b/src/Trie.hpp index 9d10267..39651c2 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -87,6 +87,7 @@ namespace CppJieba } return ptNode->ptValue; } + // aho-corasick-automation void find( typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator end, @@ -96,26 +97,32 @@ namespace CppJieba res.resize(end - begin); const TrieNodeType * now = _root; typename TrieNodeType::NextMap::const_iterator iter; - for (size_t i = 0; i < end - begin; i++) { + for (size_t i = 0; i < end - begin; i++) + { bool flag = false; res[i].uniCh = *(begin + i); assert(res[i].dag.empty()); - res[i].dag.reserve(4);//TODO - while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())) { + res[i].dag.reserve(2); + while( now != _root && (now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end())) + { now = now->fail; } - if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()) { + if(now->next == NULL || (iter = now->next->find(*(begin + i))) == now->next->end()) + { now = _root; - } else { + } + else + { now = iter->second; const TrieNodeType * temp = now; - while(temp != _root) { - if (temp->ptValue) { - string str; - TransCode::encode(temp->ptValue->word, str); + while(temp != _root) + { + if (temp->ptValue) + { size_t pos = i - temp->ptValue->word.size() + 1; res[pos].dag.push_back(pair(i, temp->ptValue)); - if(temp->ptValue->word.size() == 1) { + if(pos == i) + { flag = true; } } @@ -123,7 +130,8 @@ namespace CppJieba assert(temp); } } - if(!flag) { + if(!flag) + { res[i].dag.push_back(pair(i, NULL)); } }