From 0f79fa6c24199d8889d5524969000c7f2ad8710c Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Fri, 24 Jul 2015 13:42:24 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=9F=E4=B8=80=E5=9C=A8SegmentBase=E6=90=9E?= =?UTF-8?q?=E5=AE=9A=E6=89=80=E6=9C=89Unicode=E5=92=8Cstring=E7=9A=84?= =?UTF-8?q?=E8=BD=AC=E7=A0=81=E4=BA=8B=E6=83=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/FullSegment.hpp | 43 ------------------------------------------- src/HMMSegment.hpp | 16 ---------------- src/ISegment.hpp | 15 ++++++++------- src/MPSegment.hpp | 14 -------------- src/MixSegment.hpp | 19 ------------------- src/QuerySegment.hpp | 16 ---------------- src/SegmentBase.hpp | 25 +++++++++++++++++++++++-- 7 files changed, 31 insertions(+), 117 deletions(-) diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index 1a70315..37a88e0 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -58,49 +58,6 @@ class FullSegment: public SegmentBase { } uIdx++; } - /* - for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) { - //find word start from uItr - if (dictTrie_->find(uItr, end, tRes, 0)) { - for(LocalVector >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) { - wordLen = itr->second->word.size(); - if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) { - res.push_back(itr->second->word); - } - maxIdx = uIdx+wordLen > maxIdx ? uIdx+wordLen : maxIdx; - } - tRes.clear(); - } else { // not found word start from uItr - if (maxIdx <= uIdx) { // never exist in prev results - //put itr itself in res - res.push_back(Unicode(1, *uItr)); - - //mark it exits - ++maxIdx; - } - } - ++uIdx; - } - */ - - return true; - } - - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, - vector& res) const { - vector uRes; - if (!cut(begin, end, uRes)) { - LogError("get unicode cut result error."); - return false; - } - - string tmp; - for (vector::const_iterator uItr = uRes.begin(); - uItr != uRes.end(); uItr++) { - TransCode::encode(*uItr, tmp); - res.push_back(tmp); - } - return true; } private: diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index 83769b2..554a52f 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -55,22 +55,6 @@ class HMMSegment: public SegmentBase { } return true; } - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - if(begin == end) { - return false; - } - vector words; - words.reserve(end - begin); - if(!cut(begin, end, words)) { - return false; - } - size_t offset = res.size(); - res.resize(res.size() + words.size()); - for(size_t i = 0; i < words.size(); i++) { - TransCode::encode(words[i], res[offset + i]); - } - return true; - } private: // sequential letters rule Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { diff --git a/src/ISegment.hpp b/src/ISegment.hpp index 4faded5..f0704b6 100644 --- a/src/ISegment.hpp +++ b/src/ISegment.hpp @@ -1,14 +1,15 @@ -#ifndef CPPJIEBA_SEGMENTINTERFACE_H -#define CPPJIEBA_SEGMENTINTERFACE_H - +#ifndef CPPJIEBA_ISEGMENT_H +#define CPPJIEBA_ISEGMENT_H namespace CppJieba { + class ISegment { public: - virtual ~ISegment() {}; - virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const = 0; + virtual ~ISegment() { + }; virtual bool cut(const string& str, vector& res) const = 0; }; -} -#endif +} // namespace CppJieba + +#endif // CPPJIEBA_ISEGMENT_H diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 975998b..da6f38b 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -33,20 +33,6 @@ class MPSegment: public SegmentBase { } using SegmentBase::cut; - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - vector words; - words.reserve(end - begin); - if(!cut(begin, end, words)) { - return false; - } - size_t offset = res.size(); - res.resize(res.size() + words.size()); - for(size_t i = 0; i < words.size(); i++) { - TransCode::encode(words[i], res[i + offset]); - } - return true; - } - bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector& res) const { vector dags; diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index afe536d..36d00a2 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -68,25 +68,6 @@ class MixSegment: public SegmentBase { return true; } - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res)const { - if(begin == end) { - return false; - } - - vector uRes; - uRes.reserve(end - begin); - if (!cut(begin, end, uRes)) { - return false; - } - - size_t offset = res.size(); - res.resize(res.size() + uRes.size()); - for(size_t i = 0; i < uRes.size(); i ++, offset++) { - TransCode::encode(uRes[i], res[offset]); - } - return true; - } - const DictTrie* getDictTrie() const { return mpSeg_.getDictTrie(); } diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index bd8e676..31e8847 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -55,22 +55,6 @@ class QuerySegment: public SegmentBase { return true; } - - bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const { - vector uRes; - if (!cut(begin, end, uRes)) { - LogError("get unicode cut result error."); - return false; - } - - string tmp; - for (vector::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++) { - TransCode::encode(*uItr, tmp); - res.push_back(tmp); - } - - return true; - } private: MixSegment mixSeg_; FullSegment fullSeg_; diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 93bc901..512be42 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -24,9 +24,10 @@ class SegmentBase: public ISegment, public NonCopyable { SegmentBase() { loadSpecialSymbols_(); }; - virtual ~SegmentBase() {}; + virtual ~SegmentBase() { + }; public: - virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const = 0; + virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res) const = 0; virtual bool cut(const string& str, vector& res) const { res.clear(); @@ -54,6 +55,26 @@ class SegmentBase: public ISegment, public NonCopyable { return true; } + virtual bool cut(Unicode::const_iterator begin, + Unicode::const_iterator end, + vector& res) const { + if(begin == end) { + return false; + } + + vector uRes; + uRes.reserve(end - begin); + if (!cut(begin, end, uRes)) { + return false; + } + + size_t offset = res.size(); + res.resize(res.size() + uRes.size()); + for(size_t i = 0; i < uRes.size(); i ++, offset++) { + TransCode::encode(uRes[i], res[offset]); + } + return true; + } private: void loadSpecialSymbols_() { size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);