From e7a45d2dde4820f6b3071bdfd28b5e9072bccace Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Sun, 17 Apr 2016 22:23:00 +0800 Subject: [PATCH] remove LevelSegment --- ChangeLog.md | 4 ++ include/cppjieba/LevelSegment.hpp | 81 ------------------------------- test/unittest/segments_test.cpp | 15 ------ 3 files changed, 4 insertions(+), 96 deletions(-) delete mode 100644 include/cppjieba/LevelSegment.hpp diff --git a/ChangeLog.md b/ChangeLog.md index ea5e16b..c3c5c89 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,9 @@ # CppJieba ChangeLog +## next version + ++ remove LevelSegment; + ## v4.6.0 + Change Jieba::Locate(deprecated) to be static function. diff --git a/include/cppjieba/LevelSegment.hpp b/include/cppjieba/LevelSegment.hpp deleted file mode 100644 index 9fa5909..0000000 --- a/include/cppjieba/LevelSegment.hpp +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef CPPJIEBA_LEVELSEGMENT_H -#define CPPJIEBA_LEVELSEGMENT_H - -#include "MPSegment.hpp" - -namespace cppjieba { - -class LevelSegment: public SegmentBase{ - public: - LevelSegment(const string& dictPath, - const string& userDictPath = "") - : mpSeg_(dictPath, userDictPath) { - } - LevelSegment(const DictTrie* dictTrie) - : mpSeg_(dictTrie) { - } - ~LevelSegment() { - } - - void Cut(RuneStrArray::const_iterator begin, - RuneStrArray::const_iterator end, - vector >& res) const { - res.clear(); - vector words; - vector smallerWords; - words.reserve(end - begin); - mpSeg_.Cut(begin, end, words); - smallerWords.reserve(words.size()); - res.reserve(words.size()); - - size_t level = 0; - while (!words.empty()) { - smallerWords.clear(); - for (size_t i = 0; i < words.size(); i++) { - if (words[i].size() >= 3) { - size_t len = words[i].size() - 1; - mpSeg_.Cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear - } - if (words[i].size() > 1) { - res.push_back(pair(words[i], level)); - } - } - - words.swap(smallerWords); - level++; - } - } - - void Cut(const string& sentence, - vector >& words) const { - words.clear(); - RuneStrArray unicode; - DecodeRunesInString(sentence, unicode); - vector > unicodeWords; - Cut(unicode.begin(), unicode.end(), unicodeWords); - words.resize(unicodeWords.size()); - for (size_t i = 0; i < words.size(); i++) { - TransCode::Encode(unicodeWords[i].first, words[i].first); - words[i].second = unicodeWords[i].second; - } - } - - bool Cut(const string& sentence, - vector& res) const { - vector > words; - Cut(sentence, words); - res.clear(); - res.reserve(words.size()); - for (size_t i = 0; i < words.size(); i++) { - res.push_back(words[i].first); - } - return true; - } - - private: - MPSegment mpSeg_; -}; // class LevelSegment - -} // namespace cppjieba - -#endif // CPPJIEBA_LEVELSEGMENT_H diff --git a/test/unittest/segments_test.cpp b/test/unittest/segments_test.cpp index 1679d08..ccb065a 100644 --- a/test/unittest/segments_test.cpp +++ b/test/unittest/segments_test.cpp @@ -4,7 +4,6 @@ #include "cppjieba/HMMSegment.hpp" #include "cppjieba/FullSegment.hpp" #include "cppjieba/QuerySegment.hpp" -//#include "cppjieba/LevelSegment.hpp" #include "gtest/gtest.h" using namespace cppjieba; @@ -238,20 +237,6 @@ TEST(QuerySegment, Test2) { } } -/* -TEST(LevelSegmentTest, Test0) { - string s; - LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8"); - vector > words; - segment.Cut("南京市长江大桥", words); - ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words); - - vector res; - segment.Cut("南京市长江大桥", res); - ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res); -} -*/ - TEST(MPSegmentTest, Unicode32) { string s("天气很好,🙋 我们去郊游。"); vector words;