remove LevelSegment

This commit is contained in:
yanyiwu 2016-04-17 22:23:00 +08:00
parent 42a73eeb64
commit e7a45d2dde
3 changed files with 4 additions and 96 deletions

View File

@ -1,5 +1,9 @@
# CppJieba ChangeLog
## next version
+ Remove LevelSegment.
## v4.6.0
+ Change Jieba::Locate(deprecated) to be static function.

View File

@ -1,81 +0,0 @@
#ifndef CPPJIEBA_LEVELSEGMENT_H
#define CPPJIEBA_LEVELSEGMENT_H
#include "MPSegment.hpp"
namespace cppjieba {
// Segmenter that wraps an MPSegment and reports every produced word together
// with a "level" number. Level 0 holds the words of the initial MPSegment cut;
// each following level holds the pieces obtained by re-cutting the previous
// level's words of size >= 3 with a maximum length of (word size - 1).
// Words of size <= 1 are never reported.
class LevelSegment: public SegmentBase {
 public:
  // Constructs the wrapped MPSegment from a dictionary path and an optional
  // user dictionary path (empty string by default).
  LevelSegment(const string& dictPath,
               const string& userDictPath = "")
    : mpSeg_(dictPath, userDictPath) {
  }

  // Constructs the wrapped MPSegment from an already available DictTrie.
  LevelSegment(const DictTrie* dictTrie)
    : mpSeg_(dictTrie) {
  }

  ~LevelSegment() {
  }

  // Cuts the range [begin, end) level by level and stores (word, level)
  // pairs in `res`, which is cleared first. All words of level N precede
  // the words of level N + 1.
  //
  // NOTE(review): `res` stores WordRange pairs while the values pushed are
  // pair<Unicode, size_t>, and the RuneStrArray iterators are handed to an
  // MPSegment::Cut call that fills a vector<Unicode>. Confirm that these
  // conversions/overloads actually exist.
  void Cut(RuneStrArray::const_iterator begin,
           RuneStrArray::const_iterator end,
           vector<pair<WordRange, size_t> >& res) const {
    vector<Unicode> words;
    vector<Unicode> smallerWords;

    res.clear();
    words.reserve(end - begin);
    mpSeg_.Cut(begin, end, words);
    smallerWords.reserve(words.size());
    res.reserve(words.size());

    // One pass per level; stops once a level yields nothing to process.
    for (size_t depth = 0; !words.empty(); ++depth) {
      smallerWords.clear();
      for (size_t idx = 0; idx != words.size(); ++idx) {
        // Words of size <= 1 are neither re-cut nor reported.
        if (!(words[idx].size() > 1)) {
          continue;
        }
        if (words[idx].size() >= 3) {
          // Re-cut with the maximum length reduced by one. Per the original
          // note, this call appends to smallerWords without clearing it,
          // so the pieces of every word of this level accumulate there.
          size_t maxLen = words[idx].size() - 1;
          mpSeg_.Cut(words[idx].begin(), words[idx].end(), smallerWords, maxLen);
        }
        res.push_back(pair<Unicode, size_t>(words[idx], depth));
      }
      // The pieces collected in this pass form the input of the next level.
      words.swap(smallerWords);
    }
  }

  // Decodes `sentence` with DecodeRunesInString, runs the level cut on the
  // decoded runes and writes (encoded word, level) pairs to `words`,
  // replacing its previous content.
  void Cut(const string& sentence,
           vector<pair<string, size_t> >& words) const {
    words.clear();

    RuneStrArray unicode;
    DecodeRunesInString(sentence, unicode);

    vector<pair<WordRange, size_t> > unicodeWords;
    Cut(unicode.begin(), unicode.end(), unicodeWords);

    const size_t count = unicodeWords.size();
    words.resize(count);
    for (size_t k = 0; k < count; ++k) {
      TransCode::Encode(unicodeWords[k].first, words[k].first);
      words[k].second = unicodeWords[k].second;
    }
  }

  // Same as the (word, level) overload, but keeps only the word strings in
  // `res` (previous content is discarded). Always returns true.
  bool Cut(const string& sentence,
           vector<string>& res) const {
    vector<pair<string, size_t> > leveled;
    Cut(sentence, leveled);

    res.clear();
    res.reserve(leveled.size());
    for (vector<pair<string, size_t> >::const_iterator it = leveled.begin();
         it != leveled.end(); ++it) {
      res.push_back(it->first);
    }
    return true;
  }

 private:
  MPSegment mpSeg_;  // performs every actual cut
}; // class LevelSegment
} // namespace cppjieba
#endif // CPPJIEBA_LEVELSEGMENT_H

View File

@ -4,7 +4,6 @@
#include "cppjieba/HMMSegment.hpp"
#include "cppjieba/FullSegment.hpp"
#include "cppjieba/QuerySegment.hpp"
//#include "cppjieba/LevelSegment.hpp"
#include "gtest/gtest.h"
using namespace cppjieba;
@ -238,20 +237,6 @@ TEST(QuerySegment, Test2) {
}
}
/*
TEST(LevelSegmentTest, Test0) {
string s;
LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
vector<pair<string, size_t> > words;
segment.Cut("南京市长江大桥", words);
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words);
vector<string> res;
segment.Cut("南京市长江大桥", res);
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res);
}
*/
TEST(MPSegmentTest, Unicode32) {
string s("天气很好,🙋 我们去郊游。");
vector<string> words;