mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
remove LevelSegment
This commit is contained in:
parent
42a73eeb64
commit
e7a45d2dde
@ -1,5 +1,9 @@
|
||||
# CppJieba ChangeLog
|
||||
|
||||
## next version
|
||||
|
||||
+ remove LevelSegment;
|
||||
|
||||
## v4.6.0
|
||||
|
||||
+ Change Jieba::Locate (deprecated) to be a static function.
|
||||
|
@ -1,81 +0,0 @@
|
||||
#ifndef CPPJIEBA_LEVELSEGMENT_H
|
||||
#define CPPJIEBA_LEVELSEGMENT_H
|
||||
|
||||
#include "MPSegment.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
// Segmenter that wraps an MPSegment and reports each word together with a
// "level": level 0 holds the words produced by the first cut of the input,
// and every following level holds the pieces obtained by re-cutting the
// previous level's words that have three or more elements.
//
// NOTE(review): the iterator-based Cut declares `res` as
// vector<pair<WordRange, size_t> > but fills it from vector<Unicode> buffers
// via pair<Unicode, size_t>. Confirm that these types (and the MPSegment::Cut
// overloads called below) still agree before relying on this class.
class LevelSegment: public SegmentBase{
|
||||
public:
|
||||
// Constructs the wrapped MPSegment from a dictionary path and an optional
// user dictionary path (empty string by default).
LevelSegment(const string& dictPath,
|
||||
const string& userDictPath = "")
|
||||
: mpSeg_(dictPath, userDictPath) {
|
||||
}
|
||||
// Constructs the wrapped MPSegment from an already existing DictTrie.
// NOTE(review): ownership/lifetime of dictTrie is decided by MPSegment,
// which is not visible here -- confirm against its constructor.
LevelSegment(const DictTrie* dictTrie)
|
||||
: mpSeg_(dictTrie) {
|
||||
}
|
||||
// Nothing to release explicitly; mpSeg_ is held by value.
~LevelSegment() {
|
||||
}
|
||||
|
||||
// Cuts the range [begin, end) level by level.
//
// `res` is cleared first and then receives (word, level) pairs in the order
// they are produced: all level-0 words, then all level-1 words, and so on.
// Words with a single element are never added to `res`.
void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
vector<pair<WordRange, size_t> >& res) const {
|
||||
res.clear();
|
||||
// `words` is the current level; `smallerWords` collects the next level.
vector<Unicode> words;
|
||||
vector<Unicode> smallerWords;
|
||||
words.reserve(end - begin);
|
||||
// Level 0: plain cut of the whole input.
mpSeg_.Cut(begin, end, words);
|
||||
smallerWords.reserve(words.size());
|
||||
res.reserve(words.size());
|
||||
|
||||
size_t level = 0;
|
||||
// Keep going until a pass produces no words for the next level.
while (!words.empty()) {
|
||||
smallerWords.clear();
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
// Only words with at least three elements are re-cut.
if (words[i].size() >= 3) {
|
||||
// One less than the word's own size; passed as the extra argument of
// MPSegment::Cut (presumably a maximum word length, so that the word is
// split into strictly shorter pieces -- TODO confirm).
size_t len = words[i].size() - 1;
|
||||
// According to the original note, this call appends to smallerWords
// without clearing it, so pieces of all words in this level accumulate.
mpSeg_.Cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear
|
||||
}
|
||||
// Single-element words are skipped; everything else is reported with the
// current level.
if (words[i].size() > 1) {
|
||||
res.push_back(pair<Unicode, size_t>(words[i], level));
|
||||
}
|
||||
}
|
||||
|
||||
// The pieces collected in this pass become the input of the next pass.
words.swap(smallerWords);
|
||||
level++;
|
||||
}
|
||||
}
|
||||
|
||||
// String front end of the level-wise cut.
//
// Decodes `sentence` into a RuneStrArray, runs the iterator-based Cut on it
// and stores one (encoded word, level) pair per result in `words`, which is
// cleared first.
void Cut(const string& sentence,
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
words.clear();
|
||||
RuneStrArray unicode;
|
||||
// NOTE(review): no result of DecodeRunesInString is inspected here --
// confirm whether it can report a decoding failure.
DecodeRunesInString(sentence, unicode);
|
||||
vector<pair<WordRange, size_t> > unicodeWords;
|
||||
Cut(unicode.begin(), unicode.end(), unicodeWords);
|
||||
// Size the output up front so each slot can be filled in place.
words.resize(unicodeWords.size());
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
// Encode the word into the output string and carry its level over.
TransCode::Encode(unicodeWords[i].first, words[i].first);
|
||||
words[i].second = unicodeWords[i].second;
|
||||
}
|
||||
}
|
||||
|
||||
// Same as the (string, level) overload, but keeps only the word strings.
//
// `res` is cleared and then filled with the words in the order the
// level-wise cut produced them. Always returns true.
bool Cut(const string& sentence,
|
||||
vector<string>& res) const {
|
||||
vector<pair<string, size_t> > words;
|
||||
Cut(sentence, words);
|
||||
res.clear();
|
||||
res.reserve(words.size());
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
// Drop the level and keep the word text only.
res.push_back(words[i].first);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
// Underlying segmenter used for the initial cut and for every re-cut.
MPSegment mpSeg_;
|
||||
}; // class LevelSegment
|
||||
|
||||
} // namespace cppjieba
|
||||
|
||||
#endif // CPPJIEBA_LEVELSEGMENT_H
|
@ -4,7 +4,6 @@
|
||||
#include "cppjieba/HMMSegment.hpp"
|
||||
#include "cppjieba/FullSegment.hpp"
|
||||
#include "cppjieba/QuerySegment.hpp"
|
||||
//#include "cppjieba/LevelSegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace cppjieba;
|
||||
@ -238,20 +237,6 @@ TEST(QuerySegment, Test2) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
TEST(LevelSegmentTest, Test0) {
|
||||
string s;
|
||||
LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
||||
vector<pair<string, size_t> > words;
|
||||
segment.Cut("南京市长江大桥", words);
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words);
|
||||
|
||||
vector<string> res;
|
||||
segment.Cut("南京市长江大桥", res);
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res);
|
||||
}
|
||||
*/
|
||||
|
||||
TEST(MPSegmentTest, Unicode32) {
|
||||
string s("天气很好,🙋 我们去郊游。");
|
||||
vector<string> words;
|
||||
|
Loading…
x
Reference in New Issue
Block a user