mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
remove LevelSegment
This commit is contained in:
parent
42a73eeb64
commit
e7a45d2dde
@ -1,5 +1,9 @@
|
|||||||
# CppJieba ChangeLog
|
# CppJieba ChangeLog
|
||||||
|
|
||||||
|
## next version
|
||||||
|
|
||||||
|
+ remove LevelSegment;
|
||||||
|
|
||||||
## v4.6.0
|
## v4.6.0
|
||||||
|
|
||||||
+ Change Jieba::Locate(deprecated) to be static function.
|
+ Change Jieba::Locate(deprecated) to be static function.
|
||||||
|
@ -1,81 +0,0 @@
|
|||||||
#ifndef CPPJIEBA_LEVELSEGMENT_H
|
|
||||||
#define CPPJIEBA_LEVELSEGMENT_H
|
|
||||||
|
|
||||||
#include "MPSegment.hpp"
|
|
||||||
|
|
||||||
namespace cppjieba {
|
|
||||||
|
|
||||||
class LevelSegment: public SegmentBase{
|
|
||||||
public:
|
|
||||||
LevelSegment(const string& dictPath,
|
|
||||||
const string& userDictPath = "")
|
|
||||||
: mpSeg_(dictPath, userDictPath) {
|
|
||||||
}
|
|
||||||
LevelSegment(const DictTrie* dictTrie)
|
|
||||||
: mpSeg_(dictTrie) {
|
|
||||||
}
|
|
||||||
~LevelSegment() {
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cut(RuneStrArray::const_iterator begin,
|
|
||||||
RuneStrArray::const_iterator end,
|
|
||||||
vector<pair<WordRange, size_t> >& res) const {
|
|
||||||
res.clear();
|
|
||||||
vector<Unicode> words;
|
|
||||||
vector<Unicode> smallerWords;
|
|
||||||
words.reserve(end - begin);
|
|
||||||
mpSeg_.Cut(begin, end, words);
|
|
||||||
smallerWords.reserve(words.size());
|
|
||||||
res.reserve(words.size());
|
|
||||||
|
|
||||||
size_t level = 0;
|
|
||||||
while (!words.empty()) {
|
|
||||||
smallerWords.clear();
|
|
||||||
for (size_t i = 0; i < words.size(); i++) {
|
|
||||||
if (words[i].size() >= 3) {
|
|
||||||
size_t len = words[i].size() - 1;
|
|
||||||
mpSeg_.Cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear
|
|
||||||
}
|
|
||||||
if (words[i].size() > 1) {
|
|
||||||
res.push_back(pair<Unicode, size_t>(words[i], level));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
words.swap(smallerWords);
|
|
||||||
level++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void Cut(const string& sentence,
|
|
||||||
vector<pair<string, size_t> >& words) const {
|
|
||||||
words.clear();
|
|
||||||
RuneStrArray unicode;
|
|
||||||
DecodeRunesInString(sentence, unicode);
|
|
||||||
vector<pair<WordRange, size_t> > unicodeWords;
|
|
||||||
Cut(unicode.begin(), unicode.end(), unicodeWords);
|
|
||||||
words.resize(unicodeWords.size());
|
|
||||||
for (size_t i = 0; i < words.size(); i++) {
|
|
||||||
TransCode::Encode(unicodeWords[i].first, words[i].first);
|
|
||||||
words[i].second = unicodeWords[i].second;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Cut(const string& sentence,
|
|
||||||
vector<string>& res) const {
|
|
||||||
vector<pair<string, size_t> > words;
|
|
||||||
Cut(sentence, words);
|
|
||||||
res.clear();
|
|
||||||
res.reserve(words.size());
|
|
||||||
for (size_t i = 0; i < words.size(); i++) {
|
|
||||||
res.push_back(words[i].first);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
MPSegment mpSeg_;
|
|
||||||
}; // class LevelSegment
|
|
||||||
|
|
||||||
} // namespace cppjieba
|
|
||||||
|
|
||||||
#endif // CPPJIEBA_LEVELSEGMENT_H
|
|
@ -4,7 +4,6 @@
|
|||||||
#include "cppjieba/HMMSegment.hpp"
|
#include "cppjieba/HMMSegment.hpp"
|
||||||
#include "cppjieba/FullSegment.hpp"
|
#include "cppjieba/FullSegment.hpp"
|
||||||
#include "cppjieba/QuerySegment.hpp"
|
#include "cppjieba/QuerySegment.hpp"
|
||||||
//#include "cppjieba/LevelSegment.hpp"
|
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
using namespace cppjieba;
|
using namespace cppjieba;
|
||||||
@ -238,20 +237,6 @@ TEST(QuerySegment, Test2) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
TEST(LevelSegmentTest, Test0) {
|
|
||||||
string s;
|
|
||||||
LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
|
||||||
vector<pair<string, size_t> > words;
|
|
||||||
segment.Cut("南京市长江大桥", words);
|
|
||||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words);
|
|
||||||
|
|
||||||
vector<string> res;
|
|
||||||
segment.Cut("南京市长江大桥", res);
|
|
||||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
TEST(MPSegmentTest, Unicode32) {
|
TEST(MPSegmentTest, Unicode32) {
|
||||||
string s("天气很好,🙋 我们去郊游。");
|
string s("天气很好,🙋 我们去郊游。");
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user