mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
refactor, simplify SegmentBase
This commit is contained in:
parent
e9241d9025
commit
6d69363145
@ -23,51 +23,22 @@ class SegmentBase: public ISegment {
|
|||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
|
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
|
||||||
virtual bool cut(const string& str, vector<string>& res) const {
|
virtual bool cut(const string& sentence, vector<string>& words) const {
|
||||||
PreFilter pre_filter(symbols_, str);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
res.clear();
|
vector<Unicode> uwords;
|
||||||
|
uwords.reserve(sentence.size());
|
||||||
while (pre_filter.HasNext()) {
|
while (pre_filter.HasNext()) {
|
||||||
range = pre_filter.Next();
|
range = pre_filter.Next();
|
||||||
cut(range.begin, range.end, res);
|
cut(range.begin, range.end, uwords);
|
||||||
|
}
|
||||||
|
words.resize(uwords.size());
|
||||||
|
for (size_t i = 0; i < uwords.size(); i++) {
|
||||||
|
TransCode::encode(uwords[i], words[i]);
|
||||||
}
|
}
|
||||||
//Unicode unicode;
|
|
||||||
//unicode.reserve(str.size());
|
|
||||||
|
|
||||||
//TransCode::decode(str, unicode);
|
|
||||||
|
|
||||||
//Unicode::const_iterator left = unicode.begin();
|
|
||||||
//Unicode::const_iterator right;
|
|
||||||
|
|
||||||
//for(right = unicode.begin(); right != unicode.end(); right++) {
|
|
||||||
// if(isIn(specialSymbols_, *right)) {
|
|
||||||
// if(left != right) {
|
|
||||||
// cut(left, right, res);
|
|
||||||
// }
|
|
||||||
// res.resize(res.size() + 1);
|
|
||||||
// TransCode::encode(right, right + 1, res.back());
|
|
||||||
// left = right + 1;
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
//if(left != right) {
|
|
||||||
// cut(left, right, res);
|
|
||||||
//}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
void cut(Unicode::const_iterator begin,
|
|
||||||
Unicode::const_iterator end,
|
|
||||||
vector<string>& res) const {
|
|
||||||
vector<Unicode> uRes;
|
|
||||||
uRes.reserve(end - begin);
|
|
||||||
cut(begin, end, uRes);
|
|
||||||
|
|
||||||
size_t offset = res.size();
|
|
||||||
res.resize(res.size() + uRes.size());
|
|
||||||
for(size_t i = 0; i < uRes.size(); i ++, offset++) {
|
|
||||||
TransCode::encode(uRes[i], res[offset]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
private:
|
private:
|
||||||
void LoadSpecialSymbols() {
|
void LoadSpecialSymbols() {
|
||||||
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
|
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user