mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
refactor, simplify SegmentBase
This commit is contained in:
parent
e9241d9025
commit
6d69363145
@ -23,51 +23,22 @@ class SegmentBase: public ISegment {
|
||||
}
|
||||
public:
|
||||
// Pure-virtual segmentation hook: a concrete segmenter splits the code-point
// range [begin, end) into words and stores them in `res`.
// NOTE(review): the string-based cut() in this view passes the same output
// vector for every pre-filtered range without clearing it in between, so
// implementations are presumably expected to append to `res` -- confirm
// against the implementing classes.
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
|
||||
virtual bool cut(const string& str, vector<string>& res) const {
|
||||
PreFilter pre_filter(symbols_, str);
|
||||
virtual bool cut(const string& sentence, vector<string>& words) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
res.clear();
|
||||
vector<Unicode> uwords;
|
||||
uwords.reserve(sentence.size());
|
||||
while (pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
cut(range.begin, range.end, res);
|
||||
cut(range.begin, range.end, uwords);
|
||||
}
|
||||
words.resize(uwords.size());
|
||||
for (size_t i = 0; i < uwords.size(); i++) {
|
||||
TransCode::encode(uwords[i], words[i]);
|
||||
}
|
||||
//Unicode unicode;
|
||||
//unicode.reserve(str.size());
|
||||
|
||||
//TransCode::decode(str, unicode);
|
||||
|
||||
//Unicode::const_iterator left = unicode.begin();
|
||||
//Unicode::const_iterator right;
|
||||
|
||||
//for(right = unicode.begin(); right != unicode.end(); right++) {
|
||||
// if(isIn(specialSymbols_, *right)) {
|
||||
// if(left != right) {
|
||||
// cut(left, right, res);
|
||||
// }
|
||||
// res.resize(res.size() + 1);
|
||||
// TransCode::encode(right, right + 1, res.back());
|
||||
// left = right + 1;
|
||||
// }
|
||||
//}
|
||||
//if(left != right) {
|
||||
// cut(left, right, res);
|
||||
//}
|
||||
|
||||
return true;
|
||||
}
|
||||
// Convenience overload: segments the code-point range [begin, end) with the
// pure-virtual cut(begin, end, vector<Unicode>&) and appends the resulting
// words, converted to `string`, to the end of `res`.
// Elements already in `res` are kept; new words are written after them.
// NOTE(review): TransCode::encode is defined elsewhere; presumably it encodes
// one Unicode word into its byte-string form -- confirm.
void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<string>& res) const {
|
||||
vector<Unicode> uRes;
|
||||
// Reserve one slot per input code point.
uRes.reserve(end - begin);
|
||||
cut(begin, end, uRes);
|
||||
|
||||
// Remember where the existing content of `res` ends so new words go after it.
size_t offset = res.size();
|
||||
res.resize(res.size() + uRes.size());
|
||||
// `i` walks the freshly cut words, `offset` walks the newly added slots of `res`.
for(size_t i = 0; i < uRes.size(); i ++, offset++) {
|
||||
TransCode::encode(uRes[i], res[offset]);
|
||||
}
|
||||
}
|
||||
private:
|
||||
void LoadSpecialSymbols() {
|
||||
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
|
||||
|
Loading…
x
Reference in New Issue
Block a user