refactor, simplify SegmentBase

This commit is contained in:
yanyiwu 2015-09-13 16:29:35 +08:00
parent e9241d9025
commit 6d69363145

View File

@ -23,51 +23,22 @@ class SegmentBase: public ISegment {
}
public:
// Pure-virtual segmentation hook: must be supplied by a derived class.
// Receives an iterator pair over Unicode input and a vector<Unicode> to
// receive the resulting words. The string overload of cut() calls this once
// per range yielded by PreFilter, passing the same output vector each time,
// so implementations are presumably expected to append rather than
// overwrite -- TODO confirm against the concrete segmenters.
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
// String-level entry point. Builds a PreFilter from symbols_ and the input
// string, runs the iterator-based cut() on every range the filter yields,
// converts the results with TransCode::encode, and always returns true.
// NOTE(review): this span appears to show the old and the new variant of the
// same statements together (str/res vs. sentence/words, two function headers,
// two inner cut() calls) with no diff markers -- confirm which lines are live
// before relying on it.
virtual bool cut(const string& str, vector<string>& res) const {
PreFilter pre_filter(symbols_, str);
virtual bool cut(const string& sentence, vector<string>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
res.clear();
// Accumulates the Unicode words from every range before any encoding happens.
vector<Unicode> uwords;
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, res);
cut(range.begin, range.end, uwords);
}
// One output string per Unicode word; each slot is filled by TransCode::encode.
words.resize(uwords.size());
for (size_t i = 0; i < uwords.size(); i++) {
TransCode::encode(uwords[i], words[i]);
}
// Commented-out manual loop that split the decoded input on specialSymbols_;
// presumably superseded by PreFilter -- verify before deleting.
//Unicode unicode;
//unicode.reserve(str.size());
//TransCode::decode(str, unicode);
//Unicode::const_iterator left = unicode.begin();
//Unicode::const_iterator right;
//for(right = unicode.begin(); right != unicode.end(); right++) {
// if(isIn(specialSymbols_, *right)) {
// if(left != right) {
// cut(left, right, res);
// }
// res.resize(res.size() + 1);
// TransCode::encode(right, right + 1, res.back());
// left = right + 1;
// }
//}
//if(left != right) {
// cut(left, right, res);
//}
return true;
}
// Convenience overload: segments the given iterator range through the
// vector<Unicode> overload of cut(), then appends one string per resulting
// word to `res`, each produced by TransCode::encode. Elements already in
// `res` are left in place; new entries start at the previous res.size().
void cut(Unicode::const_iterator begin,
         Unicode::const_iterator end,
         vector<string>& res) const {
  vector<Unicode> segments;
  segments.reserve(end - begin);
  cut(begin, end, segments);

  // Grow the output once, then encode each segment directly into its slot.
  const size_t base = res.size();
  res.resize(base + segments.size());
  for (size_t k = 0; k != segments.size(); ++k) {
    TransCode::encode(segments[k], res[base + k]);
  }
}
private:
void LoadSpecialSymbols() {
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);