improve readability

This commit is contained in:
yanyiwu 2016-05-09 22:09:57 +08:00
parent e4e1b4e953
commit 6e3ecec599

View File

@ -8,27 +8,50 @@
namespace cppjieba {
//const char* const SPECIAL_CHARS = " \t\n。";
// Separator code points inserted into the symbol set by LoadSpecialSymbols():
//   32    = U+0020 space
//   9     = U+0009 horizontal tab
//   10    = U+000A line feed
//   65292 = U+FF0C fullwidth comma
//   12290 = U+3002 ideographic full stop
const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u, 65292u, 12290u};
// UTF-8 text listing the separator characters: space, tab, line feed,
// U+FF0C (fullwidth comma) and U+3002 (ideographic full stop).
// The two non-ASCII characters are written as explicit byte escapes so the
// value does not depend on the encoding of this source file and cannot be
// silently folded to an ASCII ',' by an editor or a normalizing tool.
//   "\xEF\xBC\x8C" = U+FF0C, "\xE3\x80\x82" = U+3002
const char* const SPECIAL_RUNES = " \t\n\xEF\xBC\x8C\xE3\x80\x82";
using namespace limonp;
class SegmentBase {
public:
SegmentBase() {
LoadSpecialSymbols();
XCHECK(Insert(SPECIAL_RUNES));
}
~SegmentBase() {
}
protected:
void LoadSpecialSymbols() {
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
for (size_t i = 0; i < size; i ++) {
symbols_.insert(SPECIAL_SYMBOL[i]);
bool Insert(const string& s) {
RuneStrArray runes;
if (!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
return false;
}
assert(symbols_.size());
for (size_t i = 0; i < runes.size(); i++) {
if (!symbols_.insert(runes[i].rune).second) {
XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists";
return false;
}
}
return true;
}
//bool Remove(const string& s) {
// RuneStrArray runes;
// if (!DecodeRunesInString(s, runes)) {
// XLOG(ERROR) << "decode " << s << " failed";
// return false;
// }
// for (size_t i = 0; i < runes.size(); i++) {
// unordered_set<Rune>::iterator iter = symbols_.find(runes[i].rune);
// if (iter == symbols_.end()) {
// XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " not found";
// return false;
// }
// symbols_.erase(iter);
// }
// return true;
//}
unordered_set<Rune> symbols_;
}; // class SegmentBase