cppjieba/include/cppjieba/SegmentBase.hpp
yanyiwu 3c5ad24260 source code layout change:
1. src/ -> include/cppjieba/
2. src/limonp/ -> deps/limonp/
3. server/husky -> deps/husky/
4. test/unittest/gtest -> deps/gtest
2016-01-11 14:25:02 +08:00

38 lines
710 B
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#ifndef CPPJIEBA_SEGMENTBASE_H
#define CPPJIEBA_SEGMENTBASE_H
#include "limonp/Logging.hpp"
#include "PreFilter.hpp"
#include <cassert>
namespace cppjieba {
// Code points of the special symbols; SegmentBase copies this table into its
// symbol set when it is constructed.
// Commented-out string-literal variant:
//const char* const SPECIAL_CHARS = " \t\n。";
const Rune SPECIAL_SYMBOL[] = {
  0x0020u,  // U+0020 space
  0x0009u,  // U+0009 horizontal tab
  0x000Au,  // U+000A line feed
  0xFF0Cu,  // U+FF0C fullwidth comma
  0x3002u   // U+3002 ideographic full stop
};
using namespace limonp;
/// Holds the set of special-symbol runes and fills it on construction.
///
/// The loader and the set itself are protected, i.e. they are meant to be
/// used from classes deriving from SegmentBase.
class SegmentBase {
 public:
  /// Builds the object and loads every entry of SPECIAL_SYMBOL into symbols_.
  SegmentBase() {
    LoadSpecialSymbols();
  }

  /// Virtual so that destroying a derived object through a pointer or
  /// reference to SegmentBase is well-defined instead of undefined behaviour.
  virtual ~SegmentBase() {
  }

 protected:
  /// Inserts all code points listed in SPECIAL_SYMBOL into symbols_.
  /// Entries already present are left as they are (set semantics).
  void LoadSpecialSymbols() {
    const size_t count = sizeof(SPECIAL_SYMBOL) / sizeof(*SPECIAL_SYMBOL);
    // Iterator-range insert replaces the hand-written index loop.
    symbols_.insert(SPECIAL_SYMBOL, SPECIAL_SYMBOL + count);
    // Debug-only sanity check: the table must have yielded at least one symbol.
    assert(symbols_.size());
  }

  /// Runes loaded from SPECIAL_SYMBOL.
  unordered_set<Rune> symbols_;
}; // class SegmentBase
} // cppjieba
#endif