规范Unicode的相关命名,使用Rune代表一个中文字符

This commit is contained in:
yanyiwu 2015-07-21 14:54:50 +08:00
parent 0e16e000ea
commit 78e41e5fd0
6 changed files with 18 additions and 16 deletions

View File

@@ -71,7 +71,7 @@ class DictTrie {
vector<Dag>& res) const { vector<Dag>& res) const {
trie_->find(begin, end, res); trie_->find(begin, end, res);
} }
bool isUserDictSingleChineseWord(const Unicode::value_type& word) const { bool isUserDictSingleChineseWord(const Rune& word) const {
return isIn(userDictSingleChineseWord_, word); return isIn(userDictSingleChineseWord_, word);
} }
double getMinWeight() const { double getMinWeight() const {
@@ -198,7 +198,7 @@ class DictTrie {
double minWeight_; double minWeight_;
double maxWeight_; double maxWeight_;
unordered_set<Unicode::value_type> userDictSingleChineseWord_; unordered_set<Rune> userDictSingleChineseWord_;
}; };
} }

View File

@@ -74,7 +74,7 @@ class HMMSegment: public SegmentBase {
private: private:
// sequential letters rule // sequential letters rule
Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
Unicode::value_type x = *begin; Rune x = *begin;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++; begin ++;
} else { } else {
@@ -92,7 +92,7 @@ class HMMSegment: public SegmentBase {
} }
// //
Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
Unicode::value_type x = *begin; Rune x = *begin;
if('0' <= x && x <= '9') { if('0' <= x && x <= '9') {
begin ++; begin ++;
} else { } else {

View File

@@ -28,7 +28,7 @@ class MPSegment: public SegmentBase {
} }
} }
bool isUserDictSingleChineseWord(const Unicode::value_type & value) const { bool isUserDictSingleChineseWord(const Rune & value) const {
return dictTrie_->isUserDictSingleChineseWord(value); return dictTrie_->isUserDictSingleChineseWord(value);
} }
@@ -101,7 +101,7 @@ class MPSegment: public SegmentBase {
res.push_back(p->word); res.push_back(p->word);
i += p->word.size(); i += p->word.size();
} else { //single chinese word } else { //single chinese word
res.push_back(Unicode(1, dags[i].uniCh)); res.push_back(Unicode(1, dags[i].rune));
i++; i++;
} }
} }

View File

@@ -14,9 +14,9 @@ using namespace Limonp;
//const char* const SPECIAL_CHARS = " \t\n"; //const char* const SPECIAL_CHARS = " \t\n";
#ifndef CPPJIEBA_GBK #ifndef CPPJIEBA_GBK
const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u}; const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u};
#else #else
const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u}; const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u};
#endif #endif
class SegmentBase: public ISegment, public NonCopyable { class SegmentBase: public ISegment, public NonCopyable {
@@ -63,7 +63,7 @@ class SegmentBase: public ISegment, public NonCopyable {
assert(specialSymbols_.size()); assert(specialSymbols_.size());
} }
private: private:
unordered_set<UnicodeValueType> specialSymbols_; unordered_set<Rune> specialSymbols_;
}; };
} }

View File

@@ -12,8 +12,10 @@
namespace CppJieba { namespace CppJieba {
using namespace Limonp; using namespace Limonp;
typedef uint16_t UnicodeValueType;
typedef Limonp::LocalVector<UnicodeValueType> Unicode; typedef uint16_t Rune;
typedef Limonp::LocalVector<Rune> Unicode;
namespace TransCode { namespace TransCode {
inline bool decode(const string& str, Unicode& res) { inline bool decode(const string& str, Unicode& res) {
#ifdef CPPJIEBA_GBK #ifdef CPPJIEBA_GBK

View File

@@ -22,16 +22,16 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) {
} }
struct Dag { struct Dag {
uint16_t uniCh; uint16_t rune;
LocalVector<pair<size_t, const DictUnit*> > nexts; LocalVector<pair<size_t, const DictUnit*> > nexts;
const DictUnit * pInfo; const DictUnit * pInfo;
double weight; double weight;
size_t nextPos; size_t nextPos;
Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) { Dag():rune(0), pInfo(NULL), weight(0.0), nextPos(0) {
} }
}; };
typedef Unicode::value_type TrieKey; typedef Rune TrieKey;
class TrieNode { class TrieNode {
public : public :
@@ -90,9 +90,9 @@ class Trie {
const TrieNode *ptNode = NULL; const TrieNode *ptNode = NULL;
TrieNode::NextMap::const_iterator citer; TrieNode::NextMap::const_iterator citer;
for (size_t i = 0; i < size_t(end - begin); i++) { for (size_t i = 0; i < size_t(end - begin); i++) {
Unicode::value_type ch = *(begin + i); Rune ch = *(begin + i);
ptNode = _base + ch; ptNode = _base + ch;
res[i].uniCh = ch; res[i].rune = ch;
assert(res[i].nexts.empty()); assert(res[i].nexts.empty());
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue)); res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));