规范Unicode的相关命名,使用Rune代表一个中文字符

This commit is contained in:
yanyiwu 2015-07-21 14:54:50 +08:00
parent 0e16e000ea
commit 78e41e5fd0
6 changed files with 18 additions and 16 deletions

View File

@ -71,7 +71,7 @@ class DictTrie {
vector<Dag>& res) const {
trie_->find(begin, end, res);
}
// Returns the result of isIn(userDictSingleChineseWord_, word).
// NOTE(review): isIn is defined outside this view; presumably a set-membership
// test — confirm.
// Two signature lines follow because this is diff context: the first is the
// pre-rename form (Unicode::value_type), the second its replacement (Rune).
bool isUserDictSingleChineseWord(const Unicode::value_type& word) const {
bool isUserDictSingleChineseWord(const Rune& word) const {
return isIn(userDictSingleChineseWord_, word);
}
double getMinWeight() const {
@ -198,7 +198,7 @@ class DictTrie {
double minWeight_;
double maxWeight_;
unordered_set<Unicode::value_type> userDictSingleChineseWord_;
unordered_set<Rune> userDictSingleChineseWord_;
};
}

View File

@ -74,7 +74,7 @@ class HMMSegment: public SegmentBase {
private:
// sequential letters rule
Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
Unicode::value_type x = *begin;
Rune x = *begin;
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
begin ++;
} else {
@ -92,7 +92,7 @@ class HMMSegment: public SegmentBase {
}
//
Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
Unicode::value_type x = *begin;
Rune x = *begin;
if('0' <= x && x <= '9') {
begin ++;
} else {

View File

@ -28,7 +28,7 @@ class MPSegment: public SegmentBase {
}
}
// Forwards `value` to dictTrie_->isUserDictSingleChineseWord() and returns
// that call's result unchanged.
// Two signature lines follow because this is diff context: the first is the
// pre-rename form (Unicode::value_type), the second its replacement (Rune).
bool isUserDictSingleChineseWord(const Unicode::value_type & value) const {
bool isUserDictSingleChineseWord(const Rune & value) const {
return dictTrie_->isUserDictSingleChineseWord(value);
}
@ -101,7 +101,7 @@ class MPSegment: public SegmentBase {
res.push_back(p->word);
i += p->word.size();
} else { //single chinese word
res.push_back(Unicode(1, dags[i].uniCh));
res.push_back(Unicode(1, dags[i].rune));
i++;
}
}

View File

@ -14,9 +14,9 @@ using namespace Limonp;
//const char* const SPECIAL_CHARS = " \t\n";
// Table of special symbol code values; its contents depend on whether
// CPPJIEBA_GBK is defined.
// Each declaration appears twice because this is diff context: the
// UnicodeValueType line is the pre-rename form, the Rune line its replacement.
// NOTE(review): presumably used to populate SegmentBase::specialSymbols_ —
// the code doing so is not visible here; confirm.
#ifndef CPPJIEBA_GBK
// Without CPPJIEBA_GBK: 32 = space, 9 = tab, 10 = line feed,
// 12290 = U+3002 (ideographic full stop), 65292 = U+FF0C (fullwidth comma).
const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u};
const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u};
#else
// With CPPJIEBA_GBK: only space, tab and line feed are listed.
const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u};
const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u};
#endif
class SegmentBase: public ISegment, public NonCopyable {
@ -63,7 +63,7 @@ class SegmentBase: public ISegment, public NonCopyable {
assert(specialSymbols_.size());
}
private:
unordered_set<UnicodeValueType> specialSymbols_;
unordered_set<Rune> specialSymbols_;
};
}

View File

@ -12,8 +12,10 @@
namespace CppJieba {
using namespace Limonp;
// Element type and sequence type for decoded text.
// The first pair of typedefs is the pre-rename form (UnicodeValueType); the
// second pair is its replacement, where the element type is called Rune.
// A Rune is a 16-bit unsigned integer, so it cannot hold a value above 0xFFFF.
typedef uint16_t UnicodeValueType;
typedef Limonp::LocalVector<UnicodeValueType> Unicode;
typedef uint16_t Rune;
// Unicode is a Limonp::LocalVector of Rune elements.
typedef Limonp::LocalVector<Rune> Unicode;
namespace TransCode {
inline bool decode(const string& str, Unicode& res) {
#ifdef CPPJIEBA_GBK

View File

@ -22,16 +22,16 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) {
}
// Per-position record written by Trie::find: for input index i it stores the
// character at that index in `rune` and appends (i, ptNode->ptValue) as the
// first entry of `nexts`.
// The member and constructor lines appear twice because this is diff context:
// `uniCh` is the pre-rename name, `rune` its replacement.
struct Dag {
uint16_t uniCh;
uint16_t rune;
// Pairs of (position, dictionary entry pointer).
// NOTE(review): presumably the end positions of dictionary words starting at
// this character — confirm against the rest of Trie::find.
LocalVector<pair<size_t, const DictUnit*> > nexts;
// NOTE(review): pInfo, weight and nextPos are not written in the visible
// code; presumably filled in by the segmenters' path computation — confirm.
const DictUnit * pInfo;
double weight;
size_t nextPos;
// Zero-initialises the scalar members and sets pInfo to NULL; nexts is
// default-constructed.
Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) {
Dag():rune(0), pInfo(NULL), weight(0.0), nextPos(0) {
}
};
// Key type for the trie. The first typedef is the pre-rename form
// (Unicode::value_type); the second is its replacement, which names Rune
// directly.
typedef Unicode::value_type TrieKey;
typedef Rune TrieKey;
class TrieNode {
public :
@ -90,9 +90,9 @@ class Trie {
const TrieNode *ptNode = NULL;
TrieNode::NextMap::const_iterator citer;
for (size_t i = 0; i < size_t(end - begin); i++) {
Unicode::value_type ch = *(begin + i);
Rune ch = *(begin + i);
ptNode = _base + ch;
res[i].uniCh = ch;
res[i].rune = ch;
assert(res[i].nexts.empty());
res[i].nexts.push_back(pair<size_t, const DictUnit*>(i, ptNode->ptValue));