diff --git a/src/DictTrie.hpp b/src/DictTrie.hpp index 15de328..f5347b5 100644 --- a/src/DictTrie.hpp +++ b/src/DictTrie.hpp @@ -71,7 +71,7 @@ class DictTrie { vector& res) const { trie_->find(begin, end, res); } - bool isUserDictSingleChineseWord(const Unicode::value_type& word) const { + bool isUserDictSingleChineseWord(const Rune& word) const { return isIn(userDictSingleChineseWord_, word); } double getMinWeight() const { @@ -198,7 +198,7 @@ class DictTrie { double minWeight_; double maxWeight_; - unordered_set userDictSingleChineseWord_; + unordered_set userDictSingleChineseWord_; }; } diff --git a/src/HMMSegment.hpp b/src/HMMSegment.hpp index 46aa8d9..83769b2 100644 --- a/src/HMMSegment.hpp +++ b/src/HMMSegment.hpp @@ -74,7 +74,7 @@ class HMMSegment: public SegmentBase { private: // sequential letters rule Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { - Unicode::value_type x = *begin; + Rune x = *begin; if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) { begin ++; } else { @@ -92,7 +92,7 @@ class HMMSegment: public SegmentBase { } // Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const { - Unicode::value_type x = *begin; + Rune x = *begin; if('0' <= x && x <= '9') { begin ++; } else { diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index f35158c..975998b 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -28,7 +28,7 @@ class MPSegment: public SegmentBase { } } - bool isUserDictSingleChineseWord(const Unicode::value_type & value) const { + bool isUserDictSingleChineseWord(const Rune & value) const { return dictTrie_->isUserDictSingleChineseWord(value); } @@ -101,7 +101,7 @@ class MPSegment: public SegmentBase { res.push_back(p->word); i += p->word.size(); } else { //single chinese word - res.push_back(Unicode(1, dags[i].uniCh)); + res.push_back(Unicode(1, dags[i].rune)); i++; } } diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 9e6f4e4..93bc901 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -14,9 +14,9 @@ using namespace Limonp; //const char* const SPECIAL_CHARS = " \t\n"; #ifndef CPPJIEBA_GBK -const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u}; +const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u, 12290u, 65292u}; #else -const UnicodeValueType SPECIAL_SYMBOL[] = {32u, 9u, 10u}; +const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u}; #endif class SegmentBase: public ISegment, public NonCopyable { @@ -63,7 +63,7 @@ class SegmentBase: public ISegment, public NonCopyable { assert(specialSymbols_.size()); } private: - unordered_set specialSymbols_; + unordered_set specialSymbols_; }; } diff --git a/src/TransCode.hpp b/src/TransCode.hpp index 5c96978..d7fa162 100644 --- a/src/TransCode.hpp +++ b/src/TransCode.hpp @@ -12,8 +12,10 @@ namespace CppJieba { using namespace Limonp; -typedef uint16_t UnicodeValueType; -typedef Limonp::LocalVector Unicode; + +typedef uint16_t Rune; +typedef Limonp::LocalVector Unicode; + namespace TransCode { inline bool decode(const string& str, Unicode& res) { #ifdef CPPJIEBA_GBK diff --git a/src/Trie.hpp b/src/Trie.hpp index 289787b..c42c383 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -22,16 +22,16 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) { } struct Dag { - uint16_t uniCh; + uint16_t rune; LocalVector > nexts; const DictUnit * pInfo; double weight; size_t nextPos; - Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) { + Dag():rune(0), pInfo(NULL), weight(0.0), nextPos(0) { } }; -typedef Unicode::value_type TrieKey; +typedef Rune TrieKey; class TrieNode { public : @@ -90,9 +90,9 @@ class Trie { const TrieNode *ptNode = NULL; TrieNode::NextMap::const_iterator citer; for (size_t i = 0; i < size_t(end - begin); i++) { - Unicode::value_type ch = *(begin + i); + Rune ch = *(begin + i); ptNode = _base + ch; - res[i].uniCh = ch; + res[i].rune = ch; assert(res[i].nexts.empty()); res[i].nexts.push_back(pair(i, ptNode->ptValue));