update enc

This commit is contained in:
wyy 2013-12-06 04:57:19 -08:00
parent 1576d15b2f
commit 1bdce8904f
2 changed files with 20 additions and 10 deletions

View File

@ -98,6 +98,7 @@ namespace CppJieba
return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER); return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER);
} }
}; };
} }
#endif #endif

View File

@ -218,30 +218,29 @@ namespace Limonp
return str.find(ch) != string::npos; return str.find(ch) != string::npos;
} }
inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec) inline bool utf8ToUnicode(const char * const str, uint len, vector<uint16_t>& vec)
{ {
char ch1, ch2; char ch1, ch2;
if(str.empty()) if(!str)
{ {
return false; return false;
} }
vec.clear(); vec.clear();
size_t siz = str.size(); for(uint i = 0;i < len;)
for(uint i = 0;i < siz;)
{ {
if(!(str[i] & 0x80)) // 0xxxxxxx if(!(str[i] & 0x80)) // 0xxxxxxx
{ {
vec.push_back(str[i]); vec.push_back(str[i]);
i++; i++;
} }
else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx else if ((unsigned char)str[i] <= 0xdf && i + 1 < len) // 110xxxxxx
{ {
ch1 = (str[i] >> 2) & 0x07; ch1 = (str[i] >> 2) & 0x07;
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 ); ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
vec.push_back(twocharToUint16(ch1, ch2)); vec.push_back(twocharToUint16(ch1, ch2));
i += 2; i += 2;
} }
else if((unsigned char)str[i] <= 0xef && i + 2 < siz) else if((unsigned char)str[i] <= 0xef && i + 2 < len)
{ {
ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f ); ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f); ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
@ -255,6 +254,10 @@ namespace Limonp
} }
return true; return true;
} }
// Convenience overload taking a std::string.
// Forwards the string's byte buffer and its size to the
// (pointer, length) overload of utf8ToUnicode, which fills `vec`.
// Returns whatever that overload returns.
// NOTE(review): c_str() is never null, so the overload's null-pointer
// check cannot reject an empty string on this path — confirm that
// callers do not rely on empty input being reported as failure.
inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec)
{
    const char * const bytes = str.c_str();
    return utf8ToUnicode(bytes, str.size(), vec);
}
inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res) inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
{ {
@ -287,15 +290,16 @@ namespace Limonp
return true; return true;
} }
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
inline bool gbkTrans(const char* const str, uint len, vector<uint16_t>& vec)
{ {
vec.clear(); vec.clear();
if(str.empty()) if(!str)
{ {
return false; return false;
} }
uint i = 0; uint i = 0;
while(i < str.size()) while(i < len)
{ {
if(0 == (str[i] & 0x80)) if(0 == (str[i] & 0x80))
{ {
@ -304,7 +308,7 @@ namespace Limonp
} }
else else
{ {
if(i + 1 < str.size()) //&& (str[i+1] & 0x80)) if(i + 1 < len) //&& (str[i+1] & 0x80))
{ {
vec.push_back(twocharToUint16(str[i], str[i + 1])); vec.push_back(twocharToUint16(str[i], str[i + 1]));
i += 2; i += 2;
@ -317,6 +321,11 @@ namespace Limonp
} }
return true; return true;
} }
// Convenience overload taking a std::string.
// Forwards the string's byte buffer and its size to the
// (pointer, length) overload of gbkTrans, which fills `vec`.
// Returns whatever that overload returns.
// NOTE(review): c_str() is never null, so the overload's null-pointer
// check cannot reject an empty string on this path — confirm that
// callers do not rely on empty input being reported as failure.
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
{
    const char * const bytes = str.c_str();
    return gbkTrans(bytes, str.size(), vec);
}
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res) inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
{ {
if(begin >= end) if(begin >= end)