mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add gbk
This commit is contained in:
parent
35ba8f058e
commit
fd7ff031d0
@ -1,7 +1,7 @@
|
||||
PROJECT(CPPJIEBA)
|
||||
SET(CMAKE_INSTALL_PREFIX /usr)
|
||||
ADD_DEFINITIONS(-std=c++0x -O3)
|
||||
#ADD_DEFINITIONS(-DCPPJIEBA_GBK)
|
||||
ADD_DEFINITIONS(-DCPPJIEBA_GBK)
|
||||
ADD_SUBDIRECTORY(src)
|
||||
ADD_SUBDIRECTORY(dicts)
|
||||
ADD_SUBDIRECTORY(scripts)
|
||||
|
@ -286,5 +286,61 @@ namespace Limonp
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
vec.clear();
|
||||
if(str.empty())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
uint i = 0;
|
||||
while(i < str.size())
|
||||
{
|
||||
if(0 == (str[i] & 0x80))
|
||||
{
|
||||
vec.push_back(uint16_t(str[i]));
|
||||
i++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(i + 1 < str.size()) //&& (str[i+1] & 0x80))
|
||||
{
|
||||
vec.push_back(twocharToUint16(str[i], str[i + 1]));
|
||||
i += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||
{
|
||||
if(begin >= end)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
res.clear();
|
||||
pair<char, char> pa;
|
||||
while(begin != end)
|
||||
{
|
||||
pa = uint16ToChar2(*begin);
|
||||
if(pa.first & 0x80)
|
||||
{
|
||||
res += pa.first;
|
||||
res += pa.second;
|
||||
}
|
||||
else
|
||||
{
|
||||
res += pa.second;
|
||||
}
|
||||
begin++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -18,7 +18,7 @@ namespace CppJieba
|
||||
inline bool decode(const string& str, vector<uint16_t>& vec)
|
||||
{
|
||||
#ifdef CPPJIEBA_GBK
|
||||
return false;
|
||||
return gbkTrans(str, vec);
|
||||
#else
|
||||
return utf8ToUnicode(str, vec);
|
||||
#endif
|
||||
@ -27,7 +27,7 @@ namespace CppJieba
|
||||
inline bool encode(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||
{
|
||||
#ifdef CPPJIEBA_GBK
|
||||
return false;
|
||||
return gbkTrans(begin, end, res);
|
||||
#else
|
||||
return unicodeToUtf8(begin, end, res);
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user