mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
update cppcommon
This commit is contained in:
parent
19afccec57
commit
5fcee293d1
@ -198,12 +198,13 @@ namespace CPPCOMMON
|
||||
char * utfStr = new char[len<<1];
|
||||
for(int i = 0; i < len; i+=2)
|
||||
{
|
||||
uint16_t tmp1 = uniStr[i];
|
||||
tmp1 <<= 8;
|
||||
tmp1&= 0xff00;
|
||||
uint16_t tmp2 = uniStr[i+1];
|
||||
tmp2 &= 0x00ff;
|
||||
uniArr[i>>1] = tmp1 | tmp2;
|
||||
//uint16_t tmp1 = uniStr[i];
|
||||
//tmp1 <<= 8;
|
||||
//tmp1 &= 0xff00;
|
||||
//uint16_t tmp2 = uniStr[i+1];
|
||||
//tmp2 &= 0x00ff;
|
||||
//uniArr[i>>1] = tmp1 | tmp2;
|
||||
uniArr[i>>1] = twocharToUint16(uniStr[i], uniStr[i+1]);
|
||||
}
|
||||
|
||||
string res;
|
||||
@ -269,11 +270,14 @@ namespace CPPCOMMON
|
||||
{
|
||||
for(uint i = 0; i < uniLen; i++)
|
||||
{
|
||||
char c = 0;
|
||||
c = ((pUni[i]>>8) & 0x00ff);
|
||||
res += c;
|
||||
c = (pUni[i] & 0x00ff);
|
||||
res += c;
|
||||
//char c = 0;
|
||||
//c = ((pUni[i]>>8) & 0x00ff);
|
||||
//res += c;
|
||||
//c = (pUni[i] & 0x00ff);
|
||||
//res += c;
|
||||
pair<char, char> char2= uint16ToChar2(pUni[i]);
|
||||
res += char2.first;
|
||||
res += char2.second;
|
||||
}
|
||||
}
|
||||
delete [] pUni;
|
||||
@ -324,7 +328,7 @@ int main()
|
||||
// //cout<<strlen(utf8str);
|
||||
// cout<<utf8str<<endl;
|
||||
//}
|
||||
ifstream ifile("jieba.dict.utf8");
|
||||
ifstream ifile("testdata/dict.txt");
|
||||
string line;
|
||||
while(getline(ifile, line))
|
||||
{
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <map>
|
||||
#include <stdint.h>
|
||||
#include "typedefs.h"
|
||||
namespace CPPCOMMON
|
||||
@ -31,5 +32,20 @@ namespace CPPCOMMON
|
||||
string unicodeToUtf8(const string& uniStr);
|
||||
int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode);
|
||||
string utf8ToUnicode(const string& utfStr);
|
||||
|
||||
// Packs two bytes into a single 16-bit value.
//   high - byte placed in bits 15..8 of the result
//   low  - byte placed in bits 7..0 of the result
// Each byte is reinterpreted as unsigned first, so a negative `char`
// contributes only its 8-bit pattern and never sign-extension bits.
inline uint16_t twocharToUint16(char high, char low)
{
    const unsigned char hi = static_cast<unsigned char>(high);
    const unsigned char lo = static_cast<unsigned char>(low);
    // hi is promoted to int before the shift; the result fits in 16 bits.
    return static_cast<uint16_t>((hi << 8) | lo);
}
|
||||
|
||||
// Splits a 16-bit value into its two bytes.
//   in - value to split
// Returns a pair whose `first` is the high byte (bits 15..8) and whose
// `second` is the low byte (bits 7..0).
// `std::pair` is spelled out so this header-level function does not depend
// on a using-directive being in effect at the point of inclusion.
inline std::pair<char, char> uint16ToChar2(uint16_t in)
{
    const char high = static_cast<char>((in >> 8) & 0x00ff);
    const char low = static_cast<char>(in & 0x00ff);
    return std::pair<char, char>(high, low);
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user