update cppcommon

This commit is contained in:
gwdwyy 2013-07-09 11:45:20 +08:00
parent 12908b9a2d
commit 19afccec57
3 changed files with 99 additions and 16 deletions

View File

@ -186,6 +186,41 @@ namespace CPPCOMMON
return res;
}
/*
 * Convert a byte-packed UTF-16 string into UTF-8.
 *
 * uniStr carries 16-bit code units as consecutive byte pairs, high byte
 * first (the same layout utf8ToUnicode(const string&) emits).
 *
 * Returns the UTF-8 text, or "" when uniStr has an odd number of bytes or
 * the low-level converter reports a length of zero.
 */
string unicodeToUtf8(const string& uniStr)
{
    const size_t len = uniStr.size();
    if(len % 2)
    {
        // A trailing half code unit cannot be decoded.
        return "";
    }
    const size_t unitCount = len >> 1;

    // Output buffer: the 2*len bytes the converter was always given, plus one
    // spare byte, all zero-filled. The text is therefore NUL-terminated even
    // if the converter does not write a terminator itself.
    string utfBuf((len << 1) + 1, '\0');

    // Allocated after utfBuf so nothing that can throw runs while this raw
    // array is owned; it is released before the result string is built.
    uint16_t * uniArr = new uint16_t[unitCount];
    for(size_t i = 0; i < len; i += 2)
    {
        // Go through unsigned char so a signed char cannot sign-extend.
        const uint16_t hi = static_cast<unsigned char>(uniStr[i]);
        const uint16_t lo = static_cast<unsigned char>(uniStr[i + 1]);
        uniArr[i >> 1] = static_cast<uint16_t>((hi << 8) | lo);
    }
    const size_t utfLen = unicodeToUtf8(uniArr, unitCount, &utfBuf[0]);
    delete [] uniArr;

    if(0 == utfLen)
    {
        return "";
    }
    // Copy up to the first NUL, matching the original C-string assignment.
    return string(utfBuf.c_str());
}
/*from: http://www.cppblog.com/lf426/archive/2008/03/31/45796.html */
int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode)
{
@ -221,6 +256,30 @@ namespace CPPCOMMON
return length;
}
/*
 * Convert UTF-8 text into a byte-packed UTF-16 string.
 *
 * Each 16-bit code unit produced by the low-level converter is appended to
 * the result as two bytes, high byte first.
 *
 * Returns "" when utfStr is empty or the converter reports no code units
 * (a zero or negative count).
 */
string utf8ToUnicode(const string& utfStr)
{
    string res;
    if(utfStr.empty())
    {
        return res;
    }
    // Reserve before taking ownership of the raw array: with two output bytes
    // per code unit and at most one unit per input byte (which the pUni sizing
    // below already relies on), the appends never reallocate and so cannot
    // throw while pUni is live.
    res.reserve(utfStr.size() * 2);

    uint16_t* pUni = new uint16_t[utfStr.size()];
    // Keep the converter's signed result. Storing it in an unsigned type would
    // turn a negative value into a huge loop bound and read past pUni.
    const int uniLen = utf8ToUnicode(utfStr.c_str(), static_cast<int>(utfStr.size()), pUni);
    for(int i = 0; i < uniLen; i++)
    {
        res += static_cast<char>((pUni[i] >> 8) & 0x00ff);
        res += static_cast<char>(pUni[i] & 0x00ff);
    }
    delete [] pUni;
    return res;
}
}
#ifdef TEST_STR_FUNCTS
@ -249,22 +308,33 @@ int main()
//
//s = "ab1ba2ab3";
//cout<<replaceStr(s,"ab","###")<<endl;
ifstream ifile("testdata/dict.txt");
string line;
while(getline(ifile, line))
{
uint16_t strbuf[1024];
//ifstream ifile("testdata/dict.txt");
//string line;
//while(getline(ifile, line))
//{
// uint16_t strbuf[1024];
size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
for(int i = 0; i < unilen; i++)
{
// printf("%x\n", strbuf[i]);
}
char utf8str[512]={0};
unicodeToUtf8(strbuf, unilen, utf8str);
//cout<<strlen(utf8str);
cout<<utf8str<<endl;
}
// size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
// for(int i = 0; i < unilen; i++)
// {
// // printf("%x\n", strbuf[i]);
// }
// char utf8str[512]={0};
// unicodeToUtf8(strbuf, unilen, utf8str);
// //cout<<strlen(utf8str);
// cout<<utf8str<<endl;
//}
ifstream ifile("jieba.dict.utf8");
string line;
while(getline(ifile, line))
{
cout<<line<<endl;
string uniStr = utf8ToUnicode(line);
//cout<<uniStr<<endl;
string utfStr = unicodeToUtf8(uniStr);
cout<<utfStr<<endl;
}
getchar();
return 0;
}
#endif

View File

@ -7,6 +7,7 @@
#include <algorithm>
#include <cctype>
#include <stdint.h>
#include "typedefs.h"
namespace CPPCOMMON
{
using namespace std;
@ -25,8 +26,10 @@ namespace CPPCOMMON
unsigned int countStrDistance(const string& A, const string& B);
unsigned int countStrSimilarity(const string& A, const string& B);
// Encoding conversion between UTF-8 and 16-bit Unicode code units.

// Low-level conversion of `len` 16-bit code units at `in` into bytes at `out`.
// NOTE(review): the return value is presumably the number of bytes written;
// callers treat 0 as "nothing converted" -- confirm against the definition.
size_t unicodeToUtf8(uint16_t *in, size_t len, char * out);
// Converts a byte-packed code-unit string (two bytes per unit, high byte
// first) into UTF-8. Returns "" when uniStr has an odd number of bytes or the
// low-level conversion reports zero length.
string unicodeToUtf8(const string& uniStr);
// Low-level conversion of `len` bytes of UTF-8 at `inutf8` into 16-bit units
// at `unicode`.
// NOTE(review): presumably returns the number of units produced -- confirm
// against the definition.
int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode);
// Converts UTF-8 text into a byte-packed code-unit string: each 16-bit unit is
// stored as two bytes, high byte first. Returns "" when no units are produced.
string utf8ToUnicode(const string& utfStr);
}
#endif

10
cppcommon/typedefs.h Normal file
View File

@ -0,0 +1,10 @@
#ifndef CPPCOMMON_TYPEDEFS_H
#define CPPCOMMON_TYPEDEFS_H
// Shared integer shorthands for the CPPCOMMON helpers.
namespace CPPCOMMON {
    // Short spelling of `unsigned int`.
    typedef unsigned uint;
}
#endif