From 19afccec57c48d9efebaa3506a9f77864d44ceac Mon Sep 17 00:00:00 2001
From: gwdwyy
Date: Tue, 9 Jul 2013 11:45:20 +0800
Subject: [PATCH] update cppcommon

---
 cppcommon/str_functs.cpp | 116 +++++++++++++++++++++++++++++++++------
 cppcommon/str_functs.h   |   5 +-
 cppcommon/typedefs.h     |  10 ++++
 3 files changed, 115 insertions(+), 16 deletions(-)
 create mode 100644 cppcommon/typedefs.h

diff --git a/cppcommon/str_functs.cpp b/cppcommon/str_functs.cpp
index 7d26fa8..da4ef3d 100644
--- a/cppcommon/str_functs.cpp
+++ b/cppcommon/str_functs.cpp
@@ -186,6 +186,56 @@ namespace CPPCOMMON
         return res;
     }
 
+    /**
+     * Convert 16-bit Unicode code units, serialized as bytes, to UTF-8.
+     *
+     * @param uniStr two bytes per code unit, high byte first (the layout
+     *               emitted by utf8ToUnicode(const string&)).
+     * @return the string written by the pointer-based unicodeToUtf8, or ""
+     *         when uniStr is empty, has odd length, or that converter
+     *         returns 0.
+     */
+    string unicodeToUtf8(const string& uniStr)
+    {
+        const size_t len = uniStr.size();
+        // Reject empty input up front so no zero-length buffers are allocated.
+        if(0 == len || len%2)
+        {
+            return "";
+        }
+
+        uint16_t * uniArr = new uint16_t[len>>1];
+        char * utfStr = new char[len<<1];
+        // size_t index: len is size_t, so an int index would be a
+        // signed/unsigned comparison and could overflow on huge inputs.
+        for(size_t i = 0; i < len; i+=2)
+        {
+            // The masks drop the sign-extension bits a char picks up when
+            // it is negative and gets widened to 16 bits.
+            uint16_t tmp1 = uniStr[i];
+            tmp1 <<= 8;
+            tmp1&= 0xff00;
+            uint16_t tmp2 = uniStr[i+1];
+            tmp2 &= 0x00ff;
+            uniArr[i>>1] = tmp1 | tmp2;
+        }
+
+        string res;
+        size_t utfLen = unicodeToUtf8(uniArr, len>>1, utfStr);
+        if(0 == utfLen)
+        {
+            res = "";
+        }
+        else
+        {
+            // NOTE(review): assumes the converter NUL-terminates utfStr -- confirm.
+            res = utfStr;
+        }
+        delete [] uniArr;
+        delete [] utfStr;
+        return res;
+    }
+
     /*from: http://www.cppblog.com/lf426/archive/2008/03/31/45796.html */
     int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode)
     {
@@ -221,6 +271,31 @@ namespace CPPCOMMON
         return length;
     }
 
+    /**
+     * Convert a UTF-8 string to 16-bit Unicode code units serialized as bytes.
+     *
+     * @param utfStr UTF-8 input handed to the pointer-based utf8ToUnicode.
+     * @return two bytes per code unit, high byte first, or "" when that
+     *         converter reports no units (a result <= 0).
+     */
+    string utf8ToUnicode(const string& utfStr)
+    {
+        uint16_t* pUni = new uint16_t[utfStr.size()];
+        // Keep the result as int (the converter's return type): stored in an
+        // unsigned type, a negative value would become a huge count and the
+        // loop below would read far past pUni.
+        const int uniLen = utf8ToUnicode(utfStr.c_str(), static_cast<int>(utfStr.size()), pUni);
+        string res;
+        for(int i = 0; i < uniLen; i++)
+        {
+            // High byte first, then low byte.
+            res += static_cast<char>((pUni[i]>>8) & 0x00ff);
+            res += static_cast<char>(pUni[i] & 0x00ff);
+        }
+        delete [] pUni;
+        return res;
+    }
+
 }
 
 #ifdef TEST_STR_FUNCTS
@@ -249,22 +324,33 @@ int main()
     //
     //s = "ab1ba2ab3";
     //cout< #include #include
+#include "typedefs.h"
 namespace CPPCOMMON
 {
     using namespace std;
@@ -25,8 +26,10 @@ namespace CPPCOMMON
     unsigned int countStrDistance(const string& A, const string& B);
     unsigned int countStrSimilarity(const string& A, const string& B);
 
-
+    //encode
     size_t unicodeToUtf8(uint16_t *in, size_t len, char * out);
+    string unicodeToUtf8(const string& uniStr);
     int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode);
+    string utf8ToUnicode(const string& utfStr);
 }
 #endif
diff --git a/cppcommon/typedefs.h b/cppcommon/typedefs.h
new file mode 100644
index 0000000..6a8faa1
--- /dev/null
+++ b/cppcommon/typedefs.h
@@ -0,0 +1,10 @@
+#ifndef CPPCOMMON_TYPEDEFS_H
+#define CPPCOMMON_TYPEDEFS_H
+
+namespace CPPCOMMON
+{
+    typedef unsigned int uint;
+}
+
+
+#endif