From 13421cd0e64e6ae16690321351a8535e622a7cb3 Mon Sep 17 00:00:00 2001 From: gwdwyy Date: Sat, 24 Aug 2013 20:11:15 +0800 Subject: [PATCH] update cppcommon for trim --- src/cppcommon/str_functs.cpp | 277 +++++++---------------------------- src/cppcommon/str_functs.h | 14 +- 2 files changed, 55 insertions(+), 236 deletions(-) diff --git a/src/cppcommon/str_functs.cpp b/src/cppcommon/str_functs.cpp index 50e3954..5f70a02 100644 --- a/src/cppcommon/str_functs.cpp +++ b/src/cppcommon/str_functs.cpp @@ -1,7 +1,7 @@ /************************************ * file enc : utf8 * author : wuyanyi09@gmail.com -************************************/ + ************************************/ #include "str_functs.h" namespace CPPCOMMON @@ -22,9 +22,9 @@ namespace CPPCOMMON return str; } if (n > -1) - size = n + 1; + size = n + 1; else - size *= 2; + size *= 2; } return str; } @@ -91,7 +91,29 @@ namespace CPPCOMMON return str.substr(posL, posR - posL + 1); } - + + + //http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring + // trim from start + std::string &ltrim(std::string &s) + { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + return s; + } + + // trim from end + std::string &rtrim(std::string &s) + { + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + return s; + } + + // trim from both ends + std::string &trim(std::string &s) + { + return ltrim(rtrim(s)); + } + bool splitStrMultiPatterns( const string& strSrc, vector& outVec, @@ -176,208 +198,6 @@ namespace CPPCOMMON return res; } - /* - //unicode utf8 transform - size_t unicodeToUtf8(uint16_t *in, size_t len, char * out) - { - size_t res = 0; - for (int i = 0; i < len; i++) - { - uint16_t unicode = in[i]; - if (unicode >= 0x0000 && unicode <= 0x007f) - { - *out = (uint8_t)unicode; - out += 1; - res += 1; - } - else if (unicode >= 0x0080 && unicode <= 0x07ff) - { - *out = 0xc0 | (unicode >> 6); - out += 1; - 
*out = 0x80 | (unicode & (0xff >> 2)); - out += 1; - res += 2; - } - else if (unicode >= 0x0800 && unicode <= 0xffff) - { - *out = 0xe0 | (unicode >> 12); - out += 1; - *out = 0x80 | ((unicode >> 6) & 0x3f); - out += 1; - *out = 0x80 | (unicode & 0x3f); - out += 1; - res += 3; - } - - } - *out = '\0'; - return res; - } - - string unicodeToUtf8(const Unicode& unicode) - { - if(unicode.empty()) - { - return ""; - } - - uint16_t * uniArr = new uint16_t[unicode.size() + 1]; - if(NULL == uniArr) - { - return ""; - } - char * utfStr = new char[unicode.size() * 4 + 1]; - if(NULL == utfStr) - { - delete [] uniArr; - return ""; - } - for(uint i = 0; i < unicode.size(); i++) - { - uniArr[i] = unicode[i]; - } - - string res(""); - size_t utfLen = unicodeToUtf8(uniArr, unicode.size(), utfStr); - if(0 != utfLen) - { - res = utfStr; - } - delete [] uniArr; - delete [] utfStr; - return res; - } - */ - - /*from: http://www.cppblog.com/lf426/archive/2008/03/31/45796.html */ - //int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode) - //{ - // int length; - // const unsigned char* utf8 = (const unsigned char*) inutf8; - // const unsigned char* t = (const unsigned char*) inutf8; - - // length = 0; - // while (utf8 - t < len) - // { - // if ( *(unsigned char *) utf8 <= 0x7f ) - // { - // //expand with 0s. - // *unicode++ = *utf8++; - // } - // //2 byte. - // else if ( *(unsigned char *) utf8 <= 0xdf ) - // { - // *unicode++ = ((*(unsigned char *) utf8 & 0x1f) << 6) + ((*(unsigned char *) (utf8 + 1)) & 0x3f); - // utf8 += 2; - // } - // //3 byte.Chinese may use 3 byte. - // else { - // *unicode++ = ((int) (*(unsigned char *) utf8 & 0x0f) << 12) + - // ((*(unsigned char *) (utf8 + 1) & 0x3f) << 6) + - // (*(unsigned char *) (utf8 + 2) & 0x3f); - // utf8 += 3; - // } - // length++; - // } - - - // //*unicode = 0; !! 
this may cause out range of array; - // return length; - //} - - //bool utf8ToUnicode(const string& utfStr, Unicode& unicode) - //{ - // unicode.clear(); - // if(utfStr.empty()) - // { - // return false; - // } - // uint16_t* pUni = new uint16_t[utfStr.size() + 1]; - // if(NULL == pUni) - // { - // return false; - // } - // size_t uniLen = utf8ToUnicode(utfStr.c_str(), utfStr.size(), pUni); - // for(uint i = 0; i < uniLen; i++) - // { - // unicode.push_back(pUni[i]); - // } - // delete [] pUni; - // return true; - //} - - ////iconv - //int code_convert(const char *from_charset,const char *to_charset,char *inbuf,size_t inlen,char *outbuf,size_t outlen) - //{ - // iconv_t cd; - - // char **pin = &inbuf; - // char **pout = &outbuf; - - // cd = iconv_open(to_charset,from_charset); - // if (cd==NULL) - // { - // return -1; - // } - // memset(outbuf,0,outlen); - // size_t ret = iconv(cd,pin,&inlen,pout,&outlen); - // if (ret == -1) - // { - // //cout<<__FILE__<<__LINE__< utf8 - //string gbkToUtf8(const string& gbk) - //{ - // if(gbk.empty()) - // { - // return ""; - // } - // string res(""); - // size_t maxLen = gbk.size()*4 + 1; - // char * pUtf = new char[maxLen]; - // if(NULL == pUtf) - // { - // return ""; - // } - // int ret = code_convert("gbk", "utf-8", (char *)gbk.c_str(), gbk.size(), pUtf, maxLen); - // if(ret == -1) - // { - // delete [] pUtf; - // return res; - // } - // res = pUtf; - // delete [] pUtf; - // return res; - //} - - ////utf8 -> gbk - //string utf8ToGbk(const string& utf) - //{ - // //cout<<__FILE__<<__LINE__< #include #include "typedefs.h" +#include +#include namespace CPPCOMMON { using namespace std; @@ -33,18 +35,12 @@ namespace CPPCOMMON string lowerStr(const string& str); string replaceStr(const string& strSrc, const string& oldStr, const string& newStr, int count = -1); string stripStr(const string& str, const string& patternstr = " \n\t"); + std::string &ltrim(std::string &s) ; + std::string &rtrim(std::string &s) ; + std::string 
&trim(std::string &s) ; unsigned int countStrDistance(const string& A, const string& B); unsigned int countStrSimilarity(const string& A, const string& B); - //encode - //size_t unicodeToUtf8(uint16_t *in, size_t len, char * out); - //string unicodeToUtf8(const Unicode& unicode); - //int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode); - //bool utf8ToUnicode(const string& utfStr, Unicode& unicode); - - //int code_convert(const char *from_charset,const char *to_charset,char *inbuf,size_t inlen,char *outbuf,size_t outlen); - //string gbkToUtf8(const string& gbk); - //string utf8ToGbk(const string& utf); bool uniStrToVec(const string& str, Unicode& vec); string uniVecToStr(const Unicode& vec);