update cppcommon for trim

2025-07-18 00:00:12 +08:00 · 2013-08-24 20:11:15 +08:00 · 2013-08-24 20:11:15 +08:00 · 13421cd0e6
commit 13421cd0e6
parent 27e3b1871c
2 changed files with 55 additions and 236 deletions
--- a/src/cppcommon/str_functs.cpp
+++ b/src/cppcommon/str_functs.cpp
@ -1,7 +1,7 @@
 /************************************
 * file enc : utf8
 * author   : wuyanyi09@gmail.com
-************************************/
+ ************************************/
 #include "str_functs.h"
 namespace CPPCOMMON
@ -22,9 +22,9 @@ namespace CPPCOMMON
 				return str;
 			}
 			if (n > -1)
-				size = n + 1;
+			  size = n + 1;
 			else
-				size *= 2;
+			  size *= 2;
 		}
 		return str;
 	}
@ -91,7 +91,29 @@ namespace CPPCOMMON
 		return str.substr(posL, posR - posL + 1);
 	}
-	
+
 	//http://stackoverflow.com/questions/216823/whats-the-best-way-to-trim-stdstring
 	// Trim leading whitespace from s in place.
 	// A character counts as whitespace when std::isspace says so.
 	// Returns a reference to s so that calls can be chained.
 	std::string &ltrim(std::string &s) 
 	{
 		std::string::size_type firstKept = 0;
 		// Cast to unsigned char before calling std::isspace: passing a negative
 		// value (any byte >= 0x80 when char is signed, e.g. UTF-8 text) is
 		// undefined behaviour. A plain loop also avoids std::ptr_fun/std::not1,
 		// which were removed in C++17.
 		while (firstKept < s.size() && std::isspace(static_cast<unsigned char>(s[firstKept])))
 		{
 			++firstKept;
 		}
 		s.erase(0, firstKept);
 		return s;
 	}
 	// Trim trailing whitespace from s in place.
 	// A character counts as whitespace when std::isspace says so.
 	// Returns a reference to s so that calls can be chained.
 	std::string &rtrim(std::string &s) 
 	{
 		std::string::size_type keptLen = s.size();
 		// Cast to unsigned char before calling std::isspace: passing a negative
 		// value (any byte >= 0x80 when char is signed, e.g. UTF-8 text) is
 		// undefined behaviour. A plain loop also avoids std::ptr_fun/std::not1,
 		// which were removed in C++17.
 		while (keptLen > 0 && std::isspace(static_cast<unsigned char>(s[keptLen - 1])))
 		{
 			--keptLen;
 		}
 		// erase(pos) drops everything from pos to the end; pos == size() is a no-op.
 		s.erase(keptLen);
 		return s;
 	}
 	// Strip whitespace from both ends of s in place and return a reference to s.
 	std::string &trim(std::string &s) 
 	{
 		// Trailing whitespace is removed first, then leading; both calls modify s itself.
 		rtrim(s);
 		return ltrim(s);
 	}
 	bool splitStrMultiPatterns(
 				const string& strSrc, 
 				vector<string>& outVec, 
@ -176,208 +198,6 @@ namespace CPPCOMMON
 		return res;
 	}
 	/*
    //unicode utf8 transform
    size_t unicodeToUtf8(uint16_t *in, size_t len, char * out)
    {
        size_t res = 0;
        for (int i = 0; i < len; i++)
        {
            uint16_t unicode = in[i];
            if (unicode >= 0x0000 && unicode <= 0x007f)
            {
                *out = (uint8_t)unicode;
                out += 1;
                res += 1;
            }
            else if (unicode >= 0x0080 && unicode <= 0x07ff)
            {
                *out = 0xc0 | (unicode >> 6);
                out += 1;
                *out = 0x80 | (unicode & (0xff >> 2));
                out += 1;
                res += 2;
            }
            else if (unicode >= 0x0800 && unicode <= 0xffff)
            {
                *out = 0xe0 | (unicode >> 12);
                out += 1;
                *out = 0x80 | ((unicode >> 6) & 0x3f);
                out += 1;
                *out = 0x80 | (unicode & 0x3f);
                out += 1;
                res += 3;
            }
        }
        *out = '\0';
        return res;
    }
 	string unicodeToUtf8(const Unicode& unicode)
 	{
 		if(unicode.empty())
 		{
 			return "";
 		}
 		uint16_t * uniArr = new uint16_t[unicode.size() + 1];
 		if(NULL == uniArr)
 		{
 			return "";
 		}
 		char * utfStr = new char[unicode.size() * 4 + 1];
 		if(NULL == utfStr)
 		{
 			delete [] uniArr;
 			return "";
 		}
 		for(uint i = 0; i < unicode.size(); i++)
 		{
 			uniArr[i] = unicode[i];
 		}
 		string res("");
 		size_t utfLen = unicodeToUtf8(uniArr, unicode.size(), utfStr);
 		if(0 != utfLen)
 		{
 			res = utfStr;
 		}
 		delete [] uniArr;
 		delete [] utfStr;
 		return res;
 	}
 	*/
    /*from: http://www.cppblog.com/lf426/archive/2008/03/31/45796.html */
    //int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode)
    //{
    //    int length;
    //    const unsigned char* utf8 = (const unsigned char*) inutf8;
    //    const unsigned char* t = (const unsigned char*) inutf8;
    //    length = 0;
    //    while (utf8 - t < len)
    //    {
    //        if ( *(unsigned char *) utf8 <= 0x7f ) 
    //        {
    //            //expand with 0s.
    //            *unicode++ = *utf8++;
    //        }
    //        //2 byte.
    //        else if ( *(unsigned char *) utf8 <= 0xdf ) 
    //        {
    //            *unicode++ = ((*(unsigned char *) utf8 & 0x1f) << 6) + ((*(unsigned char *) (utf8 + 1)) & 0x3f);
    //            utf8 += 2;
    //        }
    //        //3 byte.Chinese may use 3 byte.
    //        else {
    //            *unicode++ = ((int) (*(unsigned char *) utf8 & 0x0f) << 12) +
    //                ((*(unsigned char *) (utf8 + 1) & 0x3f) << 6) +
    //                (*(unsigned char *) (utf8 + 2) & 0x3f);
    //            utf8 += 3;
    //        }
    //        length++;
    //    }
    //    //*unicode = 0; !! this may cause out range of array;
    //    return length;
    //}
 	//bool utf8ToUnicode(const string& utfStr, Unicode& unicode)
 	//{
 	//	unicode.clear();
 	//	if(utfStr.empty())
 	//	{
 	//		return false;
 	//	}
 	//	uint16_t* pUni = new uint16_t[utfStr.size() + 1];
 	//	if(NULL == pUni)
 	//	{
 	//		return false;
 	//	}
 	//	size_t uniLen = utf8ToUnicode(utfStr.c_str(), utfStr.size(), pUni);
 	//	for(uint i = 0; i < uniLen; i++)
 	//	{
 	//		unicode.push_back(pUni[i]);
 	//	}
 	//	delete [] pUni;
 	//	return true;
 	//}
 	////iconv
 	//int code_convert(const char *from_charset,const char *to_charset,char *inbuf,size_t inlen,char *outbuf,size_t outlen)
 	//{
 	//	iconv_t cd;
 	//	char **pin = &inbuf;
 	//	char **pout = &outbuf;
 	//	cd = iconv_open(to_charset,from_charset);
 	//	if (cd==NULL) 
 	//	{
 	//		return -1;
 	//	}
 	//	memset(outbuf,0,outlen);
 	//	size_t ret = iconv(cd,pin,&inlen,pout,&outlen);
 	//	if (ret == -1)
 	//	{
 	//		//cout<<__FILE__<<__LINE__<<endl;
 	//		return -1;
 	//	}
 	//	iconv_close(cd);
 	//	return 0;
 	//}
 	////gbk -> utf8
 	//string gbkToUtf8(const string& gbk)
 	//{
 	//	if(gbk.empty())
 	//	{
 	//		return "";
 	//	}
 	//	string res("");
 	//	size_t maxLen = gbk.size()*4 + 1;
 	//	char * pUtf = new char[maxLen];
 	//	if(NULL == pUtf)
 	//	{
 	//		return "";
 	//	}
 	//	int ret = code_convert("gbk", "utf-8", (char *)gbk.c_str(), gbk.size(), pUtf, maxLen);
 	//	if(ret == -1)
 	//	{
 	//		delete [] pUtf;
 	//		return res;
 	//	}
 	//	res = pUtf;
 	//	delete [] pUtf;
 	//	return res;
 	//}
 	////utf8 -> gbk
 	//string utf8ToGbk(const string& utf)
 	//{
 	//	//cout<<__FILE__<<__LINE__<<gbk<<endl;
 	//	string res;
 	//	size_t maxLen = utf.size()*4 + 1;
 	//	char * pGbk = new char[maxLen];
 	//	if(NULL == pGbk)
 	//	{
 	//		return "";
 	//	}
 	//	int ret = code_convert("utf-8", "gbk", (char *)utf.c_str(), utf.size(), pGbk, maxLen);
 	//	if(ret == -1)
 	//	{
 	//		delete [] pGbk;
 	//		return "";
 	//	}
 	//	res = pGbk;
 	//	delete [] pGbk;
 	//	return res;
 	//}
 	//
 	//unicode str to vec
 	bool uniStrToVec(const string& str, Unicode& vec)
 	{
@ -440,22 +260,22 @@ int main()
 	//
 	//s = "ab1ba2ab3";
 	//cout<<replaceStr(s,"ab","###")<<endl;
-    //ifstream ifile("testdata/dict.txt");
+	//ifstream ifile("testdata/dict.txt");
-    //string line;
+	//string line;
-    //while(getline(ifile, line))
+	//while(getline(ifile, line))
-    //{
+	//{
-    //    uint16_t strbuf[1024];
+	//    uint16_t strbuf[1024];
-    //    size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
+	//    size_t unilen = utf8ToUnicode(line.c_str(), line.size(), strbuf);
-    //    for(int i = 0; i < unilen; i++)
+	//    for(int i = 0; i < unilen; i++)
-    //    {
+	//    {
-    //        // printf("%x\n", strbuf[i]);
+	//        // printf("%x\n", strbuf[i]);
-    //    }
+	//    }
-    //    char utf8str[512]={0};
+	//    char utf8str[512]={0};
-    //    unicodeToUtf8(strbuf, unilen, utf8str);
+	//    unicodeToUtf8(strbuf, unilen, utf8str);
-    //    //cout<<strlen(utf8str);
+	//    //cout<<strlen(utf8str);
-    //    cout<<utf8str<<endl;
+	//    cout<<utf8str<<endl;
-    //}
+	//}
 	//cout<<string_format("hehe%s11asd%dasf","[here]",2);
 	//ifstream ifile("testdata/dict.gbk");
 	//string line;
@ -481,12 +301,15 @@ int main()
 	//	s = utf8ToGbk(s);
 	//	cout<<s<<endl;
 	//}
-	cout<<strStartsWith("--help","--")<<endl;
+	//cout<<strStartsWith("--help","--")<<endl;
-	cout<<strStartsWith("--help","-")<<endl;
+	//cout<<strStartsWith("--help","-")<<endl;
-	cout<<strStartsWith("--help","he")<<endl;
+	//cout<<strStartsWith("--help","he")<<endl;
-	cout<<strStartsWith("help","help")<<endl;
+	//cout<<strStartsWith("help","help")<<endl;
-	cout<<strStartsWith("","help")<<endl;
+	//cout<<strStartsWith("","help")<<endl;
-	cout<<strStartsWith("hel","")<<endl;
+	//cout<<strStartsWith("hel","")<<endl;
 	string s("  helloword heh\t");
 	string b;
 	cout<<trim(b)<<"11"<<endl;
 	return 0;
 }
 #endif
--- a/src/cppcommon/str_functs.h
+++ b/src/cppcommon/str_functs.h
@ -17,6 +17,8 @@
 #include <iconv.h>
 #include <memory.h>
 #include "typedefs.h"
 #include <functional> 
 #include <locale>
 namespace CPPCOMMON
 {
 	using namespace std;
@ -33,18 +35,12 @@ namespace CPPCOMMON
 	string lowerStr(const string& str);
 	string replaceStr(const string& strSrc, const string& oldStr, const string& newStr, int count = -1);
 	string stripStr(const string& str, const string& patternstr = " \n\t");
 	std::string &ltrim(std::string &s) ;
 	std::string &rtrim(std::string &s) ;
 	std::string &trim(std::string &s) ;
 	unsigned int countStrDistance(const string& A, const string& B);
 	unsigned int countStrSimilarity(const string& A, const string& B);
    //encode
    //size_t unicodeToUtf8(uint16_t *in, size_t len, char * out);
 	//string unicodeToUtf8(const Unicode& unicode);
    //int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode);
 	//bool utf8ToUnicode(const string& utfStr, Unicode& unicode);
 	//int code_convert(const char *from_charset,const char *to_charset,char *inbuf,size_t inlen,char *outbuf,size_t outlen);
 	//string gbkToUtf8(const string& gbk);
 	//string utf8ToGbk(const string& utf);
 	bool uniStrToVec(const string& str, Unicode& vec);
 	string uniVecToStr(const Unicode& vec);