commit 8f35e77bc8b3c8482d514982eff2e1c5a5a9bbbd Author: wyy Date: Sun Jun 23 22:31:51 2013 +0800 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..887a366 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +*tmp* +tags +*swp +*.out +*.o +*.d +*.ut +log diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/cppcommon/Makefile b/cppcommon/Makefile new file mode 100644 index 0000000..891e7b0 --- /dev/null +++ b/cppcommon/Makefile @@ -0,0 +1,42 @@ +CC = g++ +CCOPT = -Wall -c +LINK = g++ +LINKOPT = +PACK = ar +PACKOPT = rc +SOURCES := $(wildcard *.cpp) +OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) + +DOPACK = $(PACK) $(PACKOPT) $@ $? +DOLINK = $(LINK) $(LINKOPT) -o $@ $? + +CMLIB = cmlib.a + +%.o: %.cpp + $(CC) $(CCOPT) $< + + +all: $(CMLIB) + + +$(CMLIB): $(OBJS) + $(DOPACK) + + +file_functs.test: file_functs.cpp file_functs.h + g++ -o $@ $< -DTEST_FILE_FUNCTS +io_functs.test: io_functs.cpp io_functs.h + g++ -o $@ $< -DTEST_IO_FUNCTS +str_functs.ut: str_functs.cpp str_functs.h + g++ -o $@ $< -DTEST_STR_FUNCTS +vec_functs.test: vec_functs.cpp vec_functs.h vec_functs.tcc + g++ -o $@ $< -DTEST_VEC_FUNCTS + +logger.ut: logger.cpp logger.h file_functs.cpp file_functs.h + g++ -o $@ $< file_functs.cpp -DUNIT_TEST +config.ut: config.cpp config.h + g++ -o $@ $< -DCONFIG_UT $(CMLIB) + +clean: + rm -f *.test *.ut *.o $(CMLIB) + diff --git a/cppcommon/config.cpp b/cppcommon/config.cpp new file mode 100644 index 0000000..f011720 --- /dev/null +++ b/cppcommon/config.cpp @@ -0,0 +1,112 @@ +#include "config.h" +#include "str_functs.h" + +namespace CPPCOMMON +{ + Config::Config() + { + _isInit = false; + } + + Config::~Config() + { + } + + bool Config::init(const string& configFile) + { + char msgBuf[1024]; + if(_isInit) + { + LogFatal("already have been initialized. "); + return false; + } + ifstream ifile(configFile.c_str()); + if(!ifile) + { + sprintf(msgBuf, "open configFile[%s] failed.", configFile.c_str()); + LogFatal(msgBuf); + return false; + } + string line, key, value; + vector vecBuf; + while(getline(ifile, line)) + { + line = _stripComment(line); + if(line.empty()) + { + continue; + } + vecBuf = splitStr(line, "="); + if(2 != vecBuf.size()) + { + sprintf(msgBuf, "line[%s] is illegal.", line.c_str()); + LogFatal(msgBuf); + return false; + } + key = vecBuf[0]; + value = vecBuf[1]; + if(_map.end() != _map.find(key)) + { + sprintf(msgBuf, "key[%s] already exists.", key.c_str()); + LogFatal(msgBuf); + return false; + } + _map[key] = value; + } + ifile.close(); + _isInit = true; + return true; + } + + void Config::display() + { + for(map::iterator it = _map.begin(); it != _map.end(); it++) + { + cout<<"("<first<<","<second<<")"< +#include +#include +#include "logger.h" + +namespace CPPCOMMON +{ + using std::map; + using std::string; + using std::cout; + using std::endl; + using std::ifstream; + class Config + { + public: + Config(); + ~Config(); + bool init(const string& configFile); + void display(); + string getByKey(const string& key); + private: + string _stripComment(const string& line); + map _map; + bool _isInit; + + }; +} + +namespace CPPCOMMON +{ + extern Config gConfig; +} + +#endif diff --git a/cppcommon/file_functs.cpp b/cppcommon/file_functs.cpp new file mode 100644 index 0000000..e487ba2 --- /dev/null +++ b/cppcommon/file_functs.cpp @@ -0,0 +1,45 @@ +#include "file_functs.h" + +namespace CPPCOMMON +{ + using namespace std; + bool checkFileExist(const char * filepath) + { + fstream _file; + _file.open(filepath, ios::in); + if(_file) + return true; + return false; + } + bool createDir(const char * dir_path, bool p) + { + string dir_str(dir_path); + string cmd = "mkdir"; + if(p) + { + cmd += " -p"; + } + cmd += " " + dir_str; + int res = system(cmd.c_str()); + return res; + } + bool checkDirExist(const char * dir_path) + { + return checkFileExist(dir_path); + } +} + +#ifdef TEST_FILE_FUNCTS +#include +using namespace CPPCOMMON; +using namespace std; +int main() +{ + char filename[] = "1/2/3"; + if(!checkFileExist(filename)) + { + createDir(filename); + } + return 0; +} +#endif diff --git a/cppcommon/file_functs.h b/cppcommon/file_functs.h new file mode 100644 index 0000000..980e6b1 --- /dev/null +++ b/cppcommon/file_functs.h @@ -0,0 +1,16 @@ +#ifndef CPPCOMMON_FILE_FUNCTS_H +#define CPPCOMMON_FILE_FUNCTS_H + +#include +#include +#include +#include +namespace CPPCOMMON +{ + bool checkFileExist(const char * filepath); + bool createDir(const char * dir_path, bool p = true); + bool checkDirExist(const char * dir_path); + +} + +#endif diff --git a/cppcommon/io_functs.cpp b/cppcommon/io_functs.cpp new file mode 100644 index 0000000..0a4c790 --- /dev/null +++ b/cppcommon/io_functs.cpp @@ -0,0 +1,25 @@ +#include "io_functs.h" + +namespace CPPCOMMON +{ + string loadFile2Str(const char * const filepath) + { + ifstream in(filepath, ios::in); + istreambuf_iterator beg(in), end; + string str(beg, end); + in.close(); + return str; + } +} + +#ifdef TEST_IO_FUNCTS +#include +using namespace CPPCOMMON; +using namespace std; +int main() +{ + char filename[] = "1/2/3"; + cout< +#include +namespace CPPCOMMON +{ + using namespace std; + string loadFile2Str(const char * const filepath); +} +#endif diff --git a/cppcommon/logger.cpp b/cppcommon/logger.cpp new file mode 100644 index 0000000..383aaae --- /dev/null +++ b/cppcommon/logger.cpp @@ -0,0 +1,77 @@ +#include "logger.h" +namespace CPPCOMMON +{ + const char * Logger::_logFormat = "%s [File:%s] [Line:%d] [%s] Msg:%s"; + const char * Logger::_timeFormat = "%Y-%m-%d %H:%M:%S"; + const char * Logger::_logDir = "./log/"; + const char * Logger::_logName = "run.log"; + Logger::Logger() + { + + _isCoutOpen = true; + _logLevel[LL_DEBUG] = "DEBUG"; + _logLevel[LL_INFO] = "INFO"; + _logLevel[LL_WARN] = "WARN"; + _logLevel[LL_ERROR] = "ERROR"; + _logLevel[LL_FATAL] = "FATAL"; + InitDefault(); + } + Logger::~Logger() + { + if(_logFile) + { + _logFile.close(); + } + } + void Logger::InitDefault() + { + _logCoutLevel = LL_INFO; + _logFileLevel = LL_DEBUG; + if(!checkDirExist(_logDir)) + { + createDir(_logDir); + } + _logFile.open((string(_logDir) + string(_logName)).c_str(), ios::app); + + } + bool Logger::Logging(unsigned int level, const string& msg, const string& fileName, const int& lineNo) + { + if(level < LL_DEBUG || level > LL_FATAL) + { + cerr<<"level's value is out of range"<= _logCoutLevel) + { + cout<<_cStrBuf<= _logFileLevel) + { + _logFile<<_cStrBuf< +#include +#include +#include +#include +#include "file_functs.h" + +#define LL_DEBUG 1 +#define LL_INFO 2 +#define LL_WARN 3 +#define LL_ERROR 4 +#define LL_FATAL 5 +#define LEVEL_ARRAY_SIZE 6 +#define CSTR_BUFFER_SIZE 1024 + + +#define LogDebug(msg) loggerSingleTon.Logging(LL_DEBUG, msg, __FILE__, __LINE__) +#define LogInfo(msg) loggerSingleTon.Logging(LL_INFO, msg, __FILE__, __LINE__) +#define LogWarn(msg) loggerSingleTon.Logging(LL_WARN, msg, __FILE__, __LINE__) +#define LogError(msg) loggerSingleTon.Logging(LL_ERROR, msg, __FILE__, __LINE__) +#define LogFatal(msg) loggerSingleTon.Logging(LL_FATAL, msg, __FILE__, __LINE__) + + + +namespace CPPCOMMON +{ + using namespace std; + class Logger + { + public: + Logger(); + ~Logger(); + public: + void InitDefault(); + bool Logging(unsigned int level, const string& msg, const string& fileName, const int& lineNo); + private: + bool _isCoutOpen; + char _cStrBuf[CSTR_BUFFER_SIZE]; + const char * _logLevel[LEVEL_ARRAY_SIZE]; + ofstream _logFile; + static const char * _logFormat; + static const char * _timeFormat; + static const char * _logDir; + static const char * _logName; + unsigned int _logCoutLevel; + unsigned int _logFileLevel; + time_t _timeNow; + }; +} + +namespace CPPCOMMON +{ + extern Logger loggerSingleTon; +} + +#endif diff --git a/cppcommon/str_functs.cpp b/cppcommon/str_functs.cpp new file mode 100644 index 0000000..7d26fa8 --- /dev/null +++ b/cppcommon/str_functs.cpp @@ -0,0 +1,270 @@ +#include "str_functs.h" + +namespace CPPCOMMON +{ + string joinStr(const vector& src, const string& connectorStr) + { + string res; + string tmpStr; + size_t len = src.size(); + for(size_t i = 0; i < len - 1; i++) + { + res += stripStr(src[i]); + res += connectorStr; + } + if(0 < len) + { + res += stripStr(src[len-1]); + } + return res; + } + vector splitStr(const string& source, const string& pattern) + { + vector res; + splitStr(source, res, pattern); + return res; + } + void splitStr(const string& source, vector& out_vec, const string& pattern) + { + if(0 == pattern.size()) + { + return; + } + string s = source + pattern; + string::size_type pos; + int length = s.size(); + + for(int i = 0; i < length; i++) + { + pos = s.find(pattern, i); + if(pos < length) + { + string tmp = stripStr(s.substr(i, pos - i)); + if("" != tmp) + { + out_vec.push_back(tmp); + } + i = pos + pattern.size() - 1; + } + } + } + + string stripStr(const string& str, const string& patternStr) + { + if(str.empty()) + { + return str; + } + string::size_type posL = str.find_first_not_of(patternStr); + if(string::npos == posL) + { + return str; + } + string::size_type posR = str.find_last_not_of(patternStr); + return str.substr(posL, posR - posL + 1); + + } + + bool splitStrMultiPatterns( + const string& strSrc, + vector& outVec, + const vector& patterns + ) + { + char transChar = '#'; + int transLenThreshold = 10; + string transStr; + transStr += transChar; + while(strSrc.find(transStr) != string::npos) + { + transStr += transChar; + if(transStr.size() > transLenThreshold) + return false; + } + string strSrcMutable = strSrc; + for(int i = 0; i < patterns.size(); i++) + { + strSrcMutable = replaceStr(strSrcMutable, patterns[i], transStr); + } + splitStr(strSrcMutable, outVec, transStr); + return true; + } + + string upperStr(const string& strIn) + { + string str = strIn; + transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); + return str; + } + + string lowerStr(const string& strIn) + { + string str = strIn; + transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); + return str; + } + + string replaceStr(const string& strSrc, const string& oldStr, const string& newStr, int count) + { + string strRet = strSrc; + size_t pos = 0; + int l_count = 0; + if(-1 == count) + count = strRet.size(); + while((pos = strRet.find(oldStr, pos)) != string::npos) + { + strRet.replace(pos, oldStr.size(), newStr); + if(++l_count >= count) + break; + pos += newStr.size(); + } + return strRet; + } + + unsigned int countStrDistance(const string& A, const string& B) + { + unsigned int lenA = A.size(); + unsigned int lenB = B.size(); + unsigned int len = (lenA < lenB ? lenA : lenB); + unsigned int res = lenA + lenB - 2 * len; + for(size_t i = 0; i < len; i++) + { + if(A[i] != B[i]) + res++; + } + return res; + } + + unsigned int countStrSimilarity(const string& A, const string& B) + { + unsigned int lenA = A.size(); + unsigned int lenB = B.size(); + unsigned int len = (lenA < lenB ? lenA : lenB); + unsigned int res = 0; + for(size_t i = 0; i < len; i++) + { + if(A[i] == B[i]) + res++; + } + return res; + } + + //unicode utf8 transform + size_t unicodeToUtf8(uint16_t *in, size_t len, char * out) + { + size_t res = 0; + for (int i = 0; i < len; i++) + { + uint16_t unicode = in[i]; + if (unicode >= 0x0000 && unicode <= 0x007f) + { + *out = (uint8_t)unicode; + out += 1; + res += 1; + } + else if (unicode >= 0x0080 && unicode <= 0x07ff) + { + *out = 0xc0 | (unicode >> 6); + out += 1; + *out = 0x80 | (unicode & (0xff >> 2)); + out += 1; + res += 2; + } + else if (unicode >= 0x0800 && unicode <= 0xffff) + { + *out = 0xe0 | (unicode >> 12); + out += 1; + *out = 0x80 | ((unicode >> 6) & 0x3f); + out += 1; + *out = 0x80 | (unicode & 0x3f); + out += 1; + res += 3; + } + + } + *out = '\0'; + return res; + } + + /*from: http://www.cppblog.com/lf426/archive/2008/03/31/45796.html */ + int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode) + { + int length; + const unsigned char* utf8 = (const unsigned char*) inutf8; + const unsigned char* t = (const unsigned char*) inutf8; + + length = 0; + while (utf8 - t < len) + { + if ( *(unsigned char *) utf8 <= 0x7f ) + { + //expand with 0s. + *unicode++ = *utf8++; + } + //2 byte. + else if ( *(unsigned char *) utf8 <= 0xdf ) + { + *unicode++ = ((*(unsigned char *) utf8 & 0x1f) << 6) + ((*(unsigned char *) (utf8 + 1)) & 0x3f); + utf8 += 2; + } + //3 byte.Chinese may use 3 byte. + else { + *unicode++ = ((int) (*(unsigned char *) utf8 & 0x0f) << 12) + + ((*(unsigned char *) (utf8 + 1) & 0x3f) << 6) + + (*(unsigned char *) (utf8 + 2) & 0x3f); + utf8 += 3; + } + length++; + } + + *unicode = 0; + return length; + } + +} + +#ifdef TEST_STR_FUNCTS +#include +using namespace CPPCOMMON; +using namespace std; +int main() +{ + //string s = " \t\n1 a h \n"; + //cout<<"["< vec; + //splitStr("1 3 4", vec); + //char * a[] = {"3","jaj","ads"}; + //vector pats(a,a+3); + //vec.clear(); + //splitStrMultiPattern("1 #3 jajs5 asdf3ads 4", vec, pats); + //for(int i=0;i +#include +#include +#include +#include +#include +#include +namespace CPPCOMMON +{ + using namespace std; + string joinStr(const vector& source, const string& connector); + vector splitStr(const string& source, const string& pattern = " \t\n"); + void splitStr(const string& source, vector& out_vec, const string& pattern = " \t\n"); + bool splitStrMultiPatterns( + const string& strSrc, + vector& outVec, + const vector& patterns + ); + string upperStr(const string& str); + string lowerStr(const string& str); + string replaceStr(const string& strSrc, const string& oldStr, const string& newStr, int count = -1); + string stripStr(const string& str, const string& patternstr = " \n\t"); + unsigned int countStrDistance(const string& A, const string& B); + unsigned int countStrSimilarity(const string& A, const string& B); + + + size_t unicodeToUtf8(uint16_t *in, size_t len, char * out); + int utf8ToUnicode(const char* inutf8, int len, uint16_t* unicode); +} +#endif diff --git a/cppcommon/testdata/dict.txt b/cppcommon/testdata/dict.txt new file mode 100644 index 0000000..e43de3d --- /dev/null +++ b/cppcommon/testdata/dict.txt @@ -0,0 +1,100 @@ +AT&T 3 nz +B超 3 n +c# 3 nz +C# 3 nz +c++ 3 nz +C++ 3 nz +T恤 4 n +一 217830 m +一一 1670 m +一一二 11 m +一一例 3 m +一一分 8 m +一一列举 34 i +一一对 9 m +一一对应 43 l +一一记 2 m +一一道来 4 l +一丁 18 d +一丁不识 3 i +一丁点 3 m +一丁点儿 24 m +一七 22 m +一七八不 3 l +一万 442 m +一万一千 4 m +一万一千五百二十颗 2 m +一万一千八百八十斤 2 m +一万一千多间 2 m +一万一千零九十五册 4 m +一万七千 5 m +一万七千余 2 m +一万七千多 2 m +一万七千多户 2 m +一万万 4 m +一万万两 4 m +一万三千 8 m +一万三千五百一十七 2 m +一万三千五百斤 4 m +一万三千余种 2 m +一万三千块 2 m +一万两 124 m +一万两万 4 m +一万两千 3 m +一万个 62 m +一万九千 2 m +一万九千余 2 m +一万二 10 m +一万二千 7 m +一万二千两 2 m +一万二千五百 4 m +龛 223 ng +龜 2 zg +龟 903 ns +龟儿子 123 n +龟兆 3 nz +龟兹 215 ns +龟兹王 3 nrt +龟冷搘床 3 v +龟冷支床 3 n +龟卜 3 n +龟厌不告 3 l +龟壳 33 n +龟壳花 3 n +龟头 34 n +龟头炎 3 n +龟山 23 ns +龟山乡 3 ns +龟山岛 3 ns +龟年鹤寿 3 ns +龟年鹤算 3 l +龟文 3 nz +龟文写迹 3 n +龟文鸟迹 3 n +龟板 10 n +龟毛免角 3 n +龟毛兔角 3 n +龟溪 3 ns +龟玉 3 nz +龟王 3 nz +龟甲 92 ns +龟甲胶 3 nz +龟筮 3 n +龟纹 3 n +龟缩 29 v +龟肉 3 n +龟背 21 n +龟背竹 3 n +龟苓膏 3 n +龟苗 3 n +龟裂 34 v +龟足 5 v +龟鉴 2 n +龟镜 3 nz +龟鳖 3 n +龟鹤遐寿 3 l +龟龄鹤算 3 n +龟龙片甲 3 nz +龟龙麟凤 3 ns +龠 5 g +龢 732 zg diff --git a/cppcommon/vec_functs.cpp b/cppcommon/vec_functs.cpp new file mode 100644 index 0000000..9c8150f --- /dev/null +++ b/cppcommon/vec_functs.cpp @@ -0,0 +1,30 @@ +#include "vec_functs.h" + +#ifdef TEST_VEC_FUNCTS +using namespace CPPCOMMON; +int main() +{ + vector vec; + for(int i=0;i<5;i++) + vec.push_back(i); + vector pats; + pats.push_back(0); + pats.push_back(3); + //pats.push_back(4); + vector > > res; + splitVec(vec,res,pats); + cout<(vec, 0)<(vec, -1)< +#include +#include +#include +#include + +#define FOR_VECTOR(vec, i) for(size_t i = 0; i < vec.size(); i++) + +#define PRINT_VECTOR(vec) FOR_VECTOR(vec, i)\ +{\ + cout< +#include +#include +#include +#include +namespace CPPCOMMON +{ + using namespace std; + template + bool isInVec(const vector& vec, const T& item) + { + typename vector::const_iterator it = find(vec.begin(), vec.end(), item); + return it != vec.end(); + } + template + void splitVec(const vector& vecSrc, vector< pair > >& outVec, const vector& patterns) + { + vector tmp; + T pattern; + size_t patternSize = patterns.size(); + for(size_t i = 0; i < vecSrc.size(); i++) + { + size_t patternPos = patternSize; + for(size_t j = 0; j < patternSize; j++) + { + if(patterns[j] == vecSrc[i]) + { + patternPos = j; + break; + } + } + if(patternPos != patternSize) + { + if(!tmp.empty()) + { + outVec.push_back(make_pair >(pattern, tmp)); + tmp.clear(); + } + pattern = patterns[patternPos]; + } + else + { + tmp.push_back(vecSrc[i]); + } + } + if(!tmp.empty()) + { + outVec.push_back(make_pair >(pattern, tmp)); + } + } + + template + void splitVec(const vector& vecSrc, vector< vector >& outVec, const vector& patternVec) + { + vector tmp; + for(size_t i = 0; i < vecSrc.size(); i++) + { + bool flag = false; + for(size_t j = 0; j < patternVec.size(); j++) + { + if(patternVec[j] == vecSrc[i]) + { + flag = true; + break; + } + } + if(flag) + { + if(!tmp.empty()) + { + outVec.push_back(tmp); + tmp.clear(); + } + } + else + { + tmp.push_back(vecSrc[i]); + } + } + if(!tmp.empty()) + { + outVec.push_back(tmp); + } + } + +} +#endif +