From b0521c6d6eae8e23672d5918d1d8aecf829392c8 Mon Sep 17 00:00:00 2001 From: wyy Date: Wed, 30 Oct 2013 23:11:51 -0700 Subject: [PATCH 01/25] update cppjieba to adapter new limonp --- cppjieba/TransCode.hpp | 1 - cppjieba/structs.h | 25 ++-- demo/keywordext.cpp | 2 +- demo/segment.cpp | 2 +- demo/server.cpp | 2 +- husky/HttpReqInfo.hpp | 25 ++-- husky/ServerFrame.cpp | 2 +- limonp/ArgvContext.hpp | 18 ++- limonp/MysqlClient.hpp | 11 +- limonp/logger.hpp | 1 - limonp/map_functs.hpp | 147 ++++++++++----------- limonp/str_functs.hpp | 282 +++++++++++++++++++++++++++++++---------- limonp/typedefs.h | 21 --- limonp/vec_functs.hpp | 142 --------------------- 14 files changed, 340 insertions(+), 341 deletions(-) delete mode 100644 limonp/typedefs.h delete mode 100644 limonp/vec_functs.hpp diff --git a/cppjieba/TransCode.hpp b/cppjieba/TransCode.hpp index 46ede56..e49c84d 100644 --- a/cppjieba/TransCode.hpp +++ b/cppjieba/TransCode.hpp @@ -8,7 +8,6 @@ #include "globals.h" #include -#include namespace CppJieba { diff --git a/cppjieba/structs.h b/cppjieba/structs.h index 38fd5ec..88c5894 100644 --- a/cppjieba/structs.h +++ b/cppjieba/structs.h @@ -74,7 +74,7 @@ namespace CppJieba KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo) { } - inline string toString() const + string toString() const { string tmp; TransCode::encode(word, tmp); @@ -89,16 +89,23 @@ namespace CppJieba return *this; } }; - - inline string joinWordInfos(const vector& vec) + + inline ostream& operator << (ostream& os, const KeyWordInfo& info) { - vector tmp; - for(uint i = 0; i < vec.size(); i++) - { - tmp.push_back(vec[i].toString()); - } - return joinStr(tmp, ","); + string tmp; + TransCode::encode(info.word, tmp); + return os << "{words:" << tmp << ", weight:" << info.weight << ", idf:" << info.idf << "}"; } + + //inline string joinWordInfos(const vector& vec) + //{ + // vector tmp; + // for(uint i = 0; i < vec.size(); i++) + // { + // tmp.push_back(vec[i].toString()); + // } + // return joinStr(tmp, ","); + //} } #endif diff --git a/demo/keywordext.cpp b/demo/keywordext.cpp index bbdd781..b7713d2 100644 --- a/demo/keywordext.cpp +++ b/demo/keywordext.cpp @@ -23,7 +23,7 @@ void testKeyWordExt(const char * dictPath, const char * filePath) if(!line.empty()) { ext.extract(line, res, 20); - cout<cut(line, res); - cout<& mp) { diff --git a/husky/ServerFrame.cpp b/husky/ServerFrame.cpp index 9903fb7..5d83eb8 100644 --- a/husky/ServerFrame.cpp +++ b/husky/ServerFrame.cpp @@ -140,7 +140,7 @@ namespace Husky if(SOCKET_ERROR==nRetCode) { - LogError("error [%s]", strerror(errno)); + LogDebug("error [%s]", strerror(errno)); closesocket(hClientSock); continue; } diff --git a/limonp/ArgvContext.hpp b/limonp/ArgvContext.hpp index f4aeef2..8be15f1 100644 --- a/limonp/ArgvContext.hpp +++ b/limonp/ArgvContext.hpp @@ -10,7 +10,6 @@ #include #include "str_functs.hpp" #include "map_functs.hpp" -#include "vec_functs.hpp" namespace Limonp { @@ -43,12 +42,7 @@ namespace Limonp } ~ArgvContext(){}; public: - string toString() - { - stringstream ss; - ss<(_args)<(_mpss)<(_sset); - return ss.str(); - } + friend ostream& operator << (ostream& os, const ArgvContext& args); string operator [](uint i) { if(i < _args.size()) @@ -81,6 +75,16 @@ namespace Limonp set _sset; }; + + inline ostream& operator << (ostream& os, const ArgvContext& args) + { + return os< #include #include "logger.hpp" -#include "vec_functs.hpp" namespace Limonp { @@ -21,8 +20,9 @@ namespace Limonp const char * const USER; const char * const PASSWD; const char * const DB; + const char * const CHARSET; public: - MysqlClient(const char* host, uint port, const char* user, const char* passwd, const char* db): HOST(host), PORT(port), USER(user), PASSWD(passwd), DB(db){ _conn = NULL;}; + MysqlClient(const char* host, uint port, const char* user, const char* passwd, const char* db, const char* charset = "utf8"): HOST(host), PORT(port), USER(user), PASSWD(passwd), DB(db), CHARSET(charset){ _conn = NULL;}; ~MysqlClient(){dispose();}; public: bool init() @@ -42,10 +42,17 @@ namespace Limonp return false; } + if(mysql_set_character_set(_conn, CHARSET)) + { + LogError("mysql_set_character_set [%s] failed.", CHARSET); + return false; + } + //set reconenct char value = 1; mysql_options(_conn, MYSQL_OPT_RECONNECT, &value); + LogInfo("MysqlClient {host: %s, port:%d, database:%s, charset:%s}", HOST, PORT, DB, CHARSET); return true; } bool dispose() diff --git a/limonp/logger.hpp b/limonp/logger.hpp index 5ccfdec..ed1a3c2 100644 --- a/limonp/logger.hpp +++ b/limonp/logger.hpp @@ -13,7 +13,6 @@ #include #include "io_functs.hpp" #include "str_functs.hpp" -#include "typedefs.h" #define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, __FILE__, __LINE__, fmt, ## __VA_ARGS__) #define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, __FILE__, __LINE__, fmt, ## __VA_ARGS__) diff --git a/limonp/map_functs.hpp b/limonp/map_functs.hpp index 9479691..44d472c 100644 --- a/limonp/map_functs.hpp +++ b/limonp/map_functs.hpp @@ -11,91 +11,84 @@ #include #include #include -#include "typedefs.h" + +#include +#define HashMap std::tr1::unordered_map namespace Limonp { using namespace std; - template - string setToString(const set& st) - { - if(st.empty()) - { - return "{}"; - } - stringstream ss; - ss<<'{'; - typename set::const_iterator it = st.begin(); - ss<<*it; - it++; - while(it != st.end()) - { - ss<<", "<<*it; - it++; - } - ss<<'}'; - return ss.str(); - } + //template + // string setToString(const set& st) + // { + // if(st.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename set::const_iterator it = st.begin(); + // ss<<*it; + // it++; + // while(it != st.end()) + // { + // ss<<", "<<*it; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } - template - string mapToString(const map& mp) - { - if(mp.empty()) - { - return "{}"; - } - stringstream ss; - ss<<'{'; - typename map::const_iterator it = mp.begin(); - ss<first<<": "<second; - it++; - while(it != mp.end()) - { - ss<<", "<first<<": "<second; - it++; - } - ss<<'}'; - return ss.str(); - } + //template + // string mapToString(const map& mp) + // { + // if(mp.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename map::const_iterator it = mp.begin(); + // ss<first<<": "<second; + // it++; + // while(it != mp.end()) + // { + // ss<<", "<first<<": "<second; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } - template - string HashMapToString(const HashMap& mp) - { - if(mp.empty()) - { - return "{}"; - } - stringstream ss; - ss<<'{'; - typename HashMap::const_iterator it = mp.begin(); - ss<first<<": "<second; - it++; - while(it != mp.end()) - { - ss<<", "<first<<": "<second; - it++; - } - ss<<'}'; - return ss.str(); - } - template - string pairToString(const pair& p) - { - stringstream ss; - ss< - void printMap(const map& mp) - { - for(typename map::const_iterator it = mp.begin(); it != mp.end(); it++) - { - cout<first<<' '<second< + // string HashMapToString(const HashMap& mp) + // { + // if(mp.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename HashMap::const_iterator it = mp.begin(); + // ss<first<<": "<second; + // it++; + // while(it != mp.end()) + // { + // ss<<", "<first<<": "<second; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } + //template + // string pairToString(const pair& p) + // { + // stringstream ss; + // ss< vT getMap(const map& mp, const kT & key, const vT & defaultVal) diff --git a/limonp/str_functs.hpp b/limonp/str_functs.hpp index 0116995..e3c87ab 100644 --- a/limonp/str_functs.hpp +++ b/limonp/str_functs.hpp @@ -15,9 +15,14 @@ #include #include #include -#include "typedefs.h" #include #include +#include +#include +#include + +#define print(x) cout<<(x)<& src, string& dest, const string& connectorStr) - { - if(src.empty()) - { - return false; - } - for(uint i = 0; i < src.size() - 1; i++) - { - dest += src[i]; - dest += connectorStr; - } - dest += src[src.size() - 1]; - return true; - } + //inline bool joinStr(const vector& src, string& dest, const string& connectorStr) + //{ + // if(src.empty()) + // { + // return false; + // } + // for(uint i = 0; i < src.size() - 1; i++) + // { + // dest += src[i]; + // dest += connectorStr; + // } + // dest += src[src.size() - 1]; + // return true; + //} - inline string joinStr(const vector& source, const string& connector) - { - string res; - joinStr(source, res, connector); - return res; - } + //inline string joinStr(const vector& source, const string& connector) + //{ + // string res; + // joinStr(source, res, connector); + // return res; + //} + + template + void join(T begin, T end, string& res, const string& connector) + { + if(begin == end) + { + return; + } + stringstream ss; + ss<<*begin; + begin++; + while(begin != end) + { + ss << connector << *begin; + begin ++; + } + res = ss.str(); + } + + template + string join(T begin, T end, const string& connector) + { + string res; + join(begin ,end, res, connector); + return res; + } + + template + ostream& operator << (ostream& os, const pair& pr) + { + os << pr.first << ":" << pr.second ; + return os; + } + + template + ostream& operator << (ostream& os, const vector& vec) + { + if(vec.empty()) + { + return os << "[]"; + } + os<<"[\""< + string& operator << (string& str, const T& obj) + { + stringstream ss; + ss << obj; // call ostream& operator << (ostream& os, + return str = ss.str(); + } + + template + ostream& operator << (ostream& os, const map& mp) + { + if(mp.empty()) + { + os<<"{}"; + return os; + } + os<<'{'; + typename map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } + + + //template + // string& operator << (string& str, const map& mp) + // { + // if(mp.empty()) + // { + // str = "{}"; + // return str; + // } + // stringstream ss; + // ss<<'{'; + // typename map::const_iterator it = mp.begin(); + // ss<<*it; + // it++; + // while(it != mp.end()) + // { + // ss<<", "<<*it; + // it++; + // } + // ss<<'}'; + // str = ss.str(); + // return str; + // } + + template + ostream& operator << (ostream& os, const HashMap& mp) + { + if(mp.empty()) + { + return os << "{}"; + } + os<<'{'; + typename map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it++; + } + return os<<'}'; + } + + //template + // string& operator << (string& str, const set& st) + // { + // stringstream ss; + // ss << st; + // return str = ss.str(); + // } + + template + ostream& operator << (ostream& os, const set& st) + { + if(st.empty()) + { + os << "{}"; + return os; + } + os<<'{'; + typename set::const_iterator it = st.begin(); + os<<*it; + it++; + while(it != st.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } inline bool splitStr(const string& src, vector& res, const string& pattern) { @@ -104,26 +258,24 @@ namespace Limonp return true; } res.push_back(src.substr(start, end - start)); - if(end == src.size() - 1) - { - res.push_back(""); - break; - } + if(end == src.size() - 1) + { + res.push_back(""); + break; + } start = end + 1; } return true; } - inline string upperStr(const string& strIn) + inline string& upper(string& str) { - string str = strIn; transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); return str; } - inline string lowerStr(const string& strIn) + inline string& lower(string& str) { - string str = strIn; transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); return str; } @@ -183,40 +335,40 @@ namespace Limonp return str.find(ch) != string::npos; } - inline void extractWords(const string& sentence, vector& words) - { - bool flag = false; - uint lhs = 0, len = 0; - for(uint i = 0; i < sentence.size(); i++) - { - char x = sentence[i]; - if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a)) - { - if(flag) - { - len ++; - } - else - { - lhs = i; - len = 1; - } - flag = true; - } - else - { - if(flag) - { - words.push_back(string(sentence, lhs, len)); - } - flag = false; - } - } - if(flag) - { - words.push_back(string(sentence, lhs, len)); - } - } + //inline void extractWords(const string& sentence, vector& words) + //{ + // bool flag = false; + // uint lhs = 0, len = 0; + // for(uint i = 0; i < sentence.size(); i++) + // { + // char x = sentence[i]; + // if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a)) + // { + // if(flag) + // { + // len ++; + // } + // else + // { + // lhs = i; + // len = 1; + // } + // flag = true; + // } + // else + // { + // if(flag) + // { + // words.push_back(string(sentence, lhs, len)); + // } + // flag = false; + // } + // } + // if(flag) + // { + // words.push_back(string(sentence, lhs, len)); + // } + //} } diff --git a/limonp/typedefs.h b/limonp/typedefs.h deleted file mode 100644 index a8da002..0000000 --- a/limonp/typedefs.h +++ /dev/null @@ -1,21 +0,0 @@ -/************************************ - * file enc : utf8 - * author : wuyanyi09@gmail.com -************************************/ -#ifndef LIMONP_TYPEDEFS_H -#define LIMONP_TYPEDEFS_H - -#include -#include -#include -#include -#define HashMap std::tr1::unordered_map - -namespace Limonp -{ - typedef std::vector Unicode; - typedef std::vector::const_iterator UnicodeConstIterator; -} - - -#endif diff --git a/limonp/vec_functs.hpp b/limonp/vec_functs.hpp deleted file mode 100644 index ac18548..0000000 --- a/limonp/vec_functs.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/************************************ - * file enc : ascii - * author : wuyanyi09@gmail.com -************************************/ -#ifndef LIMONP_VEC_FUNCTS_H -#define LIMONP_VEC_FUNCTS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define FOR_VECTOR(vec, i) for(size_t i = 0; i < vec.size(); i++) - -#define PRINT_VECTOR(vec) FOR_VECTOR(vec, i)\ -{\ - cout< - bool vecToString(const vector& vec, string& res) - { - if(vec.empty()) - { - res = "[]"; - return false; - } - stringstream ss; - ss<<"[\""< - string vecToString(const vector& vec) - { - string res; - vecToString(vec, res); - return res; - } - - template - bool isInVec(const vector& vec, const T& item) - { - typename vector::const_iterator it = find(vec.begin(), vec.end(), item); - return it != vec.end(); - } - template - void splitVec(const vector& vecSrc, vector< pair > >& outVec, const vector& patterns) - { - vector tmp; - T pattern; - size_t patternSize = patterns.size(); - for(size_t i = 0; i < vecSrc.size(); i++) - { - size_t patternPos = patternSize; - for(size_t j = 0; j < patternSize; j++) - { - if(patterns[j] == vecSrc[i]) - { - patternPos = j; - break; - } - } - if(patternPos != patternSize) - { - if(!tmp.empty()) - { - outVec.push_back(make_pair >(pattern, tmp)); - tmp.clear(); - } - pattern = patterns[patternPos]; - } - else - { - tmp.push_back(vecSrc[i]); - } - } - if(!tmp.empty()) - { - outVec.push_back(make_pair >(pattern, tmp)); - } - } - - template - void splitVec(const vector& vecSrc, vector< vector >& outVec, const vector& patternVec) - { - vector tmp; - for(size_t i = 0; i < vecSrc.size(); i++) - { - bool flag = false; - for(size_t j = 0; j < patternVec.size(); j++) - { - if(patternVec[j] == vecSrc[i]) - { - flag = true; - break; - } - } - if(flag) - { - if(!tmp.empty()) - { - outVec.push_back(tmp); - tmp.clear(); - } - } - else - { - tmp.push_back(vecSrc[i]); - } - } - if(!tmp.empty()) - { - outVec.push_back(tmp); - } - } -} - -#endif From 0a980536abcf6b977372c90a4e67820c7a136a45 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 04:42:39 -0800 Subject: [PATCH 02/25] bring in cmake --- CMakeLists.txt | 4 ++++ cppjieba/CMakeLists.txt | 1 + 2 files changed, 5 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 cppjieba/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d802ac8 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,4 @@ +PROJECT(CPPJIEBA) +ADD_SUBDIRECTORY(cppjieba) +ADD_SUBDIRECTORY(husky) +ADD_SUBDIRECTORY(limonp) diff --git a/cppjieba/CMakeLists.txt b/cppjieba/CMakeLists.txt new file mode 100644 index 0000000..cac5c4d --- /dev/null +++ b/cppjieba/CMakeLists.txt @@ -0,0 +1 @@ +ADD_LIBRARY(cppjieba) From a44b04df20c27a43b6f9d2fd4dab0de821fb9f5e Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 05:53:27 -0800 Subject: [PATCH 03/25] add cmakelists.txt --- CMakeLists.txt | 2 +- cppjieba/CMakeLists.txt | 4 ++- cppjieba/Makefile | 66 ----------------------------------------- demo/CMakeLists.txt | 8 +++++ demo/Makefile | 53 --------------------------------- husky/CMakeLists.txt | 3 ++ husky/Makefile | 42 -------------------------- 7 files changed, 15 insertions(+), 163 deletions(-) delete mode 100644 cppjieba/Makefile create mode 100644 demo/CMakeLists.txt delete mode 100644 demo/Makefile create mode 100644 husky/CMakeLists.txt delete mode 100644 husky/Makefile diff --git a/CMakeLists.txt b/CMakeLists.txt index d802ac8..098febd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(cppjieba) ADD_SUBDIRECTORY(husky) -ADD_SUBDIRECTORY(limonp) +ADD_SUBDIRECTORY(demo) diff --git a/cppjieba/CMakeLists.txt b/cppjieba/CMakeLists.txt index cac5c4d..81bae4f 100644 --- a/cppjieba/CMakeLists.txt +++ b/cppjieba/CMakeLists.txt @@ -1 +1,3 @@ -ADD_LIBRARY(cppjieba) +SET(LIBCPPJIEBA_SRC HMMSegment.cpp KeyWordExt.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) +INCLUDE_DIRECTORIES(../limonp) +ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) diff --git a/cppjieba/Makefile b/cppjieba/Makefile deleted file mode 100644 index c4febcf..0000000 --- a/cppjieba/Makefile +++ /dev/null @@ -1,66 +0,0 @@ -CXX := g++ -LD := g++ -AR := ar rc - -DEBUG_CXXFLAGS := -g -Wall -DDEBUG -RELEASE_CXXFLAGS := -Wall -O3 - -ifeq (YES, ${RELEASE}) - CXXFLAGS := ${RELEASE_CXXFLAGS} - LDFLAGS := ${RELEASE_LDFLAGS} -else - CXXFLAGS := ${DEBUG_CXXFLAGS} - LDFLAGS := ${DEBUG_LDFLAGS} -endif - -SOURCES := $(wildcard *.cpp) -OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) - -INC := -I../limonp - -LIBA := libcppjieba.a - -# remove the objs after compilation -.INTERMEDIATE: -#.PHONY: clean $(CMLIB) -.PHONY: clean - -all: $(LIBA) - -# This is a suffix rule -#.c.o: -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) $< $(INC) - - -${LIBA}: $(OBJS) - $(AR) $@ $(OBJS) - -#unit test -Trie.ut: Trie.cpp Trie.h globals.h TransCode.cpp TransCode.hpp $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) Trie.cpp TransCode.cpp -DTRIE_UT $(CMLIB) - -MPSegment.ut: MPSegment.cpp Trie.cpp MPSegment.h Trie.h globals.h $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) MPSegment.cpp Trie.cpp TransCode.cpp -DSEGMENT_UT $(CMLIB) - -KeyWordExt.ut: KeyWordExt.cpp KeyWordExt.h MPSegment.h Trie.h globals.h TransCode.cpp TransCode.hpp $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) KeyWordExt.cpp MPSegment.cpp Trie.cpp TransCode.cpp -DKEYWORDEXT_UT $(CMLIB) - -TransCode.ut: TransCode.cpp TransCode.hpp globals.h $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) TransCode.cpp -DCPPJIEBA_TRANSCODE_UT $(CMLIB) -HMMSegment.ut: HMMSegment.cpp TransCode.cpp TransCode.hpp HMMSegment.h $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) TransCode.cpp HMMSegment.cpp -DHMMSEGMENT_UT $(CMLIB) -MixSegment.ut: MixSegment.cpp MixSegment.h HMMSegment.cpp MPSegment.cpp Trie.cpp MPSegment.h Trie.h globals.h $(CMLIB) - $(CXX) -o $@ $(CXXFLAGS) MixSegment.cpp HMMSegment.cpp MPSegment.cpp Trie.cpp TransCode.cpp -DMIXSEGMENT_UT $(CMLIB) -ChineseFilter.ut: ChineseFilter.cpp ChineseFilter.hpp - $(CXX) -o $@ $(CXXFLAGS) ChineseFilter.cpp -DUT - -clean: - rm -f *.o *.d *.d.* *.ut $(LIBA) - -sinclude $(SOURCES:.cpp=.d) -%.d:%.cpp - @set -e; rm -f $@; \ - $(CXX) -MM $< > $@.$$$$; \ - sed 's,\($*\).o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - rm -f $@.$$$$ diff --git a/demo/CMakeLists.txt b/demo/CMakeLists.txt new file mode 100644 index 0000000..8cd1fc2 --- /dev/null +++ b/demo/CMakeLists.txt @@ -0,0 +1,8 @@ +ADD_EXECUTABLE(segment.demo segment.cpp) +ADD_EXECUTABLE(server.demo server.cpp) +ADD_EXECUTABLE(keywordext.demo keywordext.cpp) +INCLUDE_DIRECTORIES(../limonp ../cppjieba ../husky) +LINK_DIRECTORIES(../cppjieba ../husky) +TARGET_LINK_LIBRARIES(segment.demo cppjieba) +TARGET_LINK_LIBRARIES(server.demo cppjieba husky -lpthread) +TARGET_LINK_LIBRARIES(keywordext.demo cppjieba) diff --git a/demo/Makefile b/demo/Makefile deleted file mode 100644 index a305a01..0000000 --- a/demo/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -CXX := g++ -LD := g++ -AR := ar rc - - -DEBUG_CXXFLAGS := -g -Wall -DDEBUG -RELEASE_CXXFLAGS := -Wall -O3 - -ifeq (YES, ${DEBUG}) - CXXFLAGS := ${DEBUG_CXXFLAGS} - LDFLAGS := ${DEBUG_LDFLAGS} -else - CXXFLAGS := ${RELEASE_CXXFLAGS} - LDFLAGS := ${RELEASE_LDFLAGS} -endif - -INCS := -I../limonp -LINK := -lpthread - -SOURCES := $(wildcard *.cpp) -OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) -DEMOS := $(patsubst %.cpp,%.demo,$(SOURCES)) - -CPPJIEBADIR := ../cppjieba -LIBCPPJIEBA := $(CPPJIEBADIR)/libcppjieba.a - -HUSKYDIR := ../husky -LIBHUSKYA := $(HUSKYDIR)/libhusky.a - -.PHONY: clean $(LIBCPPJIEBA) - -all: $(DEMOS) - -%.demo: %.cpp $(LIBCPPJIEBA) $(LIBHUSKYA) - $(CXX) -o $@ $(CXXFLAGS) $^ $(INCS) $(LINK) - -$(LIBCPPJIEBA): - cd $(CPPJIEBADIR) && $(MAKE) - -$(LIBHUSKYA): - cd $(HUSKYDIR) && $(MAKE) - -clean: - rm -f *.o *.ut *.d *.d.* $(DEMOS) - cd $(CPPJIEBADIR) && make clean - cd $(HUSKYDIR) && make clean - -sinclude $(SOURCES:.cpp=.d) -%.d:%.cpp - @set -e; rm -f $@; \ - $(CXX) -MM $< > $@.$$$$; \ - sed 's,\($*\).o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - rm -f $@.$$$$ diff --git a/husky/CMakeLists.txt b/husky/CMakeLists.txt new file mode 100644 index 0000000..b968744 --- /dev/null +++ b/husky/CMakeLists.txt @@ -0,0 +1,3 @@ +SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) +INCLUDE_DIRECTORIES(../limonp) +ADD_LIBRARY(husky SHARED ${LIBHUSKY_SRC}) diff --git a/husky/Makefile b/husky/Makefile deleted file mode 100644 index 84395f8..0000000 --- a/husky/Makefile +++ /dev/null @@ -1,42 +0,0 @@ -CXX := g++ -LD := g++ -AR := ar rc - -DEBUG_CXXFLAGS := -g -Wall -DDEBUG -RELEASE_CXXFLAGS := -Wall -O3 - -ifeq (YES, ${RELEASE}) - CXXFLAGS := ${RELEASE_CXXFLAGS} - LDFLAGS := ${RELEASE_LDFLAGS} -else - CXXFLAGS := ${DEBUG_CXXFLAGS} - LDFLAGS := ${DEBUG_LDFLAGS} -endif - -DOLINK := $(LD) $(LDFLAGS) -o $@ $^ -DOPACK := $(AR) -SOURCES = $(wildcard *.cpp) -OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) - -INC := -I../limonp -LIBA := libhusky.a - -.PHONY: clean - -all: $(LIBA) - -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) $< $(INC) - -${LIBA}: $(OBJS) - $(DOPACK) $@ $(OBJS) - -clean: - rm -f *.o *.d *.d.* $(LIBA) - -sinclude $(SOURCES:.cpp=.d) -%.d:%.cpp - @set -e; rm -f $@; \ - $(CXX) -MM $< > $@.$$$$; \ - sed 's,\($*\).o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - rm -f $@.$$$$ From 308e247476e6dce402f2d2ee4a938aa52876b621 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 06:00:35 -0800 Subject: [PATCH 04/25] rm scripts/* --- scripts/add_header.sh | 1 - scripts/check_dict.py | 29 ----------------------------- scripts/filter_dict.py | 23 ----------------------- scripts/iconv_dict.py | 15 --------------- 4 files changed, 68 deletions(-) delete mode 100755 scripts/add_header.sh delete mode 100755 scripts/check_dict.py delete mode 100755 scripts/filter_dict.py delete mode 100755 scripts/iconv_dict.py diff --git a/scripts/add_header.sh b/scripts/add_header.sh deleted file mode 100755 index e27b7a8..0000000 --- a/scripts/add_header.sh +++ /dev/null @@ -1 +0,0 @@ -sed -i '1i/************************************\n * file enc : utf8\n * author : wuyanyi09@gmail.com\n************************************/' ../src/*.h ../src/*.cpp ../src/*.tcc diff --git a/scripts/check_dict.py b/scripts/check_dict.py deleted file mode 100755 index ac528f7..0000000 --- a/scripts/check_dict.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) == 1: - print "usage : %s dict_file1 dict_file2 ..." - exit(1) - -d = {} - -for fname in sys.argv[1:]: - with open(fname, "r") as fin: - for i, line in enumerate(fin): - try: - word, cnt, tag = line.strip().split(" ") - if word in d: - print "error file[%s] line[%s] : %s" %(fname, i, line) - exit(1) - else: - d[word] = True - - if 0 >= int(cnt) : - print "error file[%s] line[%s] : %s" %(fname, i, line) - exit(1) - except Exception as err: - print "error file[%s] line[%s] : %s" %(fname, i, line) - exit(1) - -print "OK" diff --git a/scripts/filter_dict.py b/scripts/filter_dict.py deleted file mode 100755 index 8cb247a..0000000 --- a/scripts/filter_dict.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) == 1: - print "usage : %s dict_file_path" - exit(1) - -d = {} -with open(sys.argv[1], "r") as fin: - for i, line in enumerate(fin): - word, cnt, tag = line.strip().split(" ") - if word in d: - #print "error file[%s] line[%s] : %s" %(fname, i, line) - #exit(1) - continue - else: - d[word] = True - if 0 >= int(cnt) : - continue - - print line.strip() - diff --git a/scripts/iconv_dict.py b/scripts/iconv_dict.py deleted file mode 100755 index 483ecec..0000000 --- a/scripts/iconv_dict.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python - -import sys - -if len(sys.argv) != 4: - print "usage : %s from_enc to_enc dict_file_path \nexample: %s gbk utf-8 fname" %(__file__, __file__) - exit(1) - -with open(sys.argv[3], "r") as fin: - for i, line in enumerate(fin): - try: - print line.strip().decode(sys.argv[1]).encode(sys.argv[2]) - except Exception as err: - print >> sys.stderr, err - From e666a0123ea568448d12efd4a46c0ea10285dfc8 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 06:11:45 -0800 Subject: [PATCH 05/25] mv demo/ scripts --- {demo => scripts}/example.sh | 0 {demo => scripts}/restart.sh | 0 {demo => scripts}/start.sh | 0 {demo => scripts}/stop.sh | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {demo => scripts}/example.sh (100%) rename {demo => scripts}/restart.sh (100%) rename {demo => scripts}/start.sh (100%) rename {demo => scripts}/stop.sh (100%) diff --git a/demo/example.sh b/scripts/example.sh similarity index 100% rename from demo/example.sh rename to scripts/example.sh diff --git a/demo/restart.sh b/scripts/restart.sh similarity index 100% rename from demo/restart.sh rename to scripts/restart.sh diff --git a/demo/start.sh b/scripts/start.sh similarity index 100% rename from demo/start.sh rename to scripts/start.sh diff --git a/demo/stop.sh b/scripts/stop.sh similarity index 100% rename from demo/stop.sh rename to scripts/stop.sh From 4680586e65f7090b38c37cbc0f77f0842de11b93 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 07:04:36 -0800 Subject: [PATCH 06/25] add install into cmakelists.txt --- CMakeLists.txt | 1 + cppjieba/CMakeLists.txt | 8 ++++++++ husky/CMakeLists.txt | 3 +++ limonp/CMakeLists.txt | 2 ++ 4 files changed, 14 insertions(+) create mode 100644 limonp/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 098febd..2273c21 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,5 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(cppjieba) ADD_SUBDIRECTORY(husky) +ADD_SUBDIRECTORY(limonp) ADD_SUBDIRECTORY(demo) diff --git a/cppjieba/CMakeLists.txt b/cppjieba/CMakeLists.txt index 81bae4f..03d0ae0 100644 --- a/cppjieba/CMakeLists.txt +++ b/cppjieba/CMakeLists.txt @@ -1,3 +1,11 @@ SET(LIBCPPJIEBA_SRC HMMSegment.cpp KeyWordExt.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) + INCLUDE_DIRECTORIES(../limonp) + ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) + +SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) + + +INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) +INSTALL(FILES ChineseFilter.hpp HMMSegment.h KeyWordExt.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/husky/CMakeLists.txt b/husky/CMakeLists.txt index b968744..d77435d 100644 --- a/husky/CMakeLists.txt +++ b/husky/CMakeLists.txt @@ -1,3 +1,6 @@ SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) INCLUDE_DIRECTORIES(../limonp) ADD_LIBRARY(husky SHARED ${LIBHUSKY_SRC}) + +INSTALL(TARGETS husky LIBRARY DESTINATION lib/CppJieba) +INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) diff --git a/limonp/CMakeLists.txt b/limonp/CMakeLists.txt new file mode 100644 index 0000000..e01bc74 --- /dev/null +++ b/limonp/CMakeLists.txt @@ -0,0 +1,2 @@ +FILE(GLOB HEAD_HPP_LIST "*.hpp") +INSTALL(FILES ${HEAD_HPP_LIST} DESTINATION include/CppJieba/Limonp) From a6e6568b851c4b68dff3bce4794e3e87eed6e9a3 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 07:41:21 -0800 Subject: [PATCH 07/25] mkdir src and mv cppjieba limonp husky demo into it --- {demo => src}/CMakeLists.txt | 0 {cppjieba => src/cppjieba}/CMakeLists.txt | 1 - {cppjieba => src/cppjieba}/ChineseFilter.hpp | 0 {cppjieba => src/cppjieba}/HMMSegment.cpp | 0 {cppjieba => src/cppjieba}/HMMSegment.h | 0 {cppjieba => src/cppjieba}/ISegment.hpp | 0 {cppjieba => src/cppjieba}/KeyWordExt.cpp | 0 {cppjieba => src/cppjieba}/KeyWordExt.h | 0 {cppjieba => src/cppjieba}/MPSegment.cpp | 0 {cppjieba => src/cppjieba}/MPSegment.h | 0 {cppjieba => src/cppjieba}/MixSegment.cpp | 0 {cppjieba => src/cppjieba}/MixSegment.h | 0 {cppjieba => src/cppjieba}/SegmentBase.hpp | 0 {cppjieba => src/cppjieba}/TransCode.hpp | 0 {cppjieba => src/cppjieba}/Trie.cpp | 0 {cppjieba => src/cppjieba}/Trie.h | 0 {cppjieba => src/cppjieba}/globals.h | 0 {cppjieba => src/cppjieba}/structs.h | 0 {husky => src/husky}/CMakeLists.txt | 0 {husky => src/husky}/Daemon.cpp | 0 {husky => src/husky}/Daemon.h | 0 {husky => src/husky}/HttpReqInfo.hpp | 0 {husky => src/husky}/ServerFrame.cpp | 0 {husky => src/husky}/ServerFrame.h | 0 {husky => src/husky}/ThreadManager.hpp | 0 {husky => src/husky}/globals.h | 0 {demo => src}/keywordext.cpp | 0 {limonp => src/limonp}/ArgvContext.hpp | 0 {limonp => src/limonp}/CMakeLists.txt | 0 {limonp => src/limonp}/MysqlClient.hpp | 0 {limonp => src/limonp}/cast_functs.hpp | 0 {limonp => src/limonp}/config.hpp | 0 {limonp => src/limonp}/io_functs.hpp | 0 {limonp => src/limonp}/logger.hpp | 0 {limonp => src/limonp}/macro_def.hpp | 0 {limonp => src/limonp}/map_functs.hpp | 0 {limonp => src/limonp}/str_functs.hpp | 0 {demo => src}/segment.cpp | 0 {demo => src}/server.cpp | 0 {demo => src}/testlines.utf8 | 0 40 files changed, 1 deletion(-) rename {demo => src}/CMakeLists.txt (100%) rename {cppjieba => src/cppjieba}/CMakeLists.txt (99%) rename {cppjieba => src/cppjieba}/ChineseFilter.hpp (100%) rename {cppjieba => src/cppjieba}/HMMSegment.cpp (100%) rename {cppjieba => src/cppjieba}/HMMSegment.h (100%) rename {cppjieba => src/cppjieba}/ISegment.hpp (100%) rename {cppjieba => src/cppjieba}/KeyWordExt.cpp (100%) rename {cppjieba => src/cppjieba}/KeyWordExt.h (100%) rename {cppjieba => src/cppjieba}/MPSegment.cpp (100%) rename {cppjieba => src/cppjieba}/MPSegment.h (100%) rename {cppjieba => src/cppjieba}/MixSegment.cpp (100%) rename {cppjieba => src/cppjieba}/MixSegment.h (100%) rename {cppjieba => src/cppjieba}/SegmentBase.hpp (100%) rename {cppjieba => src/cppjieba}/TransCode.hpp (100%) rename {cppjieba => src/cppjieba}/Trie.cpp (100%) rename {cppjieba => src/cppjieba}/Trie.h (100%) rename {cppjieba => src/cppjieba}/globals.h (100%) rename {cppjieba => src/cppjieba}/structs.h (100%) rename {husky => src/husky}/CMakeLists.txt (100%) rename {husky => src/husky}/Daemon.cpp (100%) rename {husky => src/husky}/Daemon.h (100%) rename {husky => src/husky}/HttpReqInfo.hpp (100%) rename {husky => src/husky}/ServerFrame.cpp (100%) rename {husky => src/husky}/ServerFrame.h (100%) rename {husky => src/husky}/ThreadManager.hpp (100%) rename {husky => src/husky}/globals.h (100%) rename {demo => src}/keywordext.cpp (100%) rename {limonp => src/limonp}/ArgvContext.hpp (100%) rename {limonp => src/limonp}/CMakeLists.txt (100%) rename {limonp => src/limonp}/MysqlClient.hpp (100%) rename {limonp => src/limonp}/cast_functs.hpp (100%) rename {limonp => src/limonp}/config.hpp (100%) rename {limonp => src/limonp}/io_functs.hpp (100%) rename {limonp => src/limonp}/logger.hpp (100%) rename {limonp => src/limonp}/macro_def.hpp (100%) rename {limonp => src/limonp}/map_functs.hpp (100%) rename {limonp => src/limonp}/str_functs.hpp (100%) rename {demo => src}/segment.cpp (100%) rename {demo => src}/server.cpp (100%) rename {demo => src}/testlines.utf8 (100%) diff --git a/demo/CMakeLists.txt b/src/CMakeLists.txt similarity index 100% rename from demo/CMakeLists.txt rename to src/CMakeLists.txt diff --git a/cppjieba/CMakeLists.txt b/src/cppjieba/CMakeLists.txt similarity index 99% rename from cppjieba/CMakeLists.txt rename to src/cppjieba/CMakeLists.txt index 03d0ae0..897d1e2 100644 --- a/cppjieba/CMakeLists.txt +++ b/src/cppjieba/CMakeLists.txt @@ -6,6 +6,5 @@ ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) - INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) INSTALL(FILES ChineseFilter.hpp HMMSegment.h KeyWordExt.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/cppjieba/ChineseFilter.hpp b/src/cppjieba/ChineseFilter.hpp similarity index 100% rename from cppjieba/ChineseFilter.hpp rename to src/cppjieba/ChineseFilter.hpp diff --git a/cppjieba/HMMSegment.cpp b/src/cppjieba/HMMSegment.cpp similarity index 100% rename from cppjieba/HMMSegment.cpp rename to src/cppjieba/HMMSegment.cpp diff --git a/cppjieba/HMMSegment.h b/src/cppjieba/HMMSegment.h similarity index 100% rename from cppjieba/HMMSegment.h rename to src/cppjieba/HMMSegment.h diff --git a/cppjieba/ISegment.hpp b/src/cppjieba/ISegment.hpp similarity index 100% rename from cppjieba/ISegment.hpp rename to src/cppjieba/ISegment.hpp diff --git a/cppjieba/KeyWordExt.cpp b/src/cppjieba/KeyWordExt.cpp similarity index 100% rename from cppjieba/KeyWordExt.cpp rename to src/cppjieba/KeyWordExt.cpp diff --git a/cppjieba/KeyWordExt.h b/src/cppjieba/KeyWordExt.h similarity index 100% rename from cppjieba/KeyWordExt.h rename to src/cppjieba/KeyWordExt.h diff --git a/cppjieba/MPSegment.cpp b/src/cppjieba/MPSegment.cpp similarity index 100% rename from cppjieba/MPSegment.cpp rename to src/cppjieba/MPSegment.cpp diff --git a/cppjieba/MPSegment.h b/src/cppjieba/MPSegment.h similarity index 100% rename from cppjieba/MPSegment.h rename to src/cppjieba/MPSegment.h diff --git a/cppjieba/MixSegment.cpp b/src/cppjieba/MixSegment.cpp similarity index 100% rename from cppjieba/MixSegment.cpp rename to src/cppjieba/MixSegment.cpp diff --git a/cppjieba/MixSegment.h b/src/cppjieba/MixSegment.h similarity index 100% rename from cppjieba/MixSegment.h rename to src/cppjieba/MixSegment.h diff --git a/cppjieba/SegmentBase.hpp b/src/cppjieba/SegmentBase.hpp similarity index 100% rename from cppjieba/SegmentBase.hpp rename to src/cppjieba/SegmentBase.hpp diff --git a/cppjieba/TransCode.hpp b/src/cppjieba/TransCode.hpp similarity index 100% rename from cppjieba/TransCode.hpp rename to src/cppjieba/TransCode.hpp diff --git a/cppjieba/Trie.cpp b/src/cppjieba/Trie.cpp similarity index 100% rename from cppjieba/Trie.cpp rename to src/cppjieba/Trie.cpp diff --git a/cppjieba/Trie.h b/src/cppjieba/Trie.h similarity index 100% rename from cppjieba/Trie.h rename to src/cppjieba/Trie.h diff --git a/cppjieba/globals.h b/src/cppjieba/globals.h similarity index 100% rename from cppjieba/globals.h rename to src/cppjieba/globals.h diff --git a/cppjieba/structs.h b/src/cppjieba/structs.h similarity index 100% rename from cppjieba/structs.h rename to src/cppjieba/structs.h diff --git a/husky/CMakeLists.txt b/src/husky/CMakeLists.txt similarity index 100% rename from husky/CMakeLists.txt rename to src/husky/CMakeLists.txt diff --git a/husky/Daemon.cpp b/src/husky/Daemon.cpp similarity index 100% rename from husky/Daemon.cpp rename to src/husky/Daemon.cpp diff --git a/husky/Daemon.h b/src/husky/Daemon.h similarity index 100% rename from husky/Daemon.h rename to src/husky/Daemon.h diff --git a/husky/HttpReqInfo.hpp b/src/husky/HttpReqInfo.hpp similarity index 100% rename from husky/HttpReqInfo.hpp rename to src/husky/HttpReqInfo.hpp diff --git a/husky/ServerFrame.cpp b/src/husky/ServerFrame.cpp similarity index 100% rename from husky/ServerFrame.cpp rename to src/husky/ServerFrame.cpp diff --git a/husky/ServerFrame.h b/src/husky/ServerFrame.h similarity index 100% rename from husky/ServerFrame.h rename to src/husky/ServerFrame.h diff --git a/husky/ThreadManager.hpp b/src/husky/ThreadManager.hpp similarity index 100% rename from husky/ThreadManager.hpp rename to src/husky/ThreadManager.hpp diff --git a/husky/globals.h b/src/husky/globals.h similarity index 100% rename from husky/globals.h rename to src/husky/globals.h diff --git a/demo/keywordext.cpp b/src/keywordext.cpp similarity index 100% rename from demo/keywordext.cpp rename to src/keywordext.cpp diff --git a/limonp/ArgvContext.hpp b/src/limonp/ArgvContext.hpp similarity index 100% rename from limonp/ArgvContext.hpp rename to src/limonp/ArgvContext.hpp diff --git a/limonp/CMakeLists.txt b/src/limonp/CMakeLists.txt similarity index 100% rename from limonp/CMakeLists.txt rename to src/limonp/CMakeLists.txt diff --git a/limonp/MysqlClient.hpp b/src/limonp/MysqlClient.hpp similarity index 100% rename from limonp/MysqlClient.hpp rename to src/limonp/MysqlClient.hpp diff --git a/limonp/cast_functs.hpp b/src/limonp/cast_functs.hpp similarity index 100% rename from limonp/cast_functs.hpp rename to src/limonp/cast_functs.hpp diff --git a/limonp/config.hpp b/src/limonp/config.hpp similarity index 100% rename from limonp/config.hpp rename to src/limonp/config.hpp diff --git a/limonp/io_functs.hpp b/src/limonp/io_functs.hpp similarity index 100% rename from limonp/io_functs.hpp rename to src/limonp/io_functs.hpp diff --git a/limonp/logger.hpp b/src/limonp/logger.hpp similarity index 100% rename from limonp/logger.hpp rename to src/limonp/logger.hpp diff --git a/limonp/macro_def.hpp b/src/limonp/macro_def.hpp similarity index 100% rename from limonp/macro_def.hpp rename to src/limonp/macro_def.hpp diff --git a/limonp/map_functs.hpp b/src/limonp/map_functs.hpp similarity index 100% rename from limonp/map_functs.hpp rename to src/limonp/map_functs.hpp diff --git a/limonp/str_functs.hpp b/src/limonp/str_functs.hpp similarity index 100% rename from limonp/str_functs.hpp rename to src/limonp/str_functs.hpp diff --git a/demo/segment.cpp b/src/segment.cpp similarity index 100% rename from demo/segment.cpp rename to src/segment.cpp diff --git a/demo/server.cpp b/src/server.cpp similarity index 100% rename from demo/server.cpp rename to src/server.cpp diff --git a/demo/testlines.utf8 b/src/testlines.utf8 similarity index 100% rename from demo/testlines.utf8 rename to src/testlines.utf8 From ae09f92b1977b4a28d9340dcb89f4904d9f18aa0 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 07:47:09 -0800 Subject: [PATCH 08/25] remove keywordext.cpp/h out of src --- src/cppjieba/CMakeLists.txt | 4 +- src/cppjieba/KeyWordExt.cpp | 360 ----------------------------------- src/cppjieba/KeyWordExt.h | 68 ------- src/keywordext.cpp | 56 ------ {src => test}/testlines.utf8 | 0 5 files changed, 2 insertions(+), 486 deletions(-) delete mode 100644 src/cppjieba/KeyWordExt.cpp delete mode 100644 src/cppjieba/KeyWordExt.h delete mode 100644 src/keywordext.cpp rename {src => test}/testlines.utf8 (100%) diff --git a/src/cppjieba/CMakeLists.txt b/src/cppjieba/CMakeLists.txt index 897d1e2..ec98370 100644 --- a/src/cppjieba/CMakeLists.txt +++ b/src/cppjieba/CMakeLists.txt @@ -1,4 +1,4 @@ -SET(LIBCPPJIEBA_SRC HMMSegment.cpp KeyWordExt.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) +SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) INCLUDE_DIRECTORIES(../limonp) @@ -7,4 +7,4 @@ ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) -INSTALL(FILES ChineseFilter.hpp HMMSegment.h KeyWordExt.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) +INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/src/cppjieba/KeyWordExt.cpp b/src/cppjieba/KeyWordExt.cpp deleted file mode 100644 index c0ab565..0000000 --- a/src/cppjieba/KeyWordExt.cpp +++ /dev/null @@ -1,360 +0,0 @@ -/************************************ - * file enc : ASCII - * author : wuyanyi09@gmail.com -************************************/ -#include "KeyWordExt.h" - - -namespace CppJieba -{ - - KeyWordExt::KeyWordExt() - { - } - - KeyWordExt::~KeyWordExt() - { - } - - bool KeyWordExt::init(const char* const segDictFile) - { - LogInfo("KeyWordExt init start ..."); - if(!_segment.init(segDictFile)) - { - LogError("_segment.init failed."); - return false; - } - return true; - } - - bool KeyWordExt::loadStopWords(const char * const filePath) - { - - LogInfo("_loadStopWords(%s) start", filePath); - if(!_stopWords.empty()) - { - LogError("_stopWords has been loaded before! "); - return false; - } - if(!checkFileExist(filePath)) - { - LogError("cann't find file[%s].",filePath); - return false; - } - - ifstream ifile(filePath); - string line; - Unicode word; - while(getline(ifile, line)) - { - if(!TransCode::decode(line, word)) - { - LogError("decode failed ."); - return false; - } - _stopWords.insert(word); - } - LogInfo("load stopwords[%d] finished.", _stopWords.size()); - - return true; - } - - bool KeyWordExt::dispose() - { - _segment.dispose(); - return true; - } - - bool KeyWordExt::_wordInfoCompare(const KeyWordInfo& a, const KeyWordInfo& b) - { - return a.weight > b.weight; - } - - bool KeyWordExt::_sortWLIDF(vector& wordInfos) - { - for(uint i = 0; i < wordInfos.size(); i++) - { - KeyWordInfo& wInfo = wordInfos[i]; - wInfo.idf = - wInfo.logFreq; - wInfo.weight = log(double(wInfo.word.size() + 1)) * wInfo.idf; - } - sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare); - return true; - } - - bool KeyWordExt::_extTopN(vector& wordInfos, uint topN) - { - int dis = wordInfos.size() - topN; - if(dis <= 0) - { - return true; - } - - if(uint(dis) <= topN) - { - for(int i = 0; i< dis; i++) - { - wordInfos.pop_back(); - } - } - else// in case that topN << size; - { - - vector tmp(wordInfos.begin(), wordInfos.begin() + topN); - wordInfos.swap(tmp); - } - return true; - } - - - bool KeyWordExt::extract(const vector& words, vector& keyWordInfos, uint topN) - { - if(words.empty()) - { - return false; - } - - keyWordInfos.clear(); - for(uint i = 0; i < words.size(); i++) - { - Unicode uniWord; - if(!TransCode::decode(words[i], uniWord)) - { - LogError("decode failed"); - return false; - } - keyWordInfos.push_back(uniWord); - } - - return _extract(keyWordInfos, topN); - } - - bool KeyWordExt::extract(const string& title, vector& keyWordInfos, uint topN) - { - if(title.empty()) - { - return false; - } - - vector trieNodeInfos; - Unicode unico; - if(!TransCode::decode(title, unico)) - { - return false; - } - _segment.cut(unico.begin(), unico.end(), trieNodeInfos); - - keyWordInfos.clear(); - for(uint i = 0; i < trieNodeInfos.size(); i++) - { - keyWordInfos.push_back(trieNodeInfos[i]); - } - return _extract(keyWordInfos, topN); - } - - bool KeyWordExt::_extract(vector& keyWordInfos, uint topN) - { - if(!_filter(keyWordInfos)) - { - LogError("_filter failed."); - return false; - } - - if(!_sortWLIDF(keyWordInfos)) - { - LogError("_sortWLIDF failed."); - return false; - } - - if(!_extTopN(keyWordInfos, topN)) - { - LogError("_extTopN failed."); - return false; - } - - return true; - } - - bool KeyWordExt::_filter(vector& wordInfos) - { - if(!_filterDuplicate(wordInfos)) - { - LogError("_filterDuplicate failed."); - return false; - } - - if(!_filterSingleWord(wordInfos)) - { - LogError("_filterSingleWord failed."); - return false; - } - - if(!_filterStopWords(wordInfos)) - { - LogError("_filterStopWords failed."); - return false; - } - - if(!_filterSubstr(wordInfos)) - { - LogError("_filterSubstr failed."); - return false; - } - - return true; - } - - bool KeyWordExt::_filterStopWords(vector& wordInfos) - { - if(_stopWords.empty()) - { - return true; - } - for(vector::iterator it = wordInfos.begin(); it != wordInfos.end();) - { - if(_stopWords.find(it->word) != _stopWords.end()) - { - it = wordInfos.erase(it); - } - else - { - it ++; - } - } - return true; - } - - - bool KeyWordExt::_filterDuplicate(vector& wordInfos) - { - set st; - for(vector::iterator it = wordInfos.begin(); it != wordInfos.end(); ) - { - if(st.find(it->word) != st.end()) - { - it = wordInfos.erase(it); - } - else - { - st.insert(it->word); - it++; - } - } - return true; - } - - bool KeyWordExt::_filterSingleWord(vector& wordInfos) - { - for(vector::iterator it = wordInfos.begin(); it != wordInfos.end();) - { - - // filter single word - if(1 == it->word.size()) - { - it = wordInfos.erase(it); - } - else - { - it++; - } - } - return true; - } - - bool KeyWordExt::_filterSubstr(vector& wordInfos) - { - vector tmp ; - for(uint i = 0; i < wordInfos.size(); i++) - { - tmp.push_back(wordInfos[i].word); - } - - for(vector::iterator it = wordInfos.begin(); it != wordInfos.end(); ) - { - if(_isSubIn(tmp, it->word)) - { - it = wordInfos.erase(it); - } - else - { - it++; - } - } - - return true; - } - - //bool KeyWordExt::_isContainSubWords(const string& word) - //{ - // for(uint i = 0; i < _priorSubWords.size(); i++) - // { - // if(string::npos != word.find(_priorSubWords[i])) - // { - // return true; - // } - // } - // return false; - //} - - //bool KeyWordExt::_prioritizeSubWords(vector& wordInfos) - //{ - // if(2 > wordInfos.size()) - // { - // return true; - // } - - // KeyWordInfo prior; - // bool flag = false; - // for(vector::iterator it = wordInfos.begin(); it != wordInfos.end(); ) - // { - // if(_isContainSubWords(it->word)) - // { - // prior = *it; - // it = wordInfos.erase(it); - // flag = true; - // break; - // } - // else - // { - // it ++; - // } - // } - // if(flag) - // { - // wordInfos.insert(wordInfos.begin(), prior); - // } - // return true; - //} -} - - -#ifdef KEYWORDEXT_UT - -using namespace CppJieba; - -int main() -{ - KeyWordExt ext; - ext.init(); - if(!ext.loadSegDict("../dicts/segdict.gbk.v2.1")) - { - return 1; - } - ext._loadStopWords("../dicts/stopwords.gbk.v1.0"); - - ifstream ifile("testtitle.gbk"); - vector res; - string line; - while(getline(ifile, line)) - { - cout< -#include "MPSegment.h" -#include "structs.h" - -namespace CppJieba -{ - - class KeyWordExt - { - private: - MPSegment _segment; - //vector _priorSubWords; - set _stopWords; - public: - KeyWordExt(); - ~KeyWordExt(); - bool init(const char* const segDictFile); - bool dispose(); - bool loadStopWords(const char * const filePath); - private: - //bool _loadPriorSubWords(const char * const filePath); - - - public: - bool extract(const string& title, vector& keyWordInfos, uint topN); - bool extract(const vector& words, vector& keyWordInfos, uint topN); - private: - static bool _wordInfoCompare(const KeyWordInfo& a, const KeyWordInfo& b); - private: - bool _extract(vector& keyWordInfos, uint topN); - bool _extTopN(vector& wordInfos, uint topN); - private: - //sort by word len - idf - bool _sortWLIDF(vector& wordInfos); - private: - bool _filter(vector& ); - bool _filterDuplicate(vector& ); - bool _filterSingleWord(vector& ); - bool _filterSubstr(vector& ); - bool _filterStopWords(vector& ); - private: - inline bool _isSubIn(const vector& words, const Unicode& word)const - { - - for(uint j = 0; j < words.size(); j++) - { - if(word != words[j] && words[j].end() != search(words[j].begin(), words[j].end(), word.begin(), word.end())) - { - return true; - } - } - return false; - } - //bool _prioritizeSubWords(vector& wordInfos); - //bool _isContainSubWords(const string& word); - - }; - -} - -#endif diff --git a/src/keywordext.cpp b/src/keywordext.cpp deleted file mode 100644 index b7713d2..0000000 --- a/src/keywordext.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include -#include "../cppjieba/KeyWordExt.h" - -using namespace CppJieba; - - -void testKeyWordExt(const char * dictPath, const char * filePath) -{ - KeyWordExt ext; - if(!ext.init(dictPath)) - { - return; - } - - ifstream ifile(filePath); - vector res; - string line; - while(getline(ifile, line)) - { - res.clear(); - if(!line.empty()) - { - ext.extract(line, res, 20); - cout< argc) - { - cout<<"usage: \n\t"<\n" - <<"options:\n" - <<"\t--dictpath\tIf not specified, the default is "< Date: Sun, 3 Nov 2013 07:49:41 -0800 Subject: [PATCH 09/25] mv ... --- src/CMakeLists.txt | 14 ++++++++++++-- src/{cppjieba => }/ChineseFilter.hpp | 0 src/{cppjieba => }/HMMSegment.cpp | 0 src/{cppjieba => }/HMMSegment.h | 0 src/{cppjieba => }/ISegment.hpp | 0 src/{cppjieba => }/MPSegment.cpp | 0 src/{cppjieba => }/MPSegment.h | 0 src/{cppjieba => }/MixSegment.cpp | 0 src/{cppjieba => }/MixSegment.h | 0 src/{cppjieba => }/SegmentBase.hpp | 0 src/{cppjieba => }/TransCode.hpp | 0 src/{cppjieba => }/Trie.cpp | 0 src/{cppjieba => }/Trie.h | 0 src/cppjieba/CMakeLists.txt | 10 ---------- src/{cppjieba => }/globals.h | 0 src/{cppjieba => }/structs.h | 0 16 files changed, 12 insertions(+), 12 deletions(-) rename src/{cppjieba => }/ChineseFilter.hpp (100%) rename src/{cppjieba => }/HMMSegment.cpp (100%) rename src/{cppjieba => }/HMMSegment.h (100%) rename src/{cppjieba => }/ISegment.hpp (100%) rename src/{cppjieba => }/MPSegment.cpp (100%) rename src/{cppjieba => }/MPSegment.h (100%) rename src/{cppjieba => }/MixSegment.cpp (100%) rename src/{cppjieba => }/MixSegment.h (100%) rename src/{cppjieba => }/SegmentBase.hpp (100%) rename src/{cppjieba => }/TransCode.hpp (100%) rename src/{cppjieba => }/Trie.cpp (100%) rename src/{cppjieba => }/Trie.h (100%) delete mode 100644 src/cppjieba/CMakeLists.txt rename src/{cppjieba => }/globals.h (100%) rename src/{cppjieba => }/structs.h (100%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8cd1fc2..22615bb 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,5 +4,15 @@ ADD_EXECUTABLE(keywordext.demo keywordext.cpp) INCLUDE_DIRECTORIES(../limonp ../cppjieba ../husky) LINK_DIRECTORIES(../cppjieba ../husky) TARGET_LINK_LIBRARIES(segment.demo cppjieba) -TARGET_LINK_LIBRARIES(server.demo cppjieba husky -lpthread) -TARGET_LINK_LIBRARIES(keywordext.demo cppjieba) +TARGET_LINK_LIBRARIES(server.demo cppjieba husky pthread) + +SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) + +INCLUDE_DIRECTORIES(../limonp) + +ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) + +SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) + +INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) +INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/src/cppjieba/ChineseFilter.hpp b/src/ChineseFilter.hpp similarity index 100% rename from src/cppjieba/ChineseFilter.hpp rename to src/ChineseFilter.hpp diff --git a/src/cppjieba/HMMSegment.cpp b/src/HMMSegment.cpp similarity index 100% rename from src/cppjieba/HMMSegment.cpp rename to src/HMMSegment.cpp diff --git a/src/cppjieba/HMMSegment.h b/src/HMMSegment.h similarity index 100% rename from src/cppjieba/HMMSegment.h rename to src/HMMSegment.h diff --git a/src/cppjieba/ISegment.hpp b/src/ISegment.hpp similarity index 100% rename from src/cppjieba/ISegment.hpp rename to src/ISegment.hpp diff --git a/src/cppjieba/MPSegment.cpp b/src/MPSegment.cpp similarity index 100% rename from src/cppjieba/MPSegment.cpp rename to src/MPSegment.cpp diff --git a/src/cppjieba/MPSegment.h b/src/MPSegment.h similarity index 100% rename from src/cppjieba/MPSegment.h rename to src/MPSegment.h diff --git a/src/cppjieba/MixSegment.cpp b/src/MixSegment.cpp similarity index 100% rename from src/cppjieba/MixSegment.cpp rename to src/MixSegment.cpp diff --git a/src/cppjieba/MixSegment.h b/src/MixSegment.h similarity index 100% rename from src/cppjieba/MixSegment.h rename to src/MixSegment.h diff --git a/src/cppjieba/SegmentBase.hpp b/src/SegmentBase.hpp similarity index 100% rename from src/cppjieba/SegmentBase.hpp rename to src/SegmentBase.hpp diff --git a/src/cppjieba/TransCode.hpp b/src/TransCode.hpp similarity index 100% rename from src/cppjieba/TransCode.hpp rename to src/TransCode.hpp diff --git a/src/cppjieba/Trie.cpp b/src/Trie.cpp similarity index 100% rename from src/cppjieba/Trie.cpp rename to src/Trie.cpp diff --git a/src/cppjieba/Trie.h b/src/Trie.h similarity index 100% rename from src/cppjieba/Trie.h rename to src/Trie.h diff --git a/src/cppjieba/CMakeLists.txt b/src/cppjieba/CMakeLists.txt deleted file mode 100644 index ec98370..0000000 --- a/src/cppjieba/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) - -INCLUDE_DIRECTORIES(../limonp) - -ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) - -SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) - -INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) -INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/src/cppjieba/globals.h b/src/globals.h similarity index 100% rename from src/cppjieba/globals.h rename to src/globals.h diff --git a/src/cppjieba/structs.h b/src/structs.h similarity index 100% rename from src/cppjieba/structs.h rename to src/structs.h From 1f4eae5bbc532bc5281d473ea9746f0d5af45615 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 08:47:50 -0800 Subject: [PATCH 10/25] modify cmakelists.txt --- CMakeLists.txt | 8 ++++---- dicts/CMakeLists.txt | 1 + src/CMakeLists.txt | 24 ++++++++++++++---------- src/husky/CMakeLists.txt | 3 +++ src/segment.cpp | 6 +++--- src/server.cpp | 10 +++++----- 6 files changed, 30 insertions(+), 22 deletions(-) create mode 100644 dicts/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 2273c21..f1d81d1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ PROJECT(CPPJIEBA) -ADD_SUBDIRECTORY(cppjieba) -ADD_SUBDIRECTORY(husky) -ADD_SUBDIRECTORY(limonp) -ADD_SUBDIRECTORY(demo) +ADD_SUBDIRECTORY(src) +ADD_SUBDIRECTORY(src/husky) +ADD_SUBDIRECTORY(src/limonp) +ADD_SUBDIRECTORY(dicts) diff --git a/dicts/CMakeLists.txt b/dicts/CMakeLists.txt new file mode 100644 index 0000000..4fb1e46 --- /dev/null +++ b/dicts/CMakeLists.txt @@ -0,0 +1 @@ +INSTALL(FILES hmm_model.gbk hmm_model.utf8 jieba.dict.gbk jieba.dict.utf8 DESTINATION include/CppJieba/Dicts) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 22615bb..207d63f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,18 +1,22 @@ -ADD_EXECUTABLE(segment.demo segment.cpp) -ADD_EXECUTABLE(server.demo server.cpp) -ADD_EXECUTABLE(keywordext.demo keywordext.cpp) -INCLUDE_DIRECTORIES(../limonp ../cppjieba ../husky) -LINK_DIRECTORIES(../cppjieba ../husky) -TARGET_LINK_LIBRARIES(segment.demo cppjieba) -TARGET_LINK_LIBRARIES(server.demo cppjieba husky pthread) +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) +SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) + +INCLUDE_DIRECTORIES(./limonp ./husky) SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) - -INCLUDE_DIRECTORIES(../limonp) - ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) +ADD_EXECUTABLE(segment segment.cpp) +ADD_EXECUTABLE(server server.cpp) + +LINK_DIRECTORIES(husky) + +TARGET_LINK_LIBRARIES(segment cppjieba) +TARGET_LINK_LIBRARIES(server cppjieba husky pthread) SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) +INSTALL(TARGETS segment RUNTIME DESTINATION bin/CppJieba) +INSTALL(TARGETS server RUNTIME DESTINATION bin/CppJieba) INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) + diff --git a/src/husky/CMakeLists.txt b/src/husky/CMakeLists.txt index d77435d..bf08ee9 100644 --- a/src/husky/CMakeLists.txt +++ b/src/husky/CMakeLists.txt @@ -1,3 +1,6 @@ + +SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) + SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) INCLUDE_DIRECTORIES(../limonp) ADD_LIBRARY(husky SHARED ${LIBHUSKY_SRC}) diff --git a/src/segment.cpp b/src/segment.cpp index 218fb1a..5445ec0 100644 --- a/src/segment.cpp +++ b/src/segment.cpp @@ -1,9 +1,9 @@ #include #include #include -#include "../cppjieba/MPSegment.h" -#include "../cppjieba/HMMSegment.h" -#include "../cppjieba/MixSegment.h" +#include "MPSegment.h" +#include "HMMSegment.h" +#include "MixSegment.h" using namespace CppJieba; diff --git a/src/server.cpp b/src/server.cpp index 428c602..20e81ea 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -4,11 +4,11 @@ #include #include #include -#include "../husky/Daemon.h" -#include "../husky/ServerFrame.h" -#include "../cppjieba/MPSegment.h" -#include "../cppjieba/HMMSegment.h" -#include "../cppjieba/MixSegment.h" +#include +#include +#include "MPSegment.h" +#include "HMMSegment.h" +#include "MixSegment.h" using namespace Husky; using namespace CppJieba; From 664a17c4c14a28922bfd6661d0d7d6324b88167d Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 09:02:48 -0800 Subject: [PATCH 11/25] finished install --- src/CMakeLists.txt | 4 ++-- src/husky/CMakeLists.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 207d63f..0abf5c0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -4,7 +4,7 @@ SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) INCLUDE_DIRECTORIES(./limonp ./husky) SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) -ADD_LIBRARY(cppjieba SHARED ${LIBCPPJIEBA_SRC}) +ADD_LIBRARY(cppjieba STATIC ${LIBCPPJIEBA_SRC}) ADD_EXECUTABLE(segment segment.cpp) ADD_EXECUTABLE(server server.cpp) @@ -15,7 +15,7 @@ TARGET_LINK_LIBRARIES(server cppjieba husky pthread) SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1) -INSTALL(TARGETS cppjieba LIBRARY DESTINATION lib/CppJieba) +INSTALL(TARGETS cppjieba ARCHIVE DESTINATION lib/CppJieba) INSTALL(TARGETS segment RUNTIME DESTINATION bin/CppJieba) INSTALL(TARGETS server RUNTIME DESTINATION bin/CppJieba) INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) diff --git a/src/husky/CMakeLists.txt b/src/husky/CMakeLists.txt index bf08ee9..41daa97 100644 --- a/src/husky/CMakeLists.txt +++ b/src/husky/CMakeLists.txt @@ -3,7 +3,7 @@ SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) INCLUDE_DIRECTORIES(../limonp) -ADD_LIBRARY(husky SHARED ${LIBHUSKY_SRC}) +ADD_LIBRARY(husky STATIC ${LIBHUSKY_SRC}) -INSTALL(TARGETS husky LIBRARY DESTINATION lib/CppJieba) +INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba) INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) From 9492803445abfa388481c0a906794d12f52b68c7 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 19:19:34 -0800 Subject: [PATCH 12/25] modify usage --- src/segment.cpp | 100 ++++++++++++++++-------------------------------- 1 file changed, 33 insertions(+), 67 deletions(-) diff --git a/src/segment.cpp b/src/segment.cpp index 5445ec0..a589cdb 100644 --- a/src/segment.cpp +++ b/src/segment.cpp @@ -7,32 +7,6 @@ using namespace CppJieba; -MPSegment seg; -HMMSegment hmmseg; -MixSegment mixseg; -bool init(const char * const dictPath, const char * const modelPath) -{ - if(!seg.init(dictPath)) - { - cout<<"seg init failed."<\n" + cout<<"usage: \n\t"<\n" <<"options:\n" <<"\t--algorithm\tSupported methods are [cutDAG, cutHMM, cutMix] for now. \n\t\t\tIf not specified, the default is cutDAG\n" - <<"\t--dictpath\tIf not specified, the default is "< Date: Sun, 3 Nov 2013 19:22:25 -0800 Subject: [PATCH 13/25] rm dicts out of install --- CMakeLists.txt | 1 - dicts/CMakeLists.txt | 1 - 2 files changed, 2 deletions(-) delete mode 100644 dicts/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index f1d81d1..544fe84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,4 +2,3 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(src/husky) ADD_SUBDIRECTORY(src/limonp) -ADD_SUBDIRECTORY(dicts) diff --git a/dicts/CMakeLists.txt b/dicts/CMakeLists.txt deleted file mode 100644 index 4fb1e46..0000000 --- a/dicts/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -INSTALL(FILES hmm_model.gbk hmm_model.utf8 jieba.dict.gbk jieba.dict.utf8 DESTINATION include/CppJieba/Dicts) From 49b0b7ddde6bc905339a56c9710665259116829a Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 19:35:00 -0800 Subject: [PATCH 14/25] modify usage --- src/segment.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/segment.cpp b/src/segment.cpp index a589cdb..406231c 100644 --- a/src/segment.cpp +++ b/src/segment.cpp @@ -18,7 +18,7 @@ void cut(const ISegment * seg, const char * const filePath) { res.clear(); seg->cut(line, res); - cout< Date: Sun, 3 Nov 2013 20:25:30 -0800 Subject: [PATCH 15/25] rename --- CMakeLists.txt | 4 +- src/HMMSegment.h | 4 +- src/{husky => Husky}/CMakeLists.txt | 0 src/{husky => Husky}/Daemon.cpp | 0 src/{husky => Husky}/Daemon.h | 0 src/{husky => Husky}/HttpReqInfo.hpp | 0 src/{husky => Husky}/ServerFrame.cpp | 0 src/{husky => Husky}/ServerFrame.h | 0 src/{husky => Husky}/ThreadManager.hpp | 0 src/{husky => Husky}/globals.h | 0 src/{limonp => Limonp}/ArgvContext.hpp | 0 src/{limonp => Limonp}/CMakeLists.txt | 0 src/{limonp => Limonp}/MysqlClient.hpp | 12 +++ src/{limonp => Limonp}/cast_functs.hpp | 0 src/{limonp => Limonp}/config.hpp | 0 src/{limonp => Limonp}/io_functs.hpp | 0 src/{limonp => Limonp}/logger.hpp | 0 src/{limonp => Limonp}/macro_def.hpp | 0 src/{limonp => Limonp}/map_functs.hpp | 0 src/{limonp => Limonp}/str_functs.hpp | 127 +------------------------ src/MPSegment.h | 2 +- src/MixSegment.h | 2 +- src/SegmentBase.hpp | 4 +- src/TransCode.hpp | 2 +- src/Trie.h | 4 +- src/segment.cpp | 2 +- test/Makefile | 54 ----------- test/segment.cpp | 60 ++++++++++++ test/server.cpp | 63 ++++++++++++ 29 files changed, 152 insertions(+), 188 deletions(-) rename src/{husky => Husky}/CMakeLists.txt (100%) rename src/{husky => Husky}/Daemon.cpp (100%) rename src/{husky => Husky}/Daemon.h (100%) rename src/{husky => Husky}/HttpReqInfo.hpp (100%) rename src/{husky => Husky}/ServerFrame.cpp (100%) rename src/{husky => Husky}/ServerFrame.h (100%) rename src/{husky => Husky}/ThreadManager.hpp (100%) rename src/{husky => Husky}/globals.h (100%) rename src/{limonp => Limonp}/ArgvContext.hpp (100%) rename src/{limonp => Limonp}/CMakeLists.txt (100%) rename src/{limonp => Limonp}/MysqlClient.hpp (88%) rename src/{limonp => Limonp}/cast_functs.hpp (100%) rename src/{limonp => Limonp}/config.hpp (100%) rename src/{limonp => Limonp}/io_functs.hpp (100%) rename src/{limonp => Limonp}/logger.hpp (100%) rename src/{limonp => Limonp}/macro_def.hpp (100%) rename src/{limonp => Limonp}/map_functs.hpp (100%) rename src/{limonp => Limonp}/str_functs.hpp (66%) delete mode 100644 test/Makefile create mode 100644 test/segment.cpp create mode 100644 test/server.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 544fe84..ee6b75a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(src) -ADD_SUBDIRECTORY(src/husky) -ADD_SUBDIRECTORY(src/limonp) +ADD_SUBDIRECTORY(src/Husky) +ADD_SUBDIRECTORY(src/Limonp) diff --git a/src/HMMSegment.h b/src/HMMSegment.h index 3691a37..a71081e 100644 --- a/src/HMMSegment.h +++ b/src/HMMSegment.h @@ -4,8 +4,8 @@ #include #include #include -#include -#include +#include "Limonp/str_functs.hpp" +#include "Limonp/logger.hpp" #include "globals.h" #include "TransCode.hpp" #include "ISegment.hpp" diff --git a/src/husky/CMakeLists.txt b/src/Husky/CMakeLists.txt similarity index 100% rename from src/husky/CMakeLists.txt rename to src/Husky/CMakeLists.txt diff --git a/src/husky/Daemon.cpp b/src/Husky/Daemon.cpp similarity index 100% rename from src/husky/Daemon.cpp rename to src/Husky/Daemon.cpp diff --git a/src/husky/Daemon.h b/src/Husky/Daemon.h similarity index 100% rename from src/husky/Daemon.h rename to src/Husky/Daemon.h diff --git a/src/husky/HttpReqInfo.hpp b/src/Husky/HttpReqInfo.hpp similarity index 100% rename from src/husky/HttpReqInfo.hpp rename to src/Husky/HttpReqInfo.hpp diff --git a/src/husky/ServerFrame.cpp b/src/Husky/ServerFrame.cpp similarity index 100% rename from src/husky/ServerFrame.cpp rename to src/Husky/ServerFrame.cpp diff --git a/src/husky/ServerFrame.h b/src/Husky/ServerFrame.h similarity index 100% rename from src/husky/ServerFrame.h rename to src/Husky/ServerFrame.h diff --git a/src/husky/ThreadManager.hpp b/src/Husky/ThreadManager.hpp similarity index 100% rename from src/husky/ThreadManager.hpp rename to src/Husky/ThreadManager.hpp diff --git a/src/husky/globals.h b/src/Husky/globals.h similarity index 100% rename from src/husky/globals.h rename to src/Husky/globals.h diff --git a/src/limonp/ArgvContext.hpp b/src/Limonp/ArgvContext.hpp similarity index 100% rename from src/limonp/ArgvContext.hpp rename to src/Limonp/ArgvContext.hpp diff --git a/src/limonp/CMakeLists.txt b/src/Limonp/CMakeLists.txt similarity index 100% rename from src/limonp/CMakeLists.txt rename to src/Limonp/CMakeLists.txt diff --git a/src/limonp/MysqlClient.hpp b/src/Limonp/MysqlClient.hpp similarity index 88% rename from src/limonp/MysqlClient.hpp rename to src/Limonp/MysqlClient.hpp index 0b7da93..8e9d620 100644 --- a/src/limonp/MysqlClient.hpp +++ b/src/Limonp/MysqlClient.hpp @@ -78,6 +78,18 @@ namespace Limonp } return true; } + uint insert(const char* tb_name, const char* keys, const vector& vals) + { + uint retn = 0; + string sql; + for(uint i = 0; i < vals.size(); i ++) + { + sql.clear(); + string_format(sql, "insert into %s (%s) values %s", tb_name, keys, vals[i].c_str()); + retn += executeSql(sql.c_str()); + } + return retn; + } bool select(const char* sql, RowsType& rows) { if(!executeSql(sql)) diff --git a/src/limonp/cast_functs.hpp b/src/Limonp/cast_functs.hpp similarity index 100% rename from src/limonp/cast_functs.hpp rename to src/Limonp/cast_functs.hpp diff --git a/src/limonp/config.hpp b/src/Limonp/config.hpp similarity index 100% rename from src/limonp/config.hpp rename to src/Limonp/config.hpp diff --git a/src/limonp/io_functs.hpp b/src/Limonp/io_functs.hpp similarity index 100% rename from src/limonp/io_functs.hpp rename to src/Limonp/io_functs.hpp diff --git a/src/limonp/logger.hpp b/src/Limonp/logger.hpp similarity index 100% rename from src/limonp/logger.hpp rename to src/Limonp/logger.hpp diff --git a/src/limonp/macro_def.hpp b/src/Limonp/macro_def.hpp similarity index 100% rename from src/limonp/macro_def.hpp rename to src/Limonp/macro_def.hpp diff --git a/src/limonp/map_functs.hpp b/src/Limonp/map_functs.hpp similarity index 100% rename from src/limonp/map_functs.hpp rename to src/Limonp/map_functs.hpp diff --git a/src/limonp/str_functs.hpp b/src/Limonp/str_functs.hpp similarity index 66% rename from src/limonp/str_functs.hpp rename to src/Limonp/str_functs.hpp index e3c87ab..8ad62a3 100644 --- a/src/limonp/str_functs.hpp +++ b/src/Limonp/str_functs.hpp @@ -19,7 +19,10 @@ #include #include #include -#include +#include +#include +#include "std_outbound.hpp" +#include "map_functs.hpp" #define print(x) cout<<(x)< - ostream& operator << (ostream& os, const pair& pr) - { - os << pr.first << ":" << pr.second ; - return os; - } - - template - ostream& operator << (ostream& os, const vector& vec) - { - if(vec.empty()) - { - return os << "[]"; - } - os<<"[\""< - string& operator << (string& str, const T& obj) - { - stringstream ss; - ss << obj; // call ostream& operator << (ostream& os, - return str = ss.str(); - } - - template - ostream& operator << (ostream& os, const map& mp) - { - if(mp.empty()) - { - os<<"{}"; - return os; - } - os<<'{'; - typename map::const_iterator it = mp.begin(); - os<<*it; - it++; - while(it != mp.end()) - { - os<<", "<<*it; - it++; - } - os<<'}'; - return os; - } - //template - // string& operator << (string& str, const map& mp) - // { - // if(mp.empty()) - // { - // str = "{}"; - // return str; - // } - // stringstream ss; - // ss<<'{'; - // typename map::const_iterator it = mp.begin(); - // ss<<*it; - // it++; - // while(it != mp.end()) - // { - // ss<<", "<<*it; - // it++; - // } - // ss<<'}'; - // str = ss.str(); - // return str; - // } - - template - ostream& operator << (ostream& os, const HashMap& mp) - { - if(mp.empty()) - { - return os << "{}"; - } - os<<'{'; - typename map::const_iterator it = mp.begin(); - os<<*it; - it++; - while(it != mp.end()) - { - os<<", "<<*it++; - } - return os<<'}'; - } - - //template - // string& operator << (string& str, const set& st) - // { - // stringstream ss; - // ss << st; - // return str = ss.str(); - // } - - template - ostream& operator << (ostream& os, const set& st) - { - if(st.empty()) - { - os << "{}"; - return os; - } - os<<'{'; - typename set::const_iterator it = st.begin(); - os<<*it; - it++; - while(it != st.end()) - { - os<<", "<<*it; - it++; - } - os<<'}'; - return os; - } - inline bool splitStr(const string& src, vector& res, const string& pattern) { if(src.empty()) diff --git a/src/MPSegment.h b/src/MPSegment.h index 769743d..a3eaae3 100644 --- a/src/MPSegment.h +++ b/src/MPSegment.h @@ -7,7 +7,7 @@ #include #include -#include +#include "Limonp/logger.hpp" #include "Trie.h" #include "globals.h" #include "ISegment.hpp" diff --git a/src/MixSegment.h b/src/MixSegment.h index e85d0e8..079db3f 100644 --- a/src/MixSegment.h +++ b/src/MixSegment.h @@ -3,7 +3,7 @@ #include "MPSegment.h" #include "HMMSegment.h" -#include +#include "Limonp/str_functs.hpp" namespace CppJieba { diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 17a7130..b082f56 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -4,8 +4,8 @@ #include "globals.h" #include "ISegment.hpp" #include "ChineseFilter.hpp" -#include -#include +#include "Limonp/str_functs.hpp" +#include "Limonp/logger.hpp" namespace CppJieba { diff --git a/src/TransCode.hpp b/src/TransCode.hpp index e49c84d..febe3a4 100644 --- a/src/TransCode.hpp +++ b/src/TransCode.hpp @@ -7,7 +7,7 @@ #include "globals.h" -#include +#include "Limonp/str_functs.hpp" namespace CppJieba { diff --git a/src/Trie.h b/src/Trie.h index 25689be..0fa54e1 100644 --- a/src/Trie.h +++ b/src/Trie.h @@ -12,8 +12,8 @@ #include #include #include -#include -#include +#include "Limonp/str_functs.hpp" +#include "Limonp/logger.hpp" #include "TransCode.hpp" #include "globals.h" #include "structs.h" diff --git a/src/segment.cpp b/src/segment.cpp index 406231c..fbd367d 100644 --- a/src/segment.cpp +++ b/src/segment.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include "Limonp/ArgvContext.hpp" #include "MPSegment.h" #include "HMMSegment.h" #include "MixSegment.h" diff --git a/test/Makefile b/test/Makefile deleted file mode 100644 index a7d4434..0000000 --- a/test/Makefile +++ /dev/null @@ -1,54 +0,0 @@ -CXX := g++ -LD := g++ -AR := ar rc - -INCS := -I../cppjieba/ - -DEBUG_CXXFLAGS := -g -Wall -DDEBUG -DUT $(INCS) - -CXXFLAGS := ${DEBUG_CXXFLAGS} -LDFLAGS := ${DEBUG_LDFLAGS} - -DOLINK := $(LD) $(LDFLAGS) -DOPACK := $(AR) -SOURCES := $(wildcard *.cpp) -OBJS := $(patsubst %.cpp,%.o,$(SOURCES)) -UTS := $(patsubst %.cpp,%.ut,$(SOURCES)) - -CPPJIEBADIR = ../cppjieba -LIBCPPJIEBA = $(CPPJIEBADIR)/libcppjieba.a - -CPPCOMMONDIR = ../cppcommon -LIBCPPCM = $(CPPCOMMONDIR)/libcm.a - -LIBA := $(LIBCPPJIEBA) $(LIBCPPCM) -# remove the objs after compilation -.PHONY: clean $(LIBA) - -# Main Targets -all: $(UTS) - -# This is a suffix rule -#.c.o: -%.o: %.cpp - $(CXX) -c $(CXXFLAGS) $< -%.ut: %.o $(LIBA) - $(CXX) $(CXXFLAGS) -o $@ $^ - -$(LIBCPPJIEBA): - cd $(CPPJIEBADIR) && $(MAKE) - -$(LIBCPPCM): - cd $(CPPCOMMONDIR) && $(MAKE) - -clean: - rm -f *.o *.ut *.d *.d.* -# cd $(CPPJIEBADIR) && make clean -# cd $(CPPCOMMONDIR) && make clean - -sinclude $(SOURCES:.cpp=.d) -%.d:%.cpp - @set -e; rm -f $@; \ - $(CXX) -MM $< > $@.$$$$; \ - sed 's,\($*\).o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - rm -f $@.$$$$ diff --git a/test/segment.cpp b/test/segment.cpp new file mode 100644 index 0000000..55b1975 --- /dev/null +++ b/test/segment.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include + +using namespace CppJieba; + +void cut(const ISegment * seg, const char * const filePath) +{ + ifstream ifile(filePath); + vector res; + string line; + while(getline(ifile, line)) + { + if(!line.empty()) + { + res.clear(); + seg->cut(line, res); + cout< +#include +#include +#include +#include +#include +#include +#include +#include "MPSegment.h" +#include "HMMSegment.h" +#include "MixSegment.h" + +using namespace Husky; +using namespace CppJieba; + +const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8"; +const char * const DEFAULT_MODELPATH = "../dicts/hmm_model.utf8"; + +class ServerDemo: public IRequestHandler +{ + public: + ServerDemo(){}; + virtual ~ServerDemo(){}; + virtual bool init(){return _segment.init(DEFAULT_DICTPATH, DEFAULT_MODELPATH);}; + virtual bool dispose(){return _segment.dispose();}; + public: + virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) + { + string sentence, tmp; + vector words; + httpReq.GET("key", tmp); + URLDecode(tmp, sentence); + _segment.cut(sentence, words); + strSnd << words; + return true; + } + private: + MixSegment _segment; +}; + +int main(int argc,char* argv[]) +{ + if(argc != 7) + { + printf("usage: %s -n THREAD_NUMBER -p LISTEN_PORT -k start|stop\n",argv[0]); + return -1; + } + ArgvContext arg(argc, argv); + unsigned int port = atoi(arg["-p"].c_str()); + unsigned int threadNum = atoi(arg["-n"].c_str()); + + ServerDemo s; + Daemon daemon(&s); + if(arg["-k"] == "start") + { + return !daemon.Start(port, threadNum); + } + else + { + return !daemon.Stop(); + } +} + From 44c7d4dcb3ad4f4fd31461d886d815052b7d3187 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 21:51:29 -0800 Subject: [PATCH 16/25] add Limonp/std_outbound.hpp --- src/Limonp/std_outbound.hpp | 101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/Limonp/std_outbound.hpp diff --git a/src/Limonp/std_outbound.hpp b/src/Limonp/std_outbound.hpp new file mode 100644 index 0000000..ab3e5c3 --- /dev/null +++ b/src/Limonp/std_outbound.hpp @@ -0,0 +1,101 @@ +#ifndef LIMONP_STD_OUTBOUND_H +#define LIMONP_STD_OUTBOUND_H + +#include +#include +#include + +namespace std +{ + template + ostream& operator << (ostream& os, const vector& vec) + { + if(vec.empty()) + { + return os << "[]"; + } + os<<"[\""< + ostream& operator << (ostream& os, const pair& pr) + { + os << pr.first << ":" << pr.second ; + return os; + } + + + template + string& operator << (string& str, const T& obj) + { + stringstream ss; + ss << obj; // call ostream& operator << (ostream& os, + return str = ss.str(); + } + + template + ostream& operator << (ostream& os, const map& mp) + { + if(mp.empty()) + { + os<<"{}"; + return os; + } + os<<'{'; + typename map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } + template + ostream& operator << (ostream& os, const std::tr1::unordered_map& mp) + { + if(mp.empty()) + { + return os << "{}"; + } + os<<'{'; + typename std::tr1::unordered_map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it++; + } + return os<<'}'; + } + + template + ostream& operator << (ostream& os, const set& st) + { + if(st.empty()) + { + os << "{}"; + return os; + } + os<<'{'; + typename set::const_iterator it = st.begin(); + os<<*it; + it++; + while(it != st.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } +} + +#endif From 01b225dec89d18c5438689fcff1d219c767b87ac Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 22:02:43 -0800 Subject: [PATCH 17/25] add Husky --- src/Husky/CMakeLists.txt | 9 +- src/Husky/Daemon.h | 2 +- src/Husky/HttpReqInfo.hpp | 26 ++- src/Husky/Limonp/ArgvContext.hpp | 90 +++++++++++ src/Husky/Limonp/CMakeLists.txt | 2 + src/Husky/Limonp/MysqlClient.hpp | 114 +++++++++++++ src/Husky/Limonp/cast_functs.hpp | 87 ++++++++++ src/Husky/Limonp/config.hpp | 107 +++++++++++++ src/Husky/Limonp/io_functs.hpp | 82 ++++++++++ src/Husky/Limonp/logger.hpp | 78 +++++++++ src/Husky/Limonp/macro_def.hpp | 22 +++ src/Husky/Limonp/map_functs.hpp | 116 ++++++++++++++ src/Husky/Limonp/std_outbound.hpp | 101 ++++++++++++ src/Husky/Limonp/str_functs.hpp | 257 ++++++++++++++++++++++++++++++ src/Husky/Limonp/typedefs.h | 21 +++ src/Husky/Limonp/vec_functs.hpp | 142 +++++++++++++++++ src/Husky/ServerFrame.cpp | 10 +- 17 files changed, 1239 insertions(+), 27 deletions(-) create mode 100644 src/Husky/Limonp/ArgvContext.hpp create mode 100644 src/Husky/Limonp/CMakeLists.txt create mode 100644 src/Husky/Limonp/MysqlClient.hpp create mode 100644 src/Husky/Limonp/cast_functs.hpp create mode 100644 src/Husky/Limonp/config.hpp create mode 100644 src/Husky/Limonp/io_functs.hpp create mode 100644 src/Husky/Limonp/logger.hpp create mode 100644 src/Husky/Limonp/macro_def.hpp create mode 100644 src/Husky/Limonp/map_functs.hpp create mode 100644 src/Husky/Limonp/std_outbound.hpp create mode 100644 src/Husky/Limonp/str_functs.hpp create mode 100644 src/Husky/Limonp/typedefs.h create mode 100644 src/Husky/Limonp/vec_functs.hpp diff --git a/src/Husky/CMakeLists.txt b/src/Husky/CMakeLists.txt index 41daa97..281fd79 100644 --- a/src/Husky/CMakeLists.txt +++ b/src/Husky/CMakeLists.txt @@ -1,9 +1,8 @@ - +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) -SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) -INCLUDE_DIRECTORIES(../limonp) +SET(LIBHUSKY_SRC Daemon.cpp ServerFrame.cpp) ADD_LIBRARY(husky STATIC ${LIBHUSKY_SRC}) -INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba) -INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) +INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba/Husky) +INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) diff --git a/src/Husky/Daemon.h b/src/Husky/Daemon.h index 532765a..e1402b8 100644 --- a/src/Husky/Daemon.h +++ b/src/Husky/Daemon.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include "Limonp/logger.hpp" #include "ServerFrame.h" namespace Husky diff --git a/src/Husky/HttpReqInfo.hpp b/src/Husky/HttpReqInfo.hpp index fe1cb2d..a664f5f 100644 --- a/src/Husky/HttpReqInfo.hpp +++ b/src/Husky/HttpReqInfo.hpp @@ -3,14 +3,14 @@ #include #include +#include "Limonp/logger.hpp" +#include "Limonp/str_functs.hpp" #include "globals.h" -#include -#include -#include namespace Husky { using namespace Limonp; + using namespace std; static const char* const KEY_METHOD = "METHOD"; static const char* const KEY_PATH = "PATH"; @@ -161,6 +161,8 @@ namespace Husky HashMap _headerMap; HashMap _methodGetMap; HashMap _methodPostMap; + //public: + friend ostream& operator<<(ostream& os, const HttpReqInfo& obj); private: bool _find(const HashMap& mp, const string& key, string& res)const { @@ -172,19 +174,6 @@ namespace Husky res = it->second; return true; } - public: - //string toString() const;// function for debug because of heavy time consuming - //string toString() const - //{ - // string res("{"); - // res += HashMapToString(_headerMap); - // res += ","; - // res += HashMapToString(_methodGetMap); - // res += ","; - // res += HashMapToString(_methodPostMap); - // res += "}"; - // return res; - //} private: bool _parseUrl(const string& url, HashMap& mp) { @@ -227,6 +216,11 @@ namespace Husky } }; + inline std::ostream& operator << (std::ostream& os, const Husky::HttpReqInfo& obj) + { + return os << obj._headerMap << obj._methodGetMap << obj._methodPostMap; + } + } #endif diff --git a/src/Husky/Limonp/ArgvContext.hpp b/src/Husky/Limonp/ArgvContext.hpp new file mode 100644 index 0000000..8be15f1 --- /dev/null +++ b/src/Husky/Limonp/ArgvContext.hpp @@ -0,0 +1,90 @@ +/************************************ + * file enc : ascii + * author : wuyanyi09@gmail.com + ************************************/ + +#ifndef LIMONP_ARGV_FUNCTS_H +#define LIMONP_ARGV_FUNCTS_H + +#include +#include +#include "str_functs.hpp" +#include "map_functs.hpp" + +namespace Limonp +{ + using namespace std; + class ArgvContext + { + public : + ArgvContext(int argc, const char* const * argv) + { + + for(int i = 0; i < argc; i++) + { + if(strStartsWith(argv[i], "-")) + { + if(i + 1 < argc && !strStartsWith(argv[i + 1], "-")) + { + _mpss[argv[i]] = argv[i+1]; + i++; + } + else + { + _sset.insert(argv[i]); + } + } + else + { + _args.push_back(argv[i]); + } + } + } + ~ArgvContext(){}; + public: + friend ostream& operator << (ostream& os, const ArgvContext& args); + string operator [](uint i) + { + if(i < _args.size()) + { + return _args[i]; + } + return ""; + } + string operator [](const string& key) + { + map::const_iterator it = _mpss.find(key); + if(it != _mpss.end()) + { + return it->second; + } + return ""; + } + public: + bool hasKey(const string& key) + { + if(_mpss.find(key) != _mpss.end() || _sset.find(key) != _sset.end()) + { + return true; + } + return false; + } + private: + vector _args; + map _mpss; + set _sset; + + }; + + inline ostream& operator << (ostream& os, const ArgvContext& args) + { + return os< +#include +#include +#include +#include "logger.hpp" + +namespace Limonp +{ + using namespace std; + class MysqlClient + { + public: + typedef vector< vector > RowsType; + private: + const char * const HOST; + const unsigned int PORT; + const char * const USER; + const char * const PASSWD; + const char * const DB; + const char * const CHARSET; + public: + MysqlClient(const char* host, uint port, const char* user, const char* passwd, const char* db, const char* charset = "utf8"): HOST(host), PORT(port), USER(user), PASSWD(passwd), DB(db), CHARSET(charset){ _conn = NULL;}; + ~MysqlClient(){dispose();}; + public: + bool init() + { + //cout< vec; + for(uint i = 0; i < num_fields; i ++) + { + row[i] ? vec.push_back(row[i]) : vec.push_back("NULL"); + } + rows.push_back(vec); + } + mysql_free_result(result); + return true; + } + + private: + MYSQL * _conn; + + }; +} + +#endif diff --git a/src/Husky/Limonp/cast_functs.hpp b/src/Husky/Limonp/cast_functs.hpp new file mode 100644 index 0000000..cf8c54f --- /dev/null +++ b/src/Husky/Limonp/cast_functs.hpp @@ -0,0 +1,87 @@ +#ifndef LIMONP_CAST_FUNCTS_H +#define LIMONP_CAST_FUNCTS_H + +namespace Limonp +{ + //logical and or + static const int sign_32 = 0xC0000000; + static const int exponent_32 = 0x07800000; + static const int mantissa_32 = 0x007FE000; + static const int sign_exponent_32 = 0x40000000; + static const int loss_32 = 0x38000000; + + static const short sign_16 = (short)0xC000; + static const short exponent_16 = (short)0x3C00; + static const short mantissa_16 = (short)0x03FF; + static const short sign_exponent_16 = (short)0x4000; + static const int exponent_fill_32 = 0x38000000; + + //infinite + static const short infinite_16 = (short) 0x7FFF; + static const short infinitesmall_16 = (short) 0x0000; + + inline float intBitsToFloat(unsigned int x) + { + union + { + float f; + int i; + }u; + u.i = x; + return u.f; + } + + inline int floatToIntBits(float f) + { + union + { + float f; + int i ; + }u; + u.f = f; + return u.i; + } + + inline short floatToShortBits(float f) + { + int fi = floatToIntBits(f); + + // 提取关键信息 + short sign = (short) ((unsigned int)(fi & sign_32) >> 16); + short exponent = (short) ((unsigned int)(fi & exponent_32) >> 13); + short mantissa = (short) ((unsigned int)(fi & mantissa_32) >> 13); + // 生成编码结果 + short code = (short) (sign | exponent | mantissa); + // 无穷大量、无穷小量的处理 + if ((fi & loss_32) > 0 && (fi & sign_exponent_32) > 0) { + // 当指数符号为1时(正次方),且左234位为1,返回无穷大量 + return (short) (code | infinite_16); + } + if (((fi & loss_32) ^ loss_32) > 0 && (fi & sign_exponent_32) == 0) { + // 当指数符号位0时(负次方),且左234位为0(与111异或>0),返回无穷小量 + return infinitesmall_16; + } + + return code; + } + + inline float shortBitsToFloat(short s) + { + /* + * 指数空余3位:若符号位为1,补0;若符号位为0,补1。 尾数位在后补0(13个) + */ + int sign = ((int) (s & sign_16)) << 16; + int exponent = ((int) (s & exponent_16)) << 13; + // 指数符号位为0,234位补1 + if ((s & sign_exponent_16) == 0 && s != 0) { + exponent |= exponent_fill_32; + } + int mantissa = ((int) (s & mantissa_16)) << 13; + // 生成解码结果 + int code = sign | exponent | mantissa; + return intBitsToFloat(code); + + } +} + +#endif diff --git a/src/Husky/Limonp/config.hpp b/src/Husky/Limonp/config.hpp new file mode 100644 index 0000000..3e45e6d --- /dev/null +++ b/src/Husky/Limonp/config.hpp @@ -0,0 +1,107 @@ +/************************************ + * file enc : utf8 + * author : wuyanyi09@gmail.com + ************************************/ +#ifndef LIMONP_CONFIG_H +#define LIMONP_CONFIG_H + + +#include +#include +#include +#include "logger.hpp" +#include "str_functs.hpp" + +namespace Limonp +{ + using std::map; + using std::string; + using std::cout; + using std::endl; + using std::ifstream; + class Config + { + public: + Config(){_isInit = false;}; + ~Config(){}; + bool init(const string& configFile) + { + if(_isInit) + { + LogFatal("already have been initialized. "); + return false; + } + ifstream ifile(configFile.c_str()); + if(!ifile) + { + LogFatal("open configFile[%s] failed.", configFile.c_str()); + return false; + } + string line, key, value; + vector vecBuf; + while(getline(ifile, line)) + { + //line = _stripComment(line); + if(line.empty()) + { + continue; + } + vecBuf.clear(); + if(!splitStr(line, vecBuf, "=") || 2 != vecBuf.size()) + { + LogFatal("line[%s] is illegal.", line.c_str()); + return false; + } + key = vecBuf[0]; + value = vecBuf[1]; + if(_map.end() != _map.find(key)) + { + LogFatal("key[%s] already exists.", key.c_str()); + return false; + } + _map[key] = value; + } + ifile.close(); + _isInit = true; + return true; + } + void display() + { + for(map::iterator it = _map.begin(); it != _map.end(); it++) + { + cout<<"("<first<<","<second<<")"< _map; + bool _isInit; + + }; +} + +namespace Limonp +{ + extern Config gConfig; +} + +#endif diff --git a/src/Husky/Limonp/io_functs.hpp b/src/Husky/Limonp/io_functs.hpp new file mode 100644 index 0000000..0128e62 --- /dev/null +++ b/src/Husky/Limonp/io_functs.hpp @@ -0,0 +1,82 @@ +/************************************ + * file enc : utf8 + * author : wuyanyi09@gmail.com +************************************/ +#ifndef LIMONP_IO_FUNCTS_H +#define LIMONP_IO_FUNCTS_H +#include +#include +#include +namespace Limonp +{ + using namespace std; + inline string loadFile2Str(const char * const filepath) + { + ifstream in(filepath); + if(!in) + { + return ""; + } + istreambuf_iterator beg(in), end; + string str(beg, end); + in.close(); + return str; + } + + inline void loadStr2File(const char * const filename, ios_base::openmode mode, const string& str) + { + ofstream out(filename, mode); + ostreambuf_iterator itr (out); + copy(str.begin(), str.end(), itr); + out.close(); + } + + inline int ReadFromFile(const char * fileName, char* buf, int maxCount, const char* mode) + { + FILE* fp = fopen(fileName, mode); + if (!fp) + return 0; + int ret; + fgets(buf, maxCount, fp) ? ret = 1 : ret = 0; + fclose(fp); + return ret; + } + + inline int WriteStr2File(const char* fileName, const char* buf, const char* mode) + { + FILE* fp = fopen(fileName, mode); + if (!fp) + return 0; + int n = fprintf(fp, "%s", buf); + fclose(fp); + return n; + } + + inline bool checkFileExist(const string& filePath) + { + fstream _file; + _file.open(filePath.c_str(), ios::in); + if(_file) + return true; + return false; + } + + inline bool createDir(const string& dirPath, bool p = true) + { + string dir_str(dirPath); + string cmd = "mkdir"; + if(p) + { + cmd += " -p"; + } + cmd += " " + dir_str; + int res = system(cmd.c_str()); + return res; + } + + inline bool checkDirExist(const string& dirPath) + { + return checkFileExist(dirPath); + } +} +#endif diff --git a/src/Husky/Limonp/logger.hpp b/src/Husky/Limonp/logger.hpp new file mode 100644 index 0000000..ed1a3c2 --- /dev/null +++ b/src/Husky/Limonp/logger.hpp @@ -0,0 +1,78 @@ +/************************************ + * file enc : utf8 + * author : wuyanyi09@gmail.com + ************************************/ +#ifndef LIMONP_LOGGER_H +#define LIMONP_LOGGER_H + +#include +#include +#include +#include +#include +#include +#include "io_functs.hpp" +#include "str_functs.hpp" + +#define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, __FILE__, __LINE__, fmt, ## __VA_ARGS__) +#define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, __FILE__, __LINE__, fmt, ## __VA_ARGS__) +#define LogWarn(fmt, ...) Logger::LoggingF(LL_WARN, __FILE__, __LINE__, fmt, ## __VA_ARGS__) +#define LogError(fmt, ...) Logger::LoggingF(LL_ERROR, __FILE__, __LINE__, fmt, ## __VA_ARGS__) +#define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, __FILE__, __LINE__, fmt, ## __VA_ARGS__) + + +namespace Limonp +{ + using namespace std; + enum {LL_DEBUG = 0, LL_INFO = 1, LL_WARN = 2, LL_ERROR = 3, LL_FATAL = 4, LEVEL_ARRAY_SIZE = 5, CSTR_BUFFER_SIZE = 1024}; + static const char * LOG_LEVEL_ARRAY[LEVEL_ARRAY_SIZE]= {"DEBUG","INFO","WARN","ERROR","FATAL"}; + static const char * LOG_FORMAT = "%s %s:%d %s %s\n"; + static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"; + + class Logger + { + public: + static bool Logging(uint level, const string& msg, const char* fileName, int lineNo) + { + if(level > LL_FATAL) + { + cerr<<"level's value is out of range"< -1 && n < size) { + msg.resize(n); + break; + } + if (n > -1) + size = n + 1; + else + size *= 2; + } + return Logging(level, msg, fileName, lineNo); + } + }; +} + +#endif diff --git a/src/Husky/Limonp/macro_def.hpp b/src/Husky/Limonp/macro_def.hpp new file mode 100644 index 0000000..58fe835 --- /dev/null +++ b/src/Husky/Limonp/macro_def.hpp @@ -0,0 +1,22 @@ +#ifndef LIMONP_MACRO_DEF_H +#define LIMONP_MACRO_DEF_H + +#define XX_GET_SET(varType, varName, funName)\ +private: varType varName;\ +public: inline varType get##funName(void) const {return varName;}\ +public: inline void set##funName(varType var) {varName = var;} + +#define XX_GET(varType, varName, funName)\ +private: varType varName;\ +public: inline varType get##funName(void) const {return varName;} + +#define XX_SET(varType, varName, funName)\ +private: varType varName;\ +public: inline void set##funName(varType var) {varName = var;} + +#define XX_GET_SET_BY_REF(varType, varName, funName)\ +private: varType varName;\ +public: inline const varType& get##funName(void) const {return varName;}\ +public: inline void set##funName(const varType& var){varName = var;} + +#endif diff --git a/src/Husky/Limonp/map_functs.hpp b/src/Husky/Limonp/map_functs.hpp new file mode 100644 index 0000000..44d472c --- /dev/null +++ b/src/Husky/Limonp/map_functs.hpp @@ -0,0 +1,116 @@ +/************************************ + * file enc : ascii + * author : wuyanyi09@gmail.com + ************************************/ + + +#ifndef LIMONP_MAP_FUNCTS_H +#define LIMONP_MAP_FUNCTS_H + +#include +#include +#include +#include + +#include +#define HashMap std::tr1::unordered_map + +namespace Limonp +{ + using namespace std; + + + //template + // string setToString(const set& st) + // { + // if(st.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename set::const_iterator it = st.begin(); + // ss<<*it; + // it++; + // while(it != st.end()) + // { + // ss<<", "<<*it; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } + + //template + // string mapToString(const map& mp) + // { + // if(mp.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename map::const_iterator it = mp.begin(); + // ss<first<<": "<second; + // it++; + // while(it != mp.end()) + // { + // ss<<", "<first<<": "<second; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } + + //template + // string HashMapToString(const HashMap& mp) + // { + // if(mp.empty()) + // { + // return "{}"; + // } + // stringstream ss; + // ss<<'{'; + // typename HashMap::const_iterator it = mp.begin(); + // ss<first<<": "<second; + // it++; + // while(it != mp.end()) + // { + // ss<<", "<first<<": "<second; + // it++; + // } + // ss<<'}'; + // return ss.str(); + // } + //template + // string pairToString(const pair& p) + // { + // stringstream ss; + // ss< + vT getMap(const map& mp, const kT & key, const vT & defaultVal) + { + typename map::const_iterator it; + it = mp.find(key); + if(mp.end() == it) + { + return defaultVal; + } + return it->second; + } + + template + void map2Vec(const map& mp, vector > & res) + { + typename map::const_iterator it = mp.begin(); + for(; it != mp.end(); it++) + { + res.push_back(*it); + } + } +} + +#endif diff --git a/src/Husky/Limonp/std_outbound.hpp b/src/Husky/Limonp/std_outbound.hpp new file mode 100644 index 0000000..ab3e5c3 --- /dev/null +++ b/src/Husky/Limonp/std_outbound.hpp @@ -0,0 +1,101 @@ +#ifndef LIMONP_STD_OUTBOUND_H +#define LIMONP_STD_OUTBOUND_H + +#include +#include +#include + +namespace std +{ + template + ostream& operator << (ostream& os, const vector& vec) + { + if(vec.empty()) + { + return os << "[]"; + } + os<<"[\""< + ostream& operator << (ostream& os, const pair& pr) + { + os << pr.first << ":" << pr.second ; + return os; + } + + + template + string& operator << (string& str, const T& obj) + { + stringstream ss; + ss << obj; // call ostream& operator << (ostream& os, + return str = ss.str(); + } + + template + ostream& operator << (ostream& os, const map& mp) + { + if(mp.empty()) + { + os<<"{}"; + return os; + } + os<<'{'; + typename map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } + template + ostream& operator << (ostream& os, const std::tr1::unordered_map& mp) + { + if(mp.empty()) + { + return os << "{}"; + } + os<<'{'; + typename std::tr1::unordered_map::const_iterator it = mp.begin(); + os<<*it; + it++; + while(it != mp.end()) + { + os<<", "<<*it++; + } + return os<<'}'; + } + + template + ostream& operator << (ostream& os, const set& st) + { + if(st.empty()) + { + os << "{}"; + return os; + } + os<<'{'; + typename set::const_iterator it = st.begin(); + os<<*it; + it++; + while(it != st.end()) + { + os<<", "<<*it; + it++; + } + os<<'}'; + return os; + } +} + +#endif diff --git a/src/Husky/Limonp/str_functs.hpp b/src/Husky/Limonp/str_functs.hpp new file mode 100644 index 0000000..8be5366 --- /dev/null +++ b/src/Husky/Limonp/str_functs.hpp @@ -0,0 +1,257 @@ +/************************************ + * file enc : ascii + * author : wuyanyi09@gmail.com + ************************************/ +#ifndef LIMONP_STR_FUNCTS_H +#define LIMONP_STR_FUNCTS_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "std_outbound.hpp" +#include "map_functs.hpp" + +#define print(x) cout<<(x)< -1 && n < size) { + str.resize(n); + return str; + } + if (n > -1) + size = n + 1; + else + size *= 2; + } + return str; + } + + inline void string_format(string& res, const char* fmt, ...) + { + int size = 256; + va_list ap; + while (1) { + res.resize(size); + va_start(ap, fmt); + int n = vsnprintf((char *)res.c_str(), size, fmt, ap); + va_end(ap); + if (n > -1 && n < size) { + res.resize(n); + return; + } + if (n > -1) + size = n + 1; + else + size *= 2; + } + } + + //inline bool joinStr(const vector& src, string& dest, const string& connectorStr) + //{ + // if(src.empty()) + // { + // return false; + // } + // for(uint i = 0; i < src.size() - 1; i++) + // { + // dest += src[i]; + // dest += connectorStr; + // } + // dest += src[src.size() - 1]; + // return true; + //} + + //inline string joinStr(const vector& source, const string& connector) + //{ + // string res; + // joinStr(source, res, connector); + // return res; + //} + + template + void join(T begin, T end, string& res, const string& connector) + { + if(begin == end) + { + return; + } + stringstream ss; + ss<<*begin; + begin++; + while(begin != end) + { + ss << connector << *begin; + begin ++; + } + res = ss.str(); + } + + template + string join(T begin, T end, const string& connector) + { + string res; + join(begin ,end, res, connector); + return res; + } + + + + inline bool splitStr(const string& src, vector& res, const string& pattern) + { + if(src.empty()) + { + return false; + } + res.clear(); + + size_t start = 0; + size_t end = 0; + while(start < src.size()) + { + end = src.find_first_of(pattern, start); + if(string::npos == end) + { + res.push_back(src.substr(start)); + return true; + } + res.push_back(src.substr(start, end - start)); + if(end == src.size() - 1) + { + res.push_back(""); + break; + } + start = end + 1; + } + return true; + } + + inline string& upper(string& str) + { + transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); + return str; + } + + inline string& lower(string& str) + { + transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); + return str; + } + + inline std::string <rim(std::string &s) + { + s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); + return s; + } + + inline std::string &rtrim(std::string &s) + { + s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); + return s; + } + + inline std::string &trim(std::string &s) + { + return ltrim(rtrim(s)); + } + + + inline uint16_t twocharToUint16(char high, char low) + { + return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff)); + } + + inline pair uint16ToChar2(uint16_t in) + { + pair res; + res.first = (in>>8) & 0x00ff; //high + res.second = (in) & 0x00ff; //low + return res; + } + + inline bool strStartsWith(const string& str, const string& prefix) + { + //return str.substr(0, prefix.size()) == prefix; + if(prefix.length() > str.length()) + { + return false; + } + return 0 == str.compare(0, prefix.length(), prefix); + } + + inline bool strEndsWith(const string& str, const string& suffix) + { + if(suffix.length() > str.length()) + { + return false; + } + return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix); + } + + inline bool isInStr(const string& str, char ch) + { + return str.find(ch) != string::npos; + } + + //inline void extractWords(const string& sentence, vector& words) + //{ + // bool flag = false; + // uint lhs = 0, len = 0; + // for(uint i = 0; i < sentence.size(); i++) + // { + // char x = sentence[i]; + // if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a)) + // { + // if(flag) + // { + // len ++; + // } + // else + // { + // lhs = i; + // len = 1; + // } + // flag = true; + // } + // else + // { + // if(flag) + // { + // words.push_back(string(sentence, lhs, len)); + // } + // flag = false; + // } + // } + // if(flag) + // { + // words.push_back(string(sentence, lhs, len)); + // } + //} + + +} +#endif diff --git a/src/Husky/Limonp/typedefs.h b/src/Husky/Limonp/typedefs.h new file mode 100644 index 0000000..a8da002 --- /dev/null +++ b/src/Husky/Limonp/typedefs.h @@ -0,0 +1,21 @@ +/************************************ + * file enc : utf8 + * author : wuyanyi09@gmail.com +************************************/ +#ifndef LIMONP_TYPEDEFS_H +#define LIMONP_TYPEDEFS_H + +#include +#include +#include +#include +#define HashMap std::tr1::unordered_map + +namespace Limonp +{ + typedef std::vector Unicode; + typedef std::vector::const_iterator UnicodeConstIterator; +} + + +#endif diff --git a/src/Husky/Limonp/vec_functs.hpp b/src/Husky/Limonp/vec_functs.hpp new file mode 100644 index 0000000..ac18548 --- /dev/null +++ b/src/Husky/Limonp/vec_functs.hpp @@ -0,0 +1,142 @@ +/************************************ + * file enc : ascii + * author : wuyanyi09@gmail.com +************************************/ +#ifndef LIMONP_VEC_FUNCTS_H +#define LIMONP_VEC_FUNCTS_H +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FOR_VECTOR(vec, i) for(size_t i = 0; i < vec.size(); i++) + +#define PRINT_VECTOR(vec) FOR_VECTOR(vec, i)\ +{\ + cout< + bool vecToString(const vector& vec, string& res) + { + if(vec.empty()) + { + res = "[]"; + return false; + } + stringstream ss; + ss<<"[\""< + string vecToString(const vector& vec) + { + string res; + vecToString(vec, res); + return res; + } + + template + bool isInVec(const vector& vec, const T& item) + { + typename vector::const_iterator it = find(vec.begin(), vec.end(), item); + return it != vec.end(); + } + template + void splitVec(const vector& vecSrc, vector< pair > >& outVec, const vector& patterns) + { + vector tmp; + T pattern; + size_t patternSize = patterns.size(); + for(size_t i = 0; i < vecSrc.size(); i++) + { + size_t patternPos = patternSize; + for(size_t j = 0; j < patternSize; j++) + { + if(patterns[j] == vecSrc[i]) + { + patternPos = j; + break; + } + } + if(patternPos != patternSize) + { + if(!tmp.empty()) + { + outVec.push_back(make_pair >(pattern, tmp)); + tmp.clear(); + } + pattern = patterns[patternPos]; + } + else + { + tmp.push_back(vecSrc[i]); + } + } + if(!tmp.empty()) + { + outVec.push_back(make_pair >(pattern, tmp)); + } + } + + template + void splitVec(const vector& vecSrc, vector< vector >& outVec, const vector& patternVec) + { + vector tmp; + for(size_t i = 0; i < vecSrc.size(); i++) + { + bool flag = false; + for(size_t j = 0; j < patternVec.size(); j++) + { + if(patternVec[j] == vecSrc[i]) + { + flag = true; + break; + } + } + if(flag) + { + if(!tmp.empty()) + { + outVec.push_back(tmp); + tmp.clear(); + } + } + else + { + tmp.push_back(vecSrc[i]); + } + } + if(!tmp.empty()) + { + outVec.push_back(tmp); + } + } +} + +#endif diff --git a/src/Husky/ServerFrame.cpp b/src/Husky/ServerFrame.cpp index 5d83eb8..aa3eab9 100644 --- a/src/Husky/ServerFrame.cpp +++ b/src/Husky/ServerFrame.cpp @@ -134,8 +134,8 @@ namespace Husky nRetCode = recv(hClientSock, chRecvBuf, RECV_BUFFER, 0); strRec = chRecvBuf; -#ifdef DEBUG - LogDebug("response[%s]", strRec.c_str()); +#ifdef HUKSY_DEBUG + LogDebug("request[%s]", strRec.c_str()); #endif if(SOCKET_ERROR==nRetCode) @@ -160,15 +160,15 @@ namespace Husky strHttpResp=chHttpHeader; strHttpResp+=strSnd; +#ifdef HUKSY_DEBUG + LogDebug("response'body [%s]", strSnd.c_str()); +#endif if (SOCKET_ERROR==send(hClientSock,strHttpResp.c_str(),strHttpResp.length(),0)) { LogError("error [%s]", strerror(errno)); } -#ifdef DEBUG - LogDebug("send response [%s] ", strHttpResp.c_str()); -#endif closesocket(hClientSock); } From dd55d9c8c42ec1873af8b8ab91040de7416ca350 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 22:07:26 -0800 Subject: [PATCH 18/25] adding Husky --- CMakeLists.txt | 3 +-- src/CMakeLists.txt | 3 +++ src/Husky/CMakeLists.txt | 2 ++ src/server.cpp | 6 +++--- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ee6b75a..2b07f62 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,3 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(src) -ADD_SUBDIRECTORY(src/Husky) -ADD_SUBDIRECTORY(src/Limonp) +ADD_SUBDIRECTORY(src/Husky/Limonp) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0abf5c0..82a804c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -20,3 +20,6 @@ INSTALL(TARGETS segment RUNTIME DESTINATION bin/CppJieba) INSTALL(TARGETS server RUNTIME DESTINATION bin/CppJieba) INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) + +ADD_SUBDIRECTORY(Husky) +ADD_SUBDIRECTORY(Limonp) diff --git a/src/Husky/CMakeLists.txt b/src/Husky/CMakeLists.txt index 281fd79..a27ad9b 100644 --- a/src/Husky/CMakeLists.txt +++ b/src/Husky/CMakeLists.txt @@ -6,3 +6,5 @@ ADD_LIBRARY(husky STATIC ${LIBHUSKY_SRC}) INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba/Husky) INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) + +ADD_SUBDIRECTORY(Limonp) diff --git a/src/server.cpp b/src/server.cpp index 20e81ea..412838c 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -3,9 +3,9 @@ #include #include #include -#include -#include -#include +#include "Husky/Limonp/ArgvContext.hpp" +#include "Husky/Daemon.h" +#include "Husky/ServerFrame.h" #include "MPSegment.h" #include "HMMSegment.h" #include "MixSegment.h" From 65f0b4b60f44d283d8a1059da408b0386b26e2b0 Mon Sep 17 00:00:00 2001 From: wyy Date: Sun, 3 Nov 2013 22:13:03 -0800 Subject: [PATCH 19/25] modify test/segment.cpp --- test/segment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/segment.cpp b/test/segment.cpp index 55b1975..e9c4a1b 100644 --- a/test/segment.cpp +++ b/test/segment.cpp @@ -28,7 +28,7 @@ int main(int argc, char ** argv) //demo { HMMSegment seg; - if(!seg.init("../dicts/jieba.dict.utf8")) + if(!seg.init("../dicts/hmm_model.utf8")) { cout<<"seg init failed."< Date: Sun, 3 Nov 2013 22:37:36 -0800 Subject: [PATCH 20/25] modify server.cpp's include --- test/server.cpp | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/test/server.cpp b/test/server.cpp index 20e81ea..334c548 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -1,14 +1,9 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include "MPSegment.h" -#include "HMMSegment.h" -#include "MixSegment.h" +#include +#include +#include +#include +#include +#include using namespace Husky; using namespace CppJieba; From e21bdc66eb36f62f66875d97365cc4881c4c0db3 Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 4 Nov 2013 04:05:45 -0800 Subject: [PATCH 21/25] rm Limonp out of Huksy --- src/Husky/CMakeLists.txt | 1 - src/Husky/Daemon.h | 2 +- src/Husky/HttpReqInfo.hpp | 4 +- src/Husky/Limonp/ArgvContext.hpp | 90 ----------- src/Husky/Limonp/CMakeLists.txt | 2 - src/Husky/Limonp/MysqlClient.hpp | 114 ------------- src/Husky/Limonp/cast_functs.hpp | 87 ---------- src/Husky/Limonp/config.hpp | 107 ------------- src/Husky/Limonp/io_functs.hpp | 82 ---------- src/Husky/Limonp/logger.hpp | 78 --------- src/Husky/Limonp/macro_def.hpp | 22 --- src/Husky/Limonp/map_functs.hpp | 116 -------------- src/Husky/Limonp/std_outbound.hpp | 101 ------------ src/Husky/Limonp/str_functs.hpp | 257 ------------------------------ src/Husky/Limonp/typedefs.h | 21 --- src/Husky/Limonp/vec_functs.hpp | 142 ----------------- 16 files changed, 3 insertions(+), 1223 deletions(-) delete mode 100644 src/Husky/Limonp/ArgvContext.hpp delete mode 100644 src/Husky/Limonp/CMakeLists.txt delete mode 100644 src/Husky/Limonp/MysqlClient.hpp delete mode 100644 src/Husky/Limonp/cast_functs.hpp delete mode 100644 src/Husky/Limonp/config.hpp delete mode 100644 src/Husky/Limonp/io_functs.hpp delete mode 100644 src/Husky/Limonp/logger.hpp delete mode 100644 src/Husky/Limonp/macro_def.hpp delete mode 100644 src/Husky/Limonp/map_functs.hpp delete mode 100644 src/Husky/Limonp/std_outbound.hpp delete mode 100644 src/Husky/Limonp/str_functs.hpp delete mode 100644 src/Husky/Limonp/typedefs.h delete mode 100644 src/Husky/Limonp/vec_functs.hpp diff --git a/src/Husky/CMakeLists.txt b/src/Husky/CMakeLists.txt index a27ad9b..6353e4b 100644 --- a/src/Husky/CMakeLists.txt +++ b/src/Husky/CMakeLists.txt @@ -7,4 +7,3 @@ ADD_LIBRARY(husky STATIC ${LIBHUSKY_SRC}) INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba/Husky) INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) -ADD_SUBDIRECTORY(Limonp) diff --git a/src/Husky/Daemon.h b/src/Husky/Daemon.h index e1402b8..61506a5 100644 --- a/src/Husky/Daemon.h +++ b/src/Husky/Daemon.h @@ -8,7 +8,7 @@ #include #include #include -#include "Limonp/logger.hpp" +#include "../Limonp/logger.hpp" #include "ServerFrame.h" namespace Husky diff --git a/src/Husky/HttpReqInfo.hpp b/src/Husky/HttpReqInfo.hpp index a664f5f..9b735a2 100644 --- a/src/Husky/HttpReqInfo.hpp +++ b/src/Husky/HttpReqInfo.hpp @@ -3,8 +3,8 @@ #include #include -#include "Limonp/logger.hpp" -#include "Limonp/str_functs.hpp" +#include "../Limonp/logger.hpp" +#include "../Limonp/str_functs.hpp" #include "globals.h" namespace Husky diff --git a/src/Husky/Limonp/ArgvContext.hpp b/src/Husky/Limonp/ArgvContext.hpp deleted file mode 100644 index 8be15f1..0000000 --- a/src/Husky/Limonp/ArgvContext.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/************************************ - * file enc : ascii - * author : wuyanyi09@gmail.com - ************************************/ - -#ifndef LIMONP_ARGV_FUNCTS_H -#define LIMONP_ARGV_FUNCTS_H - -#include -#include -#include "str_functs.hpp" -#include "map_functs.hpp" - -namespace Limonp -{ - using namespace std; - class ArgvContext - { - public : - ArgvContext(int argc, const char* const * argv) - { - - for(int i = 0; i < argc; i++) - { - if(strStartsWith(argv[i], "-")) - { - if(i + 1 < argc && !strStartsWith(argv[i + 1], "-")) - { - _mpss[argv[i]] = argv[i+1]; - i++; - } - else - { - _sset.insert(argv[i]); - } - } - else - { - _args.push_back(argv[i]); - } - } - } - ~ArgvContext(){}; - public: - friend ostream& operator << (ostream& os, const ArgvContext& args); - string operator [](uint i) - { - if(i < _args.size()) - { - return _args[i]; - } - return ""; - } - string operator [](const string& key) - { - map::const_iterator it = _mpss.find(key); - if(it != _mpss.end()) - { - return it->second; - } - return ""; - } - public: - bool hasKey(const string& key) - { - if(_mpss.find(key) != _mpss.end() || _sset.find(key) != _sset.end()) - { - return true; - } - return false; - } - private: - vector _args; - map _mpss; - set _sset; - - }; - - inline ostream& operator << (ostream& os, const ArgvContext& args) - { - return os< -#include -#include -#include -#include "logger.hpp" - -namespace Limonp -{ - using namespace std; - class MysqlClient - { - public: - typedef vector< vector > RowsType; - private: - const char * const HOST; - const unsigned int PORT; - const char * const USER; - const char * const PASSWD; - const char * const DB; - const char * const CHARSET; - public: - MysqlClient(const char* host, uint port, const char* user, const char* passwd, const char* db, const char* charset = "utf8"): HOST(host), PORT(port), USER(user), PASSWD(passwd), DB(db), CHARSET(charset){ _conn = NULL;}; - ~MysqlClient(){dispose();}; - public: - bool init() - { - //cout< vec; - for(uint i = 0; i < num_fields; i ++) - { - row[i] ? vec.push_back(row[i]) : vec.push_back("NULL"); - } - rows.push_back(vec); - } - mysql_free_result(result); - return true; - } - - private: - MYSQL * _conn; - - }; -} - -#endif diff --git a/src/Husky/Limonp/cast_functs.hpp b/src/Husky/Limonp/cast_functs.hpp deleted file mode 100644 index cf8c54f..0000000 --- a/src/Husky/Limonp/cast_functs.hpp +++ /dev/null @@ -1,87 +0,0 @@ -#ifndef LIMONP_CAST_FUNCTS_H -#define LIMONP_CAST_FUNCTS_H - -namespace Limonp -{ - //logical and or - static const int sign_32 = 0xC0000000; - static const int exponent_32 = 0x07800000; - static const int mantissa_32 = 0x007FE000; - static const int sign_exponent_32 = 0x40000000; - static const int loss_32 = 0x38000000; - - static const short sign_16 = (short)0xC000; - static const short exponent_16 = (short)0x3C00; - static const short mantissa_16 = (short)0x03FF; - static const short sign_exponent_16 = (short)0x4000; - static const int exponent_fill_32 = 0x38000000; - - //infinite - static const short infinite_16 = (short) 0x7FFF; - static const short infinitesmall_16 = (short) 0x0000; - - inline float intBitsToFloat(unsigned int x) - { - union - { - float f; - int i; - }u; - u.i = x; - return u.f; - } - - inline int floatToIntBits(float f) - { - union - { - float f; - int i ; - }u; - u.f = f; - return u.i; - } - - inline short floatToShortBits(float f) - { - int fi = floatToIntBits(f); - - // 提取关键信息 - short sign = (short) ((unsigned int)(fi & sign_32) >> 16); - short exponent = (short) ((unsigned int)(fi & exponent_32) >> 13); - short mantissa = (short) ((unsigned int)(fi & mantissa_32) >> 13); - // 生成编码结果 - short code = (short) (sign | exponent | mantissa); - // 无穷大量、无穷小量的处理 - if ((fi & loss_32) > 0 && (fi & sign_exponent_32) > 0) { - // 当指数符号为1时(正次方),且左234位为1,返回无穷大量 - return (short) (code | infinite_16); - } - if (((fi & loss_32) ^ loss_32) > 0 && (fi & sign_exponent_32) == 0) { - // 当指数符号位0时(负次方),且左234位为0(与111异或>0),返回无穷小量 - return infinitesmall_16; - } - - return code; - } - - inline float shortBitsToFloat(short s) - { - /* - * 指数空余3位:若符号位为1,补0;若符号位为0,补1。 尾数位在后补0(13个) - */ - int sign = ((int) (s & sign_16)) << 16; - int exponent = ((int) (s & exponent_16)) << 13; - // 指数符号位为0,234位补1 - if ((s & sign_exponent_16) == 0 && s != 0) { - exponent |= exponent_fill_32; - } - int mantissa = ((int) (s & mantissa_16)) << 13; - // 生成解码结果 - int code = sign | exponent | mantissa; - return intBitsToFloat(code); - - } -} - -#endif diff --git a/src/Husky/Limonp/config.hpp b/src/Husky/Limonp/config.hpp deleted file mode 100644 index 3e45e6d..0000000 --- a/src/Husky/Limonp/config.hpp +++ /dev/null @@ -1,107 +0,0 @@ -/************************************ - * file enc : utf8 - * author : wuyanyi09@gmail.com - ************************************/ -#ifndef LIMONP_CONFIG_H -#define LIMONP_CONFIG_H - - -#include -#include -#include -#include "logger.hpp" -#include "str_functs.hpp" - -namespace Limonp -{ - using std::map; - using std::string; - using std::cout; - using std::endl; - using std::ifstream; - class Config - { - public: - Config(){_isInit = false;}; - ~Config(){}; - bool init(const string& configFile) - { - if(_isInit) - { - LogFatal("already have been initialized. "); - return false; - } - ifstream ifile(configFile.c_str()); - if(!ifile) - { - LogFatal("open configFile[%s] failed.", configFile.c_str()); - return false; - } - string line, key, value; - vector vecBuf; - while(getline(ifile, line)) - { - //line = _stripComment(line); - if(line.empty()) - { - continue; - } - vecBuf.clear(); - if(!splitStr(line, vecBuf, "=") || 2 != vecBuf.size()) - { - LogFatal("line[%s] is illegal.", line.c_str()); - return false; - } - key = vecBuf[0]; - value = vecBuf[1]; - if(_map.end() != _map.find(key)) - { - LogFatal("key[%s] already exists.", key.c_str()); - return false; - } - _map[key] = value; - } - ifile.close(); - _isInit = true; - return true; - } - void display() - { - for(map::iterator it = _map.begin(); it != _map.end(); it++) - { - cout<<"("<first<<","<second<<")"< _map; - bool _isInit; - - }; -} - -namespace Limonp -{ - extern Config gConfig; -} - -#endif diff --git a/src/Husky/Limonp/io_functs.hpp b/src/Husky/Limonp/io_functs.hpp deleted file mode 100644 index 0128e62..0000000 --- a/src/Husky/Limonp/io_functs.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/************************************ - * file enc : utf8 - * author : wuyanyi09@gmail.com -************************************/ -#ifndef LIMONP_IO_FUNCTS_H -#define LIMONP_IO_FUNCTS_H -#include -#include -#include -namespace Limonp -{ - using namespace std; - inline string loadFile2Str(const char * const filepath) - { - ifstream in(filepath); - if(!in) - { - return ""; - } - istreambuf_iterator beg(in), end; - string str(beg, end); - in.close(); - return str; - } - - inline void loadStr2File(const char * const filename, ios_base::openmode mode, const string& str) - { - ofstream out(filename, mode); - ostreambuf_iterator itr (out); - copy(str.begin(), str.end(), itr); - out.close(); - } - - inline int ReadFromFile(const char * fileName, char* buf, int maxCount, const char* mode) - { - FILE* fp = fopen(fileName, mode); - if (!fp) - return 0; - int ret; - fgets(buf, maxCount, fp) ? ret = 1 : ret = 0; - fclose(fp); - return ret; - } - - inline int WriteStr2File(const char* fileName, const char* buf, const char* mode) - { - FILE* fp = fopen(fileName, mode); - if (!fp) - return 0; - int n = fprintf(fp, "%s", buf); - fclose(fp); - return n; - } - - inline bool checkFileExist(const string& filePath) - { - fstream _file; - _file.open(filePath.c_str(), ios::in); - if(_file) - return true; - return false; - } - - inline bool createDir(const string& dirPath, bool p = true) - { - string dir_str(dirPath); - string cmd = "mkdir"; - if(p) - { - cmd += " -p"; - } - cmd += " " + dir_str; - int res = system(cmd.c_str()); - return res; - } - - inline bool checkDirExist(const string& dirPath) - { - return checkFileExist(dirPath); - } -} -#endif diff --git a/src/Husky/Limonp/logger.hpp b/src/Husky/Limonp/logger.hpp deleted file mode 100644 index ed1a3c2..0000000 --- a/src/Husky/Limonp/logger.hpp +++ /dev/null @@ -1,78 +0,0 @@ -/************************************ - * file enc : utf8 - * author : wuyanyi09@gmail.com - ************************************/ -#ifndef LIMONP_LOGGER_H -#define LIMONP_LOGGER_H - -#include -#include -#include -#include -#include -#include -#include "io_functs.hpp" -#include "str_functs.hpp" - -#define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogWarn(fmt, ...) Logger::LoggingF(LL_WARN, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogError(fmt, ...) Logger::LoggingF(LL_ERROR, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, __FILE__, __LINE__, fmt, ## __VA_ARGS__) - - -namespace Limonp -{ - using namespace std; - enum {LL_DEBUG = 0, LL_INFO = 1, LL_WARN = 2, LL_ERROR = 3, LL_FATAL = 4, LEVEL_ARRAY_SIZE = 5, CSTR_BUFFER_SIZE = 1024}; - static const char * LOG_LEVEL_ARRAY[LEVEL_ARRAY_SIZE]= {"DEBUG","INFO","WARN","ERROR","FATAL"}; - static const char * LOG_FORMAT = "%s %s:%d %s %s\n"; - static const char * LOG_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"; - - class Logger - { - public: - static bool Logging(uint level, const string& msg, const char* fileName, int lineNo) - { - if(level > LL_FATAL) - { - cerr<<"level's value is out of range"< -1 && n < size) { - msg.resize(n); - break; - } - if (n > -1) - size = n + 1; - else - size *= 2; - } - return Logging(level, msg, fileName, lineNo); - } - }; -} - -#endif diff --git a/src/Husky/Limonp/macro_def.hpp b/src/Husky/Limonp/macro_def.hpp deleted file mode 100644 index 58fe835..0000000 --- a/src/Husky/Limonp/macro_def.hpp +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef LIMONP_MACRO_DEF_H -#define LIMONP_MACRO_DEF_H - -#define XX_GET_SET(varType, varName, funName)\ -private: varType varName;\ -public: inline varType get##funName(void) const {return varName;}\ -public: inline void set##funName(varType var) {varName = var;} - -#define XX_GET(varType, varName, funName)\ -private: varType varName;\ -public: inline varType get##funName(void) const {return varName;} - -#define XX_SET(varType, varName, funName)\ -private: varType varName;\ -public: inline void set##funName(varType var) {varName = var;} - -#define XX_GET_SET_BY_REF(varType, varName, funName)\ -private: varType varName;\ -public: inline const varType& get##funName(void) const {return varName;}\ -public: inline void set##funName(const varType& var){varName = var;} - -#endif diff --git a/src/Husky/Limonp/map_functs.hpp b/src/Husky/Limonp/map_functs.hpp deleted file mode 100644 index 44d472c..0000000 --- a/src/Husky/Limonp/map_functs.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/************************************ - * file enc : ascii - * author : wuyanyi09@gmail.com - ************************************/ - - -#ifndef LIMONP_MAP_FUNCTS_H -#define LIMONP_MAP_FUNCTS_H - -#include -#include -#include -#include - -#include -#define HashMap std::tr1::unordered_map - -namespace Limonp -{ - using namespace std; - - - //template - // string setToString(const set& st) - // { - // if(st.empty()) - // { - // return "{}"; - // } - // stringstream ss; - // ss<<'{'; - // typename set::const_iterator it = st.begin(); - // ss<<*it; - // it++; - // while(it != st.end()) - // { - // ss<<", "<<*it; - // it++; - // } - // ss<<'}'; - // return ss.str(); - // } - - //template - // string mapToString(const map& mp) - // { - // if(mp.empty()) - // { - // return "{}"; - // } - // stringstream ss; - // ss<<'{'; - // typename map::const_iterator it = mp.begin(); - // ss<first<<": "<second; - // it++; - // while(it != mp.end()) - // { - // ss<<", "<first<<": "<second; - // it++; - // } - // ss<<'}'; - // return ss.str(); - // } - - //template - // string HashMapToString(const HashMap& mp) - // { - // if(mp.empty()) - // { - // return "{}"; - // } - // stringstream ss; - // ss<<'{'; - // typename HashMap::const_iterator it = mp.begin(); - // ss<first<<": "<second; - // it++; - // while(it != mp.end()) - // { - // ss<<", "<first<<": "<second; - // it++; - // } - // ss<<'}'; - // return ss.str(); - // } - //template - // string pairToString(const pair& p) - // { - // stringstream ss; - // ss< - vT getMap(const map& mp, const kT & key, const vT & defaultVal) - { - typename map::const_iterator it; - it = mp.find(key); - if(mp.end() == it) - { - return defaultVal; - } - return it->second; - } - - template - void map2Vec(const map& mp, vector > & res) - { - typename map::const_iterator it = mp.begin(); - for(; it != mp.end(); it++) - { - res.push_back(*it); - } - } -} - -#endif diff --git a/src/Husky/Limonp/std_outbound.hpp b/src/Husky/Limonp/std_outbound.hpp deleted file mode 100644 index ab3e5c3..0000000 --- a/src/Husky/Limonp/std_outbound.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef LIMONP_STD_OUTBOUND_H -#define LIMONP_STD_OUTBOUND_H - -#include -#include -#include - -namespace std -{ - template - ostream& operator << (ostream& os, const vector& vec) - { - if(vec.empty()) - { - return os << "[]"; - } - os<<"[\""< - ostream& operator << (ostream& os, const pair& pr) - { - os << pr.first << ":" << pr.second ; - return os; - } - - - template - string& operator << (string& str, const T& obj) - { - stringstream ss; - ss << obj; // call ostream& operator << (ostream& os, - return str = ss.str(); - } - - template - ostream& operator << (ostream& os, const map& mp) - { - if(mp.empty()) - { - os<<"{}"; - return os; - } - os<<'{'; - typename map::const_iterator it = mp.begin(); - os<<*it; - it++; - while(it != mp.end()) - { - os<<", "<<*it; - it++; - } - os<<'}'; - return os; - } - template - ostream& operator << (ostream& os, const std::tr1::unordered_map& mp) - { - if(mp.empty()) - { - return os << "{}"; - } - os<<'{'; - typename std::tr1::unordered_map::const_iterator it = mp.begin(); - os<<*it; - it++; - while(it != mp.end()) - { - os<<", "<<*it++; - } - return os<<'}'; - } - - template - ostream& operator << (ostream& os, const set& st) - { - if(st.empty()) - { - os << "{}"; - return os; - } - os<<'{'; - typename set::const_iterator it = st.begin(); - os<<*it; - it++; - while(it != st.end()) - { - os<<", "<<*it; - it++; - } - os<<'}'; - return os; - } -} - -#endif diff --git a/src/Husky/Limonp/str_functs.hpp b/src/Husky/Limonp/str_functs.hpp deleted file mode 100644 index 8be5366..0000000 --- a/src/Husky/Limonp/str_functs.hpp +++ /dev/null @@ -1,257 +0,0 @@ -/************************************ - * file enc : ascii - * author : wuyanyi09@gmail.com - ************************************/ -#ifndef LIMONP_STR_FUNCTS_H -#define LIMONP_STR_FUNCTS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "std_outbound.hpp" -#include "map_functs.hpp" - -#define print(x) cout<<(x)< -1 && n < size) { - str.resize(n); - return str; - } - if (n > -1) - size = n + 1; - else - size *= 2; - } - return str; - } - - inline void string_format(string& res, const char* fmt, ...) - { - int size = 256; - va_list ap; - while (1) { - res.resize(size); - va_start(ap, fmt); - int n = vsnprintf((char *)res.c_str(), size, fmt, ap); - va_end(ap); - if (n > -1 && n < size) { - res.resize(n); - return; - } - if (n > -1) - size = n + 1; - else - size *= 2; - } - } - - //inline bool joinStr(const vector& src, string& dest, const string& connectorStr) - //{ - // if(src.empty()) - // { - // return false; - // } - // for(uint i = 0; i < src.size() - 1; i++) - // { - // dest += src[i]; - // dest += connectorStr; - // } - // dest += src[src.size() - 1]; - // return true; - //} - - //inline string joinStr(const vector& source, const string& connector) - //{ - // string res; - // joinStr(source, res, connector); - // return res; - //} - - template - void join(T begin, T end, string& res, const string& connector) - { - if(begin == end) - { - return; - } - stringstream ss; - ss<<*begin; - begin++; - while(begin != end) - { - ss << connector << *begin; - begin ++; - } - res = ss.str(); - } - - template - string join(T begin, T end, const string& connector) - { - string res; - join(begin ,end, res, connector); - return res; - } - - - - inline bool splitStr(const string& src, vector& res, const string& pattern) - { - if(src.empty()) - { - return false; - } - res.clear(); - - size_t start = 0; - size_t end = 0; - while(start < src.size()) - { - end = src.find_first_of(pattern, start); - if(string::npos == end) - { - res.push_back(src.substr(start)); - return true; - } - res.push_back(src.substr(start, end - start)); - if(end == src.size() - 1) - { - res.push_back(""); - break; - } - start = end + 1; - } - return true; - } - - inline string& upper(string& str) - { - transform(str.begin(), str.end(), str.begin(), (int (*)(int))toupper); - return str; - } - - inline string& lower(string& str) - { - transform(str.begin(), str.end(), str.begin(), (int (*)(int))tolower); - return str; - } - - inline std::string <rim(std::string &s) - { - s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); - return s; - } - - inline std::string &rtrim(std::string &s) - { - s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); - return s; - } - - inline std::string &trim(std::string &s) - { - return ltrim(rtrim(s)); - } - - - inline uint16_t twocharToUint16(char high, char low) - { - return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff)); - } - - inline pair uint16ToChar2(uint16_t in) - { - pair res; - res.first = (in>>8) & 0x00ff; //high - res.second = (in) & 0x00ff; //low - return res; - } - - inline bool strStartsWith(const string& str, const string& prefix) - { - //return str.substr(0, prefix.size()) == prefix; - if(prefix.length() > str.length()) - { - return false; - } - return 0 == str.compare(0, prefix.length(), prefix); - } - - inline bool strEndsWith(const string& str, const string& suffix) - { - if(suffix.length() > str.length()) - { - return false; - } - return 0 == str.compare(str.length() - suffix.length(), suffix.length(), suffix); - } - - inline bool isInStr(const string& str, char ch) - { - return str.find(ch) != string::npos; - } - - //inline void extractWords(const string& sentence, vector& words) - //{ - // bool flag = false; - // uint lhs = 0, len = 0; - // for(uint i = 0; i < sentence.size(); i++) - // { - // char x = sentence[i]; - // if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a)) - // { - // if(flag) - // { - // len ++; - // } - // else - // { - // lhs = i; - // len = 1; - // } - // flag = true; - // } - // else - // { - // if(flag) - // { - // words.push_back(string(sentence, lhs, len)); - // } - // flag = false; - // } - // } - // if(flag) - // { - // words.push_back(string(sentence, lhs, len)); - // } - //} - - -} -#endif diff --git a/src/Husky/Limonp/typedefs.h b/src/Husky/Limonp/typedefs.h deleted file mode 100644 index a8da002..0000000 --- a/src/Husky/Limonp/typedefs.h +++ /dev/null @@ -1,21 +0,0 @@ -/************************************ - * file enc : utf8 - * author : wuyanyi09@gmail.com -************************************/ -#ifndef LIMONP_TYPEDEFS_H -#define LIMONP_TYPEDEFS_H - -#include -#include -#include -#include -#define HashMap std::tr1::unordered_map - -namespace Limonp -{ - typedef std::vector Unicode; - typedef std::vector::const_iterator UnicodeConstIterator; -} - - -#endif diff --git a/src/Husky/Limonp/vec_functs.hpp b/src/Husky/Limonp/vec_functs.hpp deleted file mode 100644 index ac18548..0000000 --- a/src/Husky/Limonp/vec_functs.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/************************************ - * file enc : ascii - * author : wuyanyi09@gmail.com -************************************/ -#ifndef LIMONP_VEC_FUNCTS_H -#define LIMONP_VEC_FUNCTS_H -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define FOR_VECTOR(vec, i) for(size_t i = 0; i < vec.size(); i++) - -#define PRINT_VECTOR(vec) FOR_VECTOR(vec, i)\ -{\ - cout< - bool vecToString(const vector& vec, string& res) - { - if(vec.empty()) - { - res = "[]"; - return false; - } - stringstream ss; - ss<<"[\""< - string vecToString(const vector& vec) - { - string res; - vecToString(vec, res); - return res; - } - - template - bool isInVec(const vector& vec, const T& item) - { - typename vector::const_iterator it = find(vec.begin(), vec.end(), item); - return it != vec.end(); - } - template - void splitVec(const vector& vecSrc, vector< pair > >& outVec, const vector& patterns) - { - vector tmp; - T pattern; - size_t patternSize = patterns.size(); - for(size_t i = 0; i < vecSrc.size(); i++) - { - size_t patternPos = patternSize; - for(size_t j = 0; j < patternSize; j++) - { - if(patterns[j] == vecSrc[i]) - { - patternPos = j; - break; - } - } - if(patternPos != patternSize) - { - if(!tmp.empty()) - { - outVec.push_back(make_pair >(pattern, tmp)); - tmp.clear(); - } - pattern = patterns[patternPos]; - } - else - { - tmp.push_back(vecSrc[i]); - } - } - if(!tmp.empty()) - { - outVec.push_back(make_pair >(pattern, tmp)); - } - } - - template - void splitVec(const vector& vecSrc, vector< vector >& outVec, const vector& patternVec) - { - vector tmp; - for(size_t i = 0; i < vecSrc.size(); i++) - { - bool flag = false; - for(size_t j = 0; j < patternVec.size(); j++) - { - if(patternVec[j] == vecSrc[i]) - { - flag = true; - break; - } - } - if(flag) - { - if(!tmp.empty()) - { - outVec.push_back(tmp); - tmp.clear(); - } - } - else - { - tmp.push_back(vecSrc[i]); - } - } - if(!tmp.empty()) - { - outVec.push_back(tmp); - } - } -} - -#endif From 0aa0a4f0358fd0e2539b158e744176d37e76e359 Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 4 Nov 2013 04:06:39 -0800 Subject: [PATCH 22/25] update logger.hpp --- src/Limonp/logger.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Limonp/logger.hpp b/src/Limonp/logger.hpp index ed1a3c2..763f26d 100644 --- a/src/Limonp/logger.hpp +++ b/src/Limonp/logger.hpp @@ -14,11 +14,13 @@ #include "io_functs.hpp" #include "str_functs.hpp" -#define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogWarn(fmt, ...) Logger::LoggingF(LL_WARN, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogError(fmt, ...) Logger::LoggingF(LL_ERROR, __FILE__, __LINE__, fmt, ## __VA_ARGS__) -#define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, __FILE__, __LINE__, fmt, ## __VA_ARGS__) +#define FILE_BASENAME strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__ + +#define LogDebug(fmt, ...) Logger::LoggingF(LL_DEBUG, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) +#define LogInfo(fmt, ...) Logger::LoggingF(LL_INFO, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) +#define LogWarn(fmt, ...) Logger::LoggingF(LL_WARN, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) +#define LogError(fmt, ...) Logger::LoggingF(LL_ERROR, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) +#define LogFatal(fmt, ...) Logger::LoggingF(LL_FATAL, FILE_BASENAME, __LINE__, fmt, ## __VA_ARGS__) namespace Limonp From b0c2bf649048e17df4629478024382c1028ba3ca Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 4 Nov 2013 04:10:16 -0800 Subject: [PATCH 23/25] update logger.hpp --- CMakeLists.txt | 1 - src/CMakeLists.txt | 2 -- src/Husky/CMakeLists.txt | 1 - src/server.cpp | 2 +- 4 files changed, 1 insertion(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2b07f62..8983dc2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,3 +1,2 @@ PROJECT(CPPJIEBA) ADD_SUBDIRECTORY(src) -ADD_SUBDIRECTORY(src/Husky/Limonp) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 82a804c..dc867e2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,8 +1,6 @@ SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) -INCLUDE_DIRECTORIES(./limonp ./husky) - SET(LIBCPPJIEBA_SRC HMMSegment.cpp MixSegment.cpp MPSegment.cpp Trie.cpp) ADD_LIBRARY(cppjieba STATIC ${LIBCPPJIEBA_SRC}) ADD_EXECUTABLE(segment segment.cpp) diff --git a/src/Husky/CMakeLists.txt b/src/Husky/CMakeLists.txt index 6353e4b..281fd79 100644 --- a/src/Husky/CMakeLists.txt +++ b/src/Husky/CMakeLists.txt @@ -6,4 +6,3 @@ ADD_LIBRARY(husky STATIC ${LIBHUSKY_SRC}) INSTALL(TARGETS husky ARCHIVE DESTINATION lib/CppJieba/Husky) INSTALL(FILES Daemon.h globals.h HttpReqInfo.hpp ServerFrame.h ThreadManager.hpp DESTINATION include/CppJieba/Husky) - diff --git a/src/server.cpp b/src/server.cpp index 412838c..c8cd571 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -3,7 +3,7 @@ #include #include #include -#include "Husky/Limonp/ArgvContext.hpp" +#include "Limonp/ArgvContext.hpp" #include "Husky/Daemon.h" #include "Husky/ServerFrame.h" #include "MPSegment.h" From 2f22c5507cb9493b6153386be20397ca8dbfd3db Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 4 Nov 2013 06:00:40 -0800 Subject: [PATCH 24/25] modify README.md --- README.md | 133 ++++++++++++++++++++++++++------------------- scripts/example.sh | 3 - 2 files changed, 77 insertions(+), 59 deletions(-) delete mode 100755 scripts/example.sh diff --git a/README.md b/README.md index 4a8bd9d..087d4a9 100644 --- a/README.md +++ b/README.md @@ -7,41 +7,63 @@ - `master`分支支持`utf8`编码 - `gbk`分支支持`gbk`编码 +## 安装与使用 -## 模块详解 +### 下载和安装 -### Trie树 -Trie.cpp/Trie.h 负责载入词典的trie树,主要供Segment模块使用。 +```sh +wget https://github.com/aszxqw/cppjieba/archive/master.zip -O cppjieba-master.zip +unzip cppjieba-master.zip +cd cppjieba-master +mkdir build +cd build +cmake -DCMAKE_INSTALL_PREFIX=/usr .. +make +sudo make install +``` -### Segment模块 +### 卸载 +```sh +cd build/ +cat install_manifest.txt | sudo xargs rm -rf +``` -MPSegment.cpp/MPSegment.h -(Maximum Probability)最大概率法:负责根据Trie树构建有向无环图和进行动态规划算法,是分词算法的核心。 +### 验证 -HMMSegment.cpp/HMMSegment.h -是根据HMM模型来进行分词,主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。 -HMM模型由dicts/下面的`hmm_model.utf8`提供。 -分词算法即viterbi算法。 +```sh +cd test/ +g++ -o segment.demo segment.cpp -L/usr/lib/CppJieba/ -lcppjieba +./segment # you will see the demo. +``` +运行一下 `./server` 或 `./segment` 都会有对应的帮助文档显示。 -### TransCode模块 +同时,如果想知道开发时如何使用`libcppjieba.a` 请看`test/segment.cpp`源代码即可。 -TransCode.cpp/TransCode.h 负责转换编码类型,将utf8和gbk转换成`uint16_t`类型,也负责逆转换。 +如果想知道如何搭建一个`cppjieba`中文分词的http服务请见 `test/server.cpp`源代码即可。 +若还有其他问题,欢迎`send mail`或者`open issue`。 :) +### 搭建服务 -## Demo +``` +cd ./test +g++ -o server server.cpp -L/usr/lib/CppJieba/ -L/usr/lib/CppJieba/Husky -lcppjieba -lhusky -lpthread +./server -n 4 -p 11258 -k start #启动服务,监听11258这个端口。 +./server -n 4 -p 11258 -k stop #停止服务 +``` + +#### 验证服务 + +然后用chrome浏览器打开`http://127.0.0.1:11258/?key=我来自北京邮电大学` +(用chrome的原因是chrome的默认编码就是utf-8) + +或者用命令 `curl "http://127.0.0.1:11258/?key=我来自北京邮电大学"` (ubuntu中的curl安装命令`sudo apt-get install curl`) + +## 分词效果 ### MPSegment's demo -__这部分的功能经过线上考验,一直稳定运行,暂时没有发现什么bug。__ - -``` -cd ./demo; -make; -./segment_demo testlines.utf8 -``` - Output: ``` 我来到北京清华大学 @@ -59,12 +81,6 @@ Output: ### HMMSegment's demo -``` -cd ./demo; -make; -./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM -``` - Output: ``` 我来到北京清华大学 @@ -78,11 +94,6 @@ Output: ``` ### MixSegment's demo -``` -cd ./demo; -make; -./segment_demo testlines.utf8 --algorithm cutMix -``` Output: ``` @@ -98,43 +109,53 @@ Output: 我/来自/北京邮电大学/。。。/学号/091111xx/。。。 ``` -### Server's demo - -引入了husky这个文件夹,husky是一个简单的http服务框架。 -``` -cd ./demo; -make; -./start.sh #启动一个服务,监听11258这个端口(在start.sh里面指定)。 -``` - -关闭和重启分别是`stop.sh`和`restart.sh` - -然后用chrome浏览器打开`http://127.0.0.1:11258/?key=我来自北京邮电大学` -(用chrome的原因是chrome的默认编码就是utf-8) - -或者用命令 `curl "http://127.0.0.1:11258/?key=我来自北京邮电大学"` - - ### 效果分析 以上依次是MP,HMM,Mix三种方法的效果。 + 可以看出效果最好的是Mix,也就是融合MP和HMM的切词算法。即可以准确切出词典已有的词,又可以切出像"杭研"这样的未登录词。 -## Help + + +## 模块详解 本项目主要是如下目录组成: -### Limonp +### src + +核心目录,包含主要源代码。 + +#### Trie树 +Trie.cpp/Trie.h 负责载入词典的trie树,主要供Segment模块使用。 + +#### Segment模块 + +MPSegment.cpp/MPSegment.h +(Maximum Probability)最大概率法:负责根据Trie树构建有向无环图和进行动态规划算法,是分词算法的核心。 + +HMMSegment.cpp/HMMSegment.h +是根据HMM模型来进行分词,主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。 +HMM模型由dicts/下面的`hmm_model.utf8`提供。 +分词算法即viterbi算法。 + +#### TransCode模块 + +TransCode.cpp/TransCode.h 负责转换编码类型,将utf8和gbk转换成`uint16_t`类型,也负责逆转换。 + +### src/Husky + +提供服务的框架代码, + +详见: https://github.com/aszxqw/husky + +### src/Limonp 主要是一些工具函数,例如字符串操作等。 直接include就可以使用。 -### cppjieba -核心目录,包含主要源代码。 -make 之后产生libcppjieb.a -使用方法参考如上cppcommon - +详见: https://github.com/aszxqw/limonp +## Help ### run `./segment_demo` to get help. diff --git a/scripts/example.sh b/scripts/example.sh deleted file mode 100755 index 466d702..0000000 --- a/scripts/example.sh +++ /dev/null @@ -1,3 +0,0 @@ -./segment.demo testlines.utf8 --dictpath ../dicts/jieba.dict.utf8 -./segment.demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM -./segment.demo testlines.utf8 --algorithm cutMix From 910a368afbcbb920a53e59078788f15f40498182 Mon Sep 17 00:00:00 2001 From: wyy Date: Mon, 4 Nov 2013 07:03:35 -0800 Subject: [PATCH 25/25] modify readme.md --- README.md | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 087d4a9..0cef295 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ cat install_manifest.txt | sudo xargs rm -rf ```sh cd test/ g++ -o segment.demo segment.cpp -L/usr/lib/CppJieba/ -lcppjieba -./segment # you will see the demo. +./segment.demo # you will see the demo. ``` 运行一下 `./server` 或 `./segment` 都会有对应的帮助文档显示。 @@ -49,8 +49,8 @@ g++ -o segment.demo segment.cpp -L/usr/lib/CppJieba/ -lcppjieba ``` cd ./test g++ -o server server.cpp -L/usr/lib/CppJieba/ -L/usr/lib/CppJieba/Husky -lcppjieba -lhusky -lpthread -./server -n 4 -p 11258 -k start #启动服务,监听11258这个端口。 -./server -n 4 -p 11258 -k stop #停止服务 +./server -n 4 -p 11258 -k start >> run.log 2>&1 #启动服务,监听11258这个端口。 +./server -n 4 -p 11258 -k stop #停止服务 ``` #### 验证服务 @@ -155,27 +155,6 @@ TransCode.cpp/TransCode.h 负责转换编码类型,将utf8和gbk转换成`uint 详见: https://github.com/aszxqw/limonp -## Help - -### run `./segment_demo` to get help. - -如下: -``` -usage: - ./segment_demo[options] -options: - --algorithm Supported methods are [cutDAG, cutHMM, cutMix] for now. - If not specified, the default is cutDAG - --dictpath If not specified, the default is ../dicts/jieba.dict.utf8 - --modelpath If not specified, the default is ../dicts/hmm_model.utf8 - If not specified, the default is utf8. -example: - ./segment_demo testlines.utf8 --dictpath ../dicts/jieba.dict.utf8 - ./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM - ./segment_demo testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutMix - -``` - ## 分词速度 ### MixSegment @@ -184,11 +163,11 @@ example: 测试环境: `Intel(R) Xeon(R) CPU E5506 @ 2.13GHz` -## Contact +## 联系客服 如果有运行问题或者任何疑问,欢迎联系 : wuyanyi09@gmail.com -## Thanks +## 鸣谢 "结巴中文"分词作者: SunJunyi https://github.com/fxsjy/jieba