From 657aee0fda460479b3e52fd52d65befa7b40a5ea Mon Sep 17 00:00:00 2001 From: wyy Date: Sat, 21 Dec 2013 21:58:15 -0800 Subject: [PATCH] mv filterAscii from ChineseFilter.hpp into SegmentBase.hpp --- README.md | 2 +- src/CMakeLists.txt | 2 +- src/ChineseFilter.hpp | 49 ------------------- src/SegmentBase.hpp | 47 ++++++++++++++++-- test/unittest/CMakeLists.txt | 2 +- .../{TChineseFilter.cpp => TSegmentBase.cpp} | 6 +-- 6 files changed, 50 insertions(+), 58 deletions(-) delete mode 100644 src/ChineseFilter.hpp rename test/unittest/{TChineseFilter.cpp => TSegmentBase.cpp} (83%) diff --git a/README.md b/README.md index 282335c..60c445f 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ sudo make install #### 测试 ```sh -cd build && ./test/segment.demo +make test ``` ### 启动服务 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 93359f1..94bb87f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,7 @@ TARGET_LINK_LIBRARIES(cjserver pthread) INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin) INSTALL(TARGETS cjserver RUNTIME DESTINATION bin) -INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) +INSTALL(FILES HMMSegment.hpp MPSegment.hpp Trie.hpp TrieManager.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) ADD_SUBDIRECTORY(Husky) ADD_SUBDIRECTORY(Limonp) diff --git a/src/ChineseFilter.hpp b/src/ChineseFilter.hpp deleted file mode 100644 index 1b359a8..0000000 --- a/src/ChineseFilter.hpp +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef CPPJIEBA_CHINESEFILTER_H -#define CPPJIEBA_CHINESEFILTER_H - -#include "TransCode.hpp" - -namespace CppJieba -{ - - /* - * if char is ascii, count the ascii string's length and return 0; - * else count the nonascii string's length and return 1; - * if errors, return -1; - * */ - inline int filterAscii(const char* str, uint len, uint& resLen) - { - if(!str || !len) - { - return -1; - } - char x = 0x80; - int resFlag = (str[0] & x ? 1 : 0); - resLen = 0; - if(!resFlag) - { - while(resLen < len && !(str[resLen] & x)) - { - resLen ++; - } - } - else - { - while(resLen < len && (str[resLen] & x)) - { -#ifdef CPPJIEBA_GBK - resLen += 2; -#else - resLen ++; -#endif - } - } - if(resLen > len) - { - return -1; - } - return resFlag; - } -} - -#endif diff --git a/src/SegmentBase.hpp b/src/SegmentBase.hpp index 2dea84f..740d0cb 100644 --- a/src/SegmentBase.hpp +++ b/src/SegmentBase.hpp @@ -1,10 +1,11 @@ #ifndef CPPJIEBA_SEGMENTBASE_H #define CPPJIEBA_SEGMENTBASE_H -#include "ISegment.hpp" -#include "ChineseFilter.hpp" -#include "Limonp/str_functs.hpp" +#include "TransCode.hpp" #include "Limonp/logger.hpp" +#include "ISegment.hpp" +#include + namespace CppJieba { @@ -74,6 +75,46 @@ namespace CppJieba return true; #endif } + public: + + /* + * if char is ascii, count the ascii string's length and return 0; + * else count the nonascii string's length and return 1; + * if errors, return -1; + * */ + static int filterAscii(const char* str, uint len, uint& resLen) + { + if(!str || !len) + { + return -1; + } + char x = 0x80; + int resFlag = (str[0] & x ? 1 : 0); + resLen = 0; + if(!resFlag) + { + while(resLen < len && !(str[resLen] & x)) + { + resLen ++; + } + } + else + { + while(resLen < len && (str[resLen] & x)) + { +#ifdef CPPJIEBA_GBK + resLen += 2; +#else + resLen ++; +#endif + } + } + if(resLen > len) + { + return -1; + } + return resFlag; + } }; } diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 1f59f31..a8040a2 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) -ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp) +ADD_EXECUTABLE(test.run gtest_main.cc TSegmentBase.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp) TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread) diff --git a/test/unittest/TChineseFilter.cpp b/test/unittest/TSegmentBase.cpp similarity index 83% rename from test/unittest/TChineseFilter.cpp rename to test/unittest/TSegmentBase.cpp index ad6e3f8..9fd6cf7 100644 --- a/test/unittest/TChineseFilter.cpp +++ b/test/unittest/TSegmentBase.cpp @@ -1,9 +1,9 @@ -#include "src/ChineseFilter.hpp" +#include "src/SegmentBase.hpp" #include "gtest/gtest.h" using namespace CppJieba; -TEST(ChineseFilterTest, Test1) +TEST(SegmentBaseTest, Test1) { const char* str = "heheh你好...hh"; string s; @@ -18,7 +18,7 @@ TEST(ChineseFilterTest, Test1) { uint len; const char* t = str + offset; - int ret = filterAscii(t, size - offset, len); + int ret = SegmentBase::filterAscii(t, size - offset, len); s.assign(t, len); res.push_back(s); //cout<