mv filterAscii from ChineseFilter.hpp into SegmentBase.hpp

This commit is contained in:
wyy 2013-12-21 21:58:15 -08:00
parent 679179859e
commit 657aee0fda
6 changed files with 50 additions and 58 deletions

View File

@ -28,7 +28,7 @@ sudo make install
#### 测试 #### 测试
```sh ```sh
cd build && ./test/segment.demo make test
``` ```
### 启动服务 ### 启动服务

View File

@ -7,7 +7,7 @@ TARGET_LINK_LIBRARIES(cjserver pthread)
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin) INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin) INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) INSTALL(FILES HMMSegment.hpp MPSegment.hpp Trie.hpp TrieManager.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
ADD_SUBDIRECTORY(Husky) ADD_SUBDIRECTORY(Husky)
ADD_SUBDIRECTORY(Limonp) ADD_SUBDIRECTORY(Limonp)

View File

@ -1,49 +0,0 @@
#ifndef CPPJIEBA_CHINESEFILTER_H
#define CPPJIEBA_CHINESEFILTER_H
#include "TransCode.hpp"
namespace CppJieba
{
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte counts as non-ASCII when its
 * high bit (0x80) is set.
 *
 * str    : start of the buffer to scan; may be NULL (reported as an error).
 * len    : number of readable bytes in str; 0 is reported as an error.
 * resLen : out-parameter receiving the length of the run, in bytes. It is
 *          reset to 0 before any validation, so it holds a defined value
 *          even when -1 is returned for bad arguments.
 *
 * returns 0 if the run is ASCII, 1 if the run is non-ASCII, -1 on error.
 *
 * When CPPJIEBA_GBK is defined, the non-ASCII run is advanced two bytes at a
 * time; if the final step runs past len, -1 is returned and resLen is left
 * greater than len, so callers must not use resLen after a -1 result.
 * */
inline int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value on every path, including the
    // argument-error path below.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }

    // Test the high bit on an unsigned byte so the result does not depend on
    // whether plain char is signed on this platform.
    const unsigned char highBit = 0x80;
    const bool startsNonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;

    if(!startsNonAscii)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            ++resLen;
        }
        // This loop advances one byte at a time, so resLen cannot exceed len.
        return 0;
    }

    while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
    {
#ifdef CPPJIEBA_GBK
        resLen += 2;
#else
        ++resLen;
#endif
    }
    // Only reachable in the two-byte stepping mode: the last step overran len.
    if(resLen > len)
    {
        return -1;
    }
    return 1;
}
}
#endif

View File

@ -1,10 +1,11 @@
#ifndef CPPJIEBA_SEGMENTBASE_H #ifndef CPPJIEBA_SEGMENTBASE_H
#define CPPJIEBA_SEGMENTBASE_H #define CPPJIEBA_SEGMENTBASE_H
#include "ISegment.hpp" #include "TransCode.hpp"
#include "ChineseFilter.hpp"
#include "Limonp/str_functs.hpp"
#include "Limonp/logger.hpp" #include "Limonp/logger.hpp"
#include "ISegment.hpp"
#include <cassert>
namespace CppJieba namespace CppJieba
{ {
@ -74,6 +75,46 @@ namespace CppJieba
return true; return true;
#endif #endif
} }
public:
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte counts as non-ASCII when its
 * high bit (0x80) is set.
 *
 * str    : start of the buffer to scan; may be NULL (reported as an error).
 * len    : number of readable bytes in str; 0 is reported as an error.
 * resLen : out-parameter receiving the length of the run, in bytes. It is
 *          reset to 0 before any validation, so it holds a defined value
 *          even when -1 is returned for bad arguments.
 *
 * returns 0 if the run is ASCII, 1 if the run is non-ASCII, -1 on error.
 *
 * When CPPJIEBA_GBK is defined, the non-ASCII run is advanced two bytes at a
 * time; if the final step runs past len, -1 is returned and resLen is left
 * greater than len, so callers must not use resLen after a -1 result.
 * */
static int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value on every path, including the
    // argument-error path below.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }

    // Test the high bit on an unsigned byte so the result does not depend on
    // whether plain char is signed on this platform.
    const unsigned char highBit = 0x80;
    const bool startsNonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;

    if(!startsNonAscii)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            ++resLen;
        }
        // This loop advances one byte at a time, so resLen cannot exceed len.
        return 0;
    }

    while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
    {
#ifdef CPPJIEBA_GBK
        resLen += 2;
#else
        ++resLen;
#endif
    }
    // Only reachable in the two-byte stepping mode: the last step overran len.
    if(resLen > len)
    {
        return -1;
    }
    return 1;
}
}; };
} }

View File

@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp) ADD_EXECUTABLE(test.run gtest_main.cc TSegmentBase.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(gtest pthread)
TARGET_LINK_LIBRARIES(test.run gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread)

View File

@ -1,9 +1,9 @@
#include "src/ChineseFilter.hpp" #include "src/SegmentBase.hpp"
#include "gtest/gtest.h" #include "gtest/gtest.h"
using namespace CppJieba; using namespace CppJieba;
TEST(ChineseFilterTest, Test1) TEST(SegmentBaseTest, Test1)
{ {
const char* str = "heheh你好...hh"; const char* str = "heheh你好...hh";
string s; string s;
@ -18,7 +18,7 @@ TEST(ChineseFilterTest, Test1)
{ {
uint len; uint len;
const char* t = str + offset; const char* t = str + offset;
int ret = filterAscii(t, size - offset, len); int ret = SegmentBase::filterAscii(t, size - offset, len);
s.assign(t, len); s.assign(t, len);
res.push_back(s); res.push_back(s);
//cout<<s<<","<<ret<<","<<len<<endl; //cout<<s<<","<<ret<<","<<len<<endl;