mv filterAscii from ChineseFilter.hpp into SegmentBase.hpp

This commit is contained in:
wyy 2013-12-21 21:58:15 -08:00
parent 679179859e
commit 657aee0fda
6 changed files with 50 additions and 58 deletions

View File

@ -28,7 +28,7 @@ sudo make install
#### 测试
```sh
cd build && ./test/segment.demo
make test
```
### 启动服务

View File

@ -7,7 +7,7 @@ TARGET_LINK_LIBRARIES(cjserver pthread)
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
INSTALL(FILES HMMSegment.hpp MPSegment.hpp Trie.hpp TrieManager.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
ADD_SUBDIRECTORY(Husky)
ADD_SUBDIRECTORY(Limonp)

View File

@ -1,49 +0,0 @@
#ifndef CPPJIEBA_CHINESEFILTER_H
#define CPPJIEBA_CHINESEFILTER_H
#include "TransCode.hpp"
namespace CppJieba
{
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte is non-ASCII when its high
 * bit (0x80) is set.
 *
 * str    : buffer to scan; it is read by length, never as a C string.
 * len    : number of readable bytes in str.
 * resLen : receives the length of the run. It is reset to 0 before any
 *          validation, so it holds a defined value even on the error path.
 *
 * return : 0 if the run is ASCII, 1 if it is non-ASCII,
 *          -1 if str is NULL, len is 0, or the computed run is longer than len.
 * */
inline int filterAscii(const char* str, uint len, uint& resLen)
{
    // Define the out-parameter first: callers that use resLen without
    // checking the return value must not read an indeterminate value.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }
    // Test the high bit on unsigned bytes; storing 0x80 in a plain char is an
    // implementation-defined narrowing conversion where char is signed.
    const unsigned char highBit = 0x80;
    const bool nonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;
    if(!nonAscii)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            ++resLen;
        }
    }
    else
    {
        while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
        {
#ifdef CPPJIEBA_GBK
            // Under CPPJIEBA_GBK each step consumes two bytes, so a truncated
            // trailing byte can carry resLen past len (rejected below).
            resLen += 2;
#else
            ++resLen;
#endif
        }
    }
    if(resLen > len)
    {
        return -1;
    }
    return nonAscii ? 1 : 0;
}
}
#endif

View File

@ -1,10 +1,11 @@
#ifndef CPPJIEBA_SEGMENTBASE_H
#define CPPJIEBA_SEGMENTBASE_H
#include "ISegment.hpp"
#include "ChineseFilter.hpp"
#include "Limonp/str_functs.hpp"
#include "TransCode.hpp"
#include "Limonp/logger.hpp"
#include "ISegment.hpp"
#include <cassert>
namespace CppJieba
{
@ -74,6 +75,46 @@ namespace CppJieba
return true;
#endif
}
public:
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte is non-ASCII when its high
 * bit (0x80) is set.
 *
 * str    : buffer to scan; it is read by length, never as a C string.
 * len    : number of readable bytes in str.
 * resLen : receives the length of the run. It is reset to 0 before any
 *          validation, so it holds a defined value even on the error path.
 *
 * return : 0 if the run is ASCII, 1 if it is non-ASCII,
 *          -1 if str is NULL, len is 0, or the computed run is longer than len.
 * */
static int filterAscii(const char* str, uint len, uint& resLen)
{
    // Define the out-parameter first: callers that use resLen without
    // checking the return value must not read an indeterminate value.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }
    // Test the high bit on unsigned bytes; storing 0x80 in a plain char is an
    // implementation-defined narrowing conversion where char is signed.
    const unsigned char highBit = 0x80;
    const bool nonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;
    if(!nonAscii)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            ++resLen;
        }
    }
    else
    {
        while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
        {
#ifdef CPPJIEBA_GBK
            // Under CPPJIEBA_GBK each step consumes two bytes, so a truncated
            // trailing byte can carry resLen past len (rejected below).
            resLen += 2;
#else
            ++resLen;
#endif
        }
    }
    if(resLen > len)
    {
        return -1;
    }
    return nonAscii ? 1 : 0;
}
};
}

View File

@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
ADD_EXECUTABLE(test.run gtest_main.cc TSegmentBase.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
TARGET_LINK_LIBRARIES(gtest pthread)
TARGET_LINK_LIBRARIES(test.run gtest pthread)

View File

@ -1,9 +1,9 @@
#include "src/ChineseFilter.hpp"
#include "src/SegmentBase.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
TEST(ChineseFilterTest, Test1)
TEST(SegmentBaseTest, Test1)
{
const char* str = "heheh你好...hh";
string s;
@ -18,7 +18,7 @@ TEST(ChineseFilterTest, Test1)
{
uint len;
const char* t = str + offset;
int ret = filterAscii(t, size - offset, len);
int ret = SegmentBase::filterAscii(t, size - offset, len);
s.assign(t, len);
res.push_back(s);
//cout<<s<<","<<ret<<","<<len<<endl;