mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
mv filterAscii from ChineseFilter.hpp into SegmentBase.hpp
This commit is contained in:
parent
679179859e
commit
657aee0fda
@ -28,7 +28,7 @@ sudo make install
|
||||
#### 测试
|
||||
|
||||
```sh
|
||||
cd build && ./test/segment.demo
|
||||
make test
|
||||
```
|
||||
|
||||
### 启动服务
|
||||
|
@ -7,7 +7,7 @@ TARGET_LINK_LIBRARIES(cjserver pthread)
|
||||
|
||||
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
||||
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||
INSTALL(FILES HMMSegment.hpp MPSegment.hpp Trie.hpp TrieManager.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||
|
||||
ADD_SUBDIRECTORY(Husky)
|
||||
ADD_SUBDIRECTORY(Limonp)
|
||||
|
@ -1,49 +0,0 @@
|
||||
#ifndef CPPJIEBA_CHINESEFILTER_H
|
||||
#define CPPJIEBA_CHINESEFILTER_H
|
||||
|
||||
#include "TransCode.hpp"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
|
||||
/*
 * Measure the leading run of bytes in `str` that share the class of str[0].
 *
 * A byte is "ascii" when its high bit (0x80) is clear and "nonascii" when
 * it is set. The class of str[0] selects which kind of run is measured.
 *
 * str    : buffer to scan; only the first `len` bytes are read, so it does
 *          not have to be NUL-terminated.
 * len    : number of readable bytes in `str`.
 * resLen : out-parameter, length in bytes of the leading run. It is set to
 *          0 when the arguments are rejected (null `str` or zero `len`).
 *
 * returns 0 for an ascii run, 1 for a nonascii run, -1 on error.
 *
 * With CPPJIEBA_GBK defined a nonascii run advances two bytes at a time;
 * if that steps past `len` (truncated double-byte character) the function
 * returns -1 and `resLen` is left greater than `len`, so callers must not
 * use `resLen` when -1 is returned.
 * */
inline int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value before any early return.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }
    // Test the high bit on an unsigned byte so the result does not depend
    // on whether plain `char` is signed on the target platform.
    const unsigned char highBit = 0x80;
    const int resFlag = ((static_cast<unsigned char>(str[0]) & highBit) ? 1 : 0);
    if(!resFlag)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            resLen ++;
        }
    }
    else
    {
        while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
        {
#ifdef CPPJIEBA_GBK
            resLen += 2;
#else
            resLen ++;
#endif
        }
    }
    // Only reachable with CPPJIEBA_GBK: the last two-byte step overshot.
    if(resLen > len)
    {
        return -1;
    }
    return resFlag;
}
|
||||
}
|
||||
|
||||
#endif
|
@ -1,10 +1,11 @@
|
||||
#ifndef CPPJIEBA_SEGMENTBASE_H
|
||||
#define CPPJIEBA_SEGMENTBASE_H
|
||||
|
||||
#include "ISegment.hpp"
|
||||
#include "ChineseFilter.hpp"
|
||||
#include "Limonp/str_functs.hpp"
|
||||
#include "TransCode.hpp"
|
||||
#include "Limonp/logger.hpp"
|
||||
#include "ISegment.hpp"
|
||||
#include <cassert>
|
||||
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
@ -74,6 +75,46 @@ namespace CppJieba
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
public:
|
||||
|
||||
/*
 * Measure the leading run of bytes in `str` that share the class of str[0].
 *
 * A byte is "ascii" when its high bit (0x80) is clear and "nonascii" when
 * it is set. The class of str[0] selects which kind of run is measured.
 *
 * str    : buffer to scan; only the first `len` bytes are read, so it does
 *          not have to be NUL-terminated.
 * len    : number of readable bytes in `str`.
 * resLen : out-parameter, length in bytes of the leading run. It is set to
 *          0 when the arguments are rejected (null `str` or zero `len`).
 *
 * returns 0 for an ascii run, 1 for a nonascii run, -1 on error.
 *
 * With CPPJIEBA_GBK defined a nonascii run advances two bytes at a time;
 * if that steps past `len` (truncated double-byte character) the function
 * returns -1 and `resLen` is left greater than `len`, so callers must not
 * use `resLen` when -1 is returned.
 * */
static int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value before any early return.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }
    // Test the high bit on an unsigned byte so the result does not depend
    // on whether plain `char` is signed on the target platform.
    const unsigned char highBit = 0x80;
    const int resFlag = ((static_cast<unsigned char>(str[0]) & highBit) ? 1 : 0);
    if(!resFlag)
    {
        while(resLen < len && !(static_cast<unsigned char>(str[resLen]) & highBit))
        {
            resLen ++;
        }
    }
    else
    {
        while(resLen < len && (static_cast<unsigned char>(str[resLen]) & highBit))
        {
#ifdef CPPJIEBA_GBK
            resLen += 2;
#else
            resLen ++;
#endif
        }
    }
    // Only reachable with CPPJIEBA_GBK: the last two-byte step overshot.
    if(resLen > len)
    {
        return -1;
    }
    return resFlag;
}
|
||||
|
||||
};
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
|
||||
|
||||
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
|
||||
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
|
||||
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
|
||||
ADD_EXECUTABLE(test.run gtest_main.cc TSegmentBase.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
|
||||
TARGET_LINK_LIBRARIES(gtest pthread)
|
||||
TARGET_LINK_LIBRARIES(test.run gtest pthread)
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
#include "src/ChineseFilter.hpp"
|
||||
#include "src/SegmentBase.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
TEST(ChineseFilterTest, Test1)
|
||||
TEST(SegmentBaseTest, Test1)
|
||||
{
|
||||
const char* str = "heheh你好...hh";
|
||||
string s;
|
||||
@ -18,7 +18,7 @@ TEST(ChineseFilterTest, Test1)
|
||||
{
|
||||
uint len;
|
||||
const char* t = str + offset;
|
||||
int ret = filterAscii(t, size - offset, len);
|
||||
int ret = SegmentBase::filterAscii(t, size - offset, len);
|
||||
s.assign(t, len);
|
||||
res.push_back(s);
|
||||
//cout<<s<<","<<ret<<","<<len<<endl;
|
Loading…
x
Reference in New Issue
Block a user