mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
mv filterAscii from ChineseFilter.hpp into SegmentBase.hpp
This commit is contained in:
parent
679179859e
commit
657aee0fda
@ -28,7 +28,7 @@ sudo make install
|
|||||||
#### 测试
|
#### 测试
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
cd build && ./test/segment.demo
|
make test
|
||||||
```
|
```
|
||||||
|
|
||||||
### 启动服务
|
### 启动服务
|
||||||
|
@ -7,7 +7,7 @@ TARGET_LINK_LIBRARIES(cjserver pthread)
|
|||||||
|
|
||||||
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
||||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
||||||
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
INSTALL(FILES HMMSegment.hpp MPSegment.hpp Trie.hpp TrieManager.hpp ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||||
|
|
||||||
ADD_SUBDIRECTORY(Husky)
|
ADD_SUBDIRECTORY(Husky)
|
||||||
ADD_SUBDIRECTORY(Limonp)
|
ADD_SUBDIRECTORY(Limonp)
|
||||||
|
@ -1,49 +0,0 @@
|
|||||||
#ifndef CPPJIEBA_CHINESEFILTER_H
|
|
||||||
#define CPPJIEBA_CHINESEFILTER_H
|
|
||||||
|
|
||||||
#include "TransCode.hpp"
|
|
||||||
|
|
||||||
namespace CppJieba
|
|
||||||
{
|
|
||||||
|
|
||||||
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte counts as non-ASCII when its
 * high bit (0x80) is set.
 *
 * str    : input buffer; it is not required to be NUL-terminated.
 * len    : number of readable bytes in str.
 * resLen : receives the length of the leading run, in bytes. It is always
 *          written and never exceeds len; it is 0 when str is null or len is 0.
 *
 * Returns 0 when the run is ASCII, 1 when it is non-ASCII, and -1 on error
 * (null/empty input, or, when CPPJIEBA_GBK is defined, a two-byte unit that
 * is cut off by the end of the buffer).
 * */
inline int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value on every path, errors included.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }

    // Test the high bit on unsigned values so the result does not depend on
    // whether plain char is signed on the target platform.
    const unsigned char highBit = 0x80;
    const bool nonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;

#ifdef CPPJIEBA_GBK
    // Non-ASCII text is consumed in two-byte units in the GBK build.
    const uint step = nonAscii ? 2 : 1;
#else
    const uint step = 1;
#endif

    while(resLen < len
            && ((static_cast<unsigned char>(str[resLen]) & highBit) != 0) == nonAscii)
    {
        if(len - resLen < step)
        {
            // The last unit is truncated: report the error, but keep resLen
            // inside the buffer instead of letting it step past len.
            resLen = len;
            return -1;
        }
        resLen += step;
    }
    return nonAscii ? 1 : 0;
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
@ -1,10 +1,11 @@
|
|||||||
#ifndef CPPJIEBA_SEGMENTBASE_H
|
#ifndef CPPJIEBA_SEGMENTBASE_H
|
||||||
#define CPPJIEBA_SEGMENTBASE_H
|
#define CPPJIEBA_SEGMENTBASE_H
|
||||||
|
|
||||||
#include "ISegment.hpp"
|
#include "TransCode.hpp"
|
||||||
#include "ChineseFilter.hpp"
|
|
||||||
#include "Limonp/str_functs.hpp"
|
|
||||||
#include "Limonp/logger.hpp"
|
#include "Limonp/logger.hpp"
|
||||||
|
#include "ISegment.hpp"
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
|
||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
{
|
{
|
||||||
@ -74,6 +75,46 @@ namespace CppJieba
|
|||||||
return true;
|
return true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
public:
|
||||||
|
|
||||||
|
/*
 * Measure the leading run of bytes in str[0, len) that belong to the same
 * class (ASCII or non-ASCII) as str[0]. A byte counts as non-ASCII when its
 * high bit (0x80) is set.
 *
 * str    : input buffer; it is not required to be NUL-terminated.
 * len    : number of readable bytes in str.
 * resLen : receives the length of the leading run, in bytes. It is always
 *          written and never exceeds len; it is 0 when str is null or len is 0.
 *
 * Returns 0 when the run is ASCII, 1 when it is non-ASCII, and -1 on error
 * (null/empty input, or, when CPPJIEBA_GBK is defined, a two-byte unit that
 * is cut off by the end of the buffer).
 * */
static int filterAscii(const char* str, uint len, uint& resLen)
{
    // Give the out-parameter a defined value on every path, errors included.
    resLen = 0;
    if(!str || !len)
    {
        return -1;
    }

    // Test the high bit on unsigned values so the result does not depend on
    // whether plain char is signed on the target platform.
    const unsigned char highBit = 0x80;
    const bool nonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;

#ifdef CPPJIEBA_GBK
    // Non-ASCII text is consumed in two-byte units in the GBK build.
    const uint step = nonAscii ? 2 : 1;
#else
    const uint step = 1;
#endif

    while(resLen < len
            && ((static_cast<unsigned char>(str[resLen]) & highBit) != 0) == nonAscii)
    {
        if(len - resLen < step)
        {
            // The last unit is truncated: report the error, but keep resLen
            // inside the buffer instead of letting it step past len.
            resLen = len;
            return -1;
        }
        resLen += step;
    }
    return nonAscii ? 1 : 0;
}
|
||||||
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
|
|||||||
|
|
||||||
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
|
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
|
||||||
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
|
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
|
||||||
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
|
ADD_EXECUTABLE(test.run gtest_main.cc TSegmentBase.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp TTrieManager.cpp)
|
||||||
TARGET_LINK_LIBRARIES(gtest pthread)
|
TARGET_LINK_LIBRARIES(gtest pthread)
|
||||||
TARGET_LINK_LIBRARIES(test.run gtest pthread)
|
TARGET_LINK_LIBRARIES(test.run gtest pthread)
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
#include "src/ChineseFilter.hpp"
|
#include "src/SegmentBase.hpp"
|
||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
|
|
||||||
using namespace CppJieba;
|
using namespace CppJieba;
|
||||||
|
|
||||||
TEST(ChineseFilterTest, Test1)
|
TEST(SegmentBaseTest, Test1)
|
||||||
{
|
{
|
||||||
const char* str = "heheh你好...hh";
|
const char* str = "heheh你好...hh";
|
||||||
string s;
|
string s;
|
||||||
@ -18,7 +18,7 @@ TEST(ChineseFilterTest, Test1)
|
|||||||
{
|
{
|
||||||
uint len;
|
uint len;
|
||||||
const char* t = str + offset;
|
const char* t = str + offset;
|
||||||
int ret = filterAscii(t, size - offset, len);
|
int ret = SegmentBase::filterAscii(t, size - offset, len);
|
||||||
s.assign(t, len);
|
s.assign(t, len);
|
||||||
res.push_back(s);
|
res.push_back(s);
|
||||||
//cout<<s<<","<<ret<<","<<len<<endl;
|
//cout<<s<<","<<ret<<","<<len<<endl;
|
Loading…
x
Reference in New Issue
Block a user