mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
108 lines
3.1 KiB
C++
108 lines
3.1 KiB
C++
#ifndef CPPJIEBA_CHINESEFILTER_H
|
|
#define CPPJIEBA_CHINESEFILTER_H
|
|
|
|
#include "globals.h"
|
|
#include "TransCode.hpp"
|
|
|
|
namespace CppJieba
|
|
{
|
|
|
|
class ChineseFilter;
|
|
|
|
class ChFilterIterator
|
|
{
|
|
public:
|
|
const Unicode * ptUnico;
|
|
UniConIter begin;
|
|
UniConIter end;
|
|
CHAR_TYPE charType;
|
|
ChFilterIterator& operator++()
|
|
{
|
|
return *this = _get(end);
|
|
}
|
|
ChFilterIterator operator++(int)
|
|
{
|
|
ChFilterIterator res = *this;
|
|
*this = _get(end);
|
|
return res;
|
|
}
|
|
bool operator==(const ChFilterIterator& iter)
|
|
{
|
|
return begin == iter.begin && end == iter.end;
|
|
}
|
|
bool operator!=(const ChFilterIterator& iter)
|
|
{
|
|
return !(*this == iter);
|
|
}
|
|
ChFilterIterator& operator=(const ChFilterIterator& iter)
|
|
{
|
|
ptUnico = iter.ptUnico;
|
|
begin = iter.begin;
|
|
end = iter.end;
|
|
charType = iter.charType;
|
|
return *this;
|
|
}
|
|
|
|
public:
|
|
ChFilterIterator(const Unicode * ptu, UniConIter be, UniConIter en, CHAR_TYPE is):ptUnico(ptu), begin(be), end(en), charType(is){};
|
|
ChFilterIterator(const Unicode * ptu):ptUnico(ptu){*this = _get(ptUnico->begin());};
|
|
private:
|
|
ChFilterIterator(){}
|
|
private:
|
|
CHAR_TYPE _charType(uint16_t x)const
|
|
{
|
|
if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a))
|
|
{
|
|
return DIGIT_OR_LETTER;
|
|
}
|
|
if(x >= 0x4e00 && x <= 0x9fff)
|
|
{
|
|
return CHWORD;
|
|
}
|
|
return OTHERS;
|
|
}
|
|
ChFilterIterator _get(UniConIter iter)
|
|
{
|
|
UniConIter _begin = iter;
|
|
const UniConIter& _end = ptUnico->end();
|
|
if(iter == _end)
|
|
{
|
|
return ChFilterIterator(ptUnico, end, end, OTHERS);
|
|
}
|
|
CHAR_TYPE charType = _charType(*iter);
|
|
iter ++;
|
|
while(iter != _end &&charType == _charType(*iter))
|
|
{
|
|
iter++;
|
|
}
|
|
return ChFilterIterator(ptUnico, _begin, iter, charType);
|
|
}
|
|
|
|
};
|
|
class ChineseFilter
|
|
{
|
|
private:
|
|
Unicode _unico;
|
|
public:
|
|
typedef ChFilterIterator iterator;
|
|
public:
|
|
ChineseFilter(){};
|
|
~ChineseFilter(){};
|
|
public:
|
|
bool feed(const string& str)
|
|
{
|
|
return TransCode::decode(str, _unico);
|
|
}
|
|
iterator begin()
|
|
{
|
|
return iterator(&_unico);
|
|
}
|
|
iterator end()
|
|
{
|
|
return iterator(&_unico, _unico.end(), _unico.end(), OTHERS);
|
|
}
|
|
};
|
|
}
|
|
|
|
#endif
|