mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add filterAscii
This commit is contained in:
parent
1bdce8904f
commit
5692220756
@ -99,6 +99,36 @@ namespace CppJieba
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Measure the leading run of same-class bytes in str[0, len).
 *
 * A byte is "ascii" when its high bit (0x80) is clear and "nonascii" when it
 * is set. The class of str[0] decides which kind of run is measured; the scan
 * stops at the first byte of the other class or after len bytes, whichever
 * comes first. Only the first len bytes of str are ever read.
 *
 * str     buffer to scan
 * len     number of readable bytes in str
 * resLen  out: length of the leading run (1..len); set to 0 on error
 *
 * returns 0 if the run is ascii, 1 if the run is nonascii,
 *         -1 if str is NULL or len is 0.
 */
inline int filterAscii(const char* str, uint len, uint& resLen)
{
    if(!str || !len)
    {
        // Give the out-parameter a defined value so a caller that ignores
        // the return code never advances by an indeterminate amount.
        resLen = 0;
        return -1;
    }

    // Test the high bit on an unsigned byte instead of relying on how 0x80
    // converts to a (possibly signed) plain char.
    const unsigned char highBit = 0x80;
    const bool nonAscii = (static_cast<unsigned char>(str[0]) & highBit) != 0;

    // Extend the run while the following bytes belong to the same class as
    // the first one.
    uint pos = 1;
    while(pos < len && ((static_cast<unsigned char>(str[pos]) & highBit) != 0) == nonAscii)
    {
        ++pos;
    }

    resLen = pos;
    return nonAscii ? 1 : 0;
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,26 +1,38 @@
|
||||
#include <ChineseFilter.h>
|
||||
#include "../src/ChineseFilter.hpp"
|
||||
|
||||
#ifdef UT
|
||||
using namespace CppJieba;
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
ChineseFilter chFilter;
|
||||
ifstream ifs("../demo/testlines.utf8");
|
||||
//ChineseFilter chFilter;
|
||||
ifstream ifs(argv[1]);
|
||||
string line;
|
||||
string s;
|
||||
while(getline(ifs, line))
|
||||
{
|
||||
chFilter.feed(line);
|
||||
for(ChineseFilter::iterator it = chFilter.begin(); it != chFilter.end(); it++)
|
||||
const char * str = line.c_str();
|
||||
uint size = line.size();
|
||||
uint offset = 0;
|
||||
while(offset < size)
|
||||
{
|
||||
//cout<<__FILE__<<__LINE__<<endl;
|
||||
string tmp;
|
||||
TransCode::encode(it.begin, it.end, tmp);
|
||||
cout<<tmp<<endl;
|
||||
uint len;
|
||||
const char* t = str+offset;
|
||||
int ret = filterAscii(t, size, len);
|
||||
s.assign(t, len);
|
||||
cout<<s<<","<<ret<<","<<len<<endl;
|
||||
offset += len;
|
||||
}
|
||||
|
||||
|
||||
//chFilter.feed(line);
|
||||
//for(ChineseFilter::iterator it = chFilter.begin(); it != chFilter.end(); it++)
|
||||
//{
|
||||
// //cout<<__FILE__<<__LINE__<<endl;
|
||||
// string tmp;
|
||||
// TransCode::encode(it.begin, it.end, tmp);
|
||||
// cout<<tmp<<endl;
|
||||
//}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user