add TMixSegment.cpp for testing

This commit is contained in:
wyy 2013-12-18 22:24:39 -08:00
parent 2e2036bb73
commit 9f35b82dd1
2 changed files with 16 additions and 10 deletions

View File

@@ -26,14 +26,14 @@ namespace CppJieba
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0; virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
virtual bool cut(const string& str, vector<string>& res)const virtual bool cut(const string& str, vector<string>& res)const
{ {
if(!_getInitFlag()) assert(_getInitFlag());
{ //if(!_getInitFlag())
LogError("not inited."); //{
return false; // LogError("not inited.");
} // return false;
//}
Unicode unico; Unicode unico;
#ifdef NO_FILTER #ifdef NO_FILTER
unico.clear();
if(!TransCode::decode(str, unico)) if(!TransCode::decode(str, unico))
{ {
LogFatal("str[%s] decode failed.", str.c_str()); LogFatal("str[%s] decode failed.", str.c_str());

View File

@@ -6,10 +6,16 @@ using namespace CppJieba;
TEST(MixSegmentTest, Test1) TEST(MixSegmentTest, Test1)
{ {
MixSegment segment("../dicts/jieba.dict.utf8", "../dicts/hmm_model.utf8");; MixSegment segment("../dicts/jieba.dict.utf8", "../dicts/hmm_model.utf8");;
const char* str = "我来自北京邮电大学。。。学号 123456"; const char* str = "我来自北京邮电大学。。。 学号 123456";
const char* res[] = {"", "来自", "北京邮电大学", "","","","","学号", " 123456"}; const char* res[] = {"", "来自", "北京邮电大学", "","",""," ","学号", " 123456"};
string s; //string s;
vector<string> buf(res, res + sizeof(res)/sizeof(res[0])); //vector<string> buf(res, res + sizeof(res)/sizeof(res[0]));
vector<string> words;
ASSERT_EQ(segment.init(), true);
ASSERT_EQ(segment.cut(str, words), true);
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
//print(words);
//for(uint i = 0; i < sizeof(res)/sizeof(res[0]); i++) //for(uint i = 0; i < sizeof(res)/sizeof(res[0]); i++)
//{ //{
// buf.push_back() // buf.push_back()