Add a mixed Latin/CJK case ("B超 T恤") to the segmentation tests.

* test/testdata/testlines.utf8: append the new input line.
* test/unittest/TSegments.cpp: MixSegmentTest.Test1 now loads
  ../dict/jieba.dict.utf8 instead of the small extra dictionary and
  additionally asserts that "B超 T恤" is cut into {"B超", " ", "T恤"}.

diff --git a/test/testdata/testlines.utf8 b/test/testdata/testlines.utf8
index 83e208d..ab08432 100644
--- a/test/testdata/testlines.utf8
+++ b/test/testdata/testlines.utf8
@@ -8,3 +8,4 @@
 请在一米线外等候
 人事处女干事
 去医院做B超,叫号123
+B超 T恤
diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp
index 9986d71..8f3677f 100644
--- a/test/unittest/TSegments.cpp
+++ b/test/unittest/TSegments.cpp
@@ -44,13 +44,18 @@ using namespace CppJieba;
 
 TEST(MixSegmentTest, Test1)
 {
-    MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");;
+    MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");;
     const char* str = "我来自北京邮电大学。。。学号123456";
     const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。", "学号", "123456"};
+    const char* str2 = "B超 T恤";
+    const char* res2[] = {"B超"," ", "T恤"};
     vector<string> words;
     ASSERT_TRUE(segment);
+
     ASSERT_TRUE(segment.cut(str, words));
     ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
+    ASSERT_TRUE(segment.cut(str2, words));
+    ASSERT_EQ(words, vector<string>(res2, res2 + sizeof(res2)/sizeof(res2[0])));
 }
 
 TEST(MPSegmentTest, Test1)