Summary of this patch (free text ahead of the first file header):
  * adds test/testdata/userdict.2.utf8 containing one entry;
  * appends one entry to test/testdata/userdict.utf8;
  * replaces TEST(MixSegmentTest, UserDict2) with TEST(MixSegmentTest, TestUserDict),
    which reuses one words/res pair across several cut() calls, and adds
    TEST(MixSegmentTest, TestMultiUserDict), which passes two user-dictionary
    paths joined by ':' as the third MixSegment constructor argument.
NOTE(review): leading whitespace inside the hunks was reconstructed assuming a
two-space indent; confirm against the target file before applying.

diff --git a/test/testdata/userdict.2.utf8 b/test/testdata/userdict.2.utf8
new file mode 100644
index 0000000..fd38536
--- /dev/null
+++ b/test/testdata/userdict.2.utf8
@@ -0,0 +1 @@
+千树万树梨花开
diff --git a/test/testdata/userdict.utf8 b/test/testdata/userdict.utf8
index 1f40407..6477fef 100644
--- a/test/testdata/userdict.utf8
+++ b/test/testdata/userdict.utf8
@@ -4,3 +4,4 @@ A
 B
 iPhone6
 蓝翔 nz
+忽如一夜春风来
diff --git a/test/unittest/segments_test.cpp b/test/unittest/segments_test.cpp
index 8436010..691710e 100644
--- a/test/unittest/segments_test.cpp
+++ b/test/unittest/segments_test.cpp
@@ -84,31 +84,37 @@ TEST(MixSegmentTest, UserDict) {
     ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
   }
 }
-TEST(MixSegmentTest, UserDict2) {
-  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
-  {
-    const char* str = "令狐冲是云计算方面的专家";
-    vector<string> words;
-    segment.cut(str, words);
-    string res;
-    ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
-  }
-  {
-    const char* str = "小明先就职于IBM,后在日本京都大学深造";
-    vector<string> words;
-    segment.cut(str, words);
-    string res;
-    res << words;
-    ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
-  }
-  {
-    const char* str = "IBM,3.14";
-    vector<string> words;
-    segment.cut(str, words);
-    string res;
-    res << words;
-    ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
-  }
+TEST(MixSegmentTest, TestUserDict) {
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
+        "../test/testdata/userdict.utf8");
+  vector<string> words;
+  string res;
+
+  segment.cut("令狐冲是云计算方面的专家", words);
+  ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
+
+  segment.cut("小明先就职于IBM,后在日本京都大学深造", words);
+  res << words;
+  ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
+
+  segment.cut("IBM,3.14", words);
+  res << words;
+  ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
+
+  segment.cut("忽如一夜春风来,千树万树梨花开", words);
+  res = limonp::join(words.begin(), words.end(), "/");
+  ASSERT_EQ("忽如一夜春风来/,/千树/万树/梨花/开", res);
+}
+
+TEST(MixSegmentTest, TestMultiUserDict) {
+  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
+        "../test/testdata/userdict.utf8:../test/testdata/userdict.2.utf8");
+  vector<string> words;
+  string res;
+
+  segment.cut("忽如一夜春风来,千树万树梨花开", words);
+  res = limonp::join(words.begin(), words.end(), "/");
+  ASSERT_EQ("忽如一夜春风来/,/千树万树梨花开", res);
 }
 
 TEST(MPSegmentTest, Test1) {