add multi user dict unittest

This commit is contained in:
yanyiwu 2015-09-25 16:07:01 +08:00
parent 9f359f3783
commit 5bf7454ad2
3 changed files with 33 additions and 25 deletions

1
test/testdata/userdict.2.utf8 vendored Normal file
View File

@ -0,0 +1 @@
千树万树梨花开

View File

@ -4,3 +4,4 @@ A
B
iPhone6
蓝翔 nz
忽如一夜春风来

View File

@ -84,31 +84,37 @@ TEST(MixSegmentTest, UserDict) {
ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
}
}
// Segments three inputs with a MixSegment built from the small main
// dictionary, the HMM model and the single user dictionary userdict.utf8,
// and compares the serialised token list against the expected output.
// The expected token lists must concatenate back to the input text, so no
// token may be empty.
TEST(MixSegmentTest, UserDict2) {
  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
  {
    const char* str = "令狐冲是云计算方面的专家";
    vector<string> words;
    segment.cut(str, words);
    string res;
    // `res << words` is a project-provided operator that serialises the
    // vector into res; its result is compared directly here.
    ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
  }
  {
    const char* str = "小明先就职于IBM,后在日本京都大学深造";
    vector<string> words;
    segment.cut(str, words);
    string res;
    res << words;
    // "IBM" is expected to come out letter by letter with this dictionary set.
    ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
  }
  {
    const char* str = "IBM,3.14";
    vector<string> words;
    segment.cut(str, words);
    string res;
    res << words;
    ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
  }
// Exercises MixSegment with exactly one user dictionary (userdict.utf8) on
// top of the small main dictionary and the HMM model.
//
// `words` and `res` are shared by all four cases. NOTE(review): this relies
// on segment.cut() and the project's `string << vector` operator replacing
// the previous contents rather than appending - confirm against their
// definitions.
//
// Every expected token list concatenates back to the input text, so no
// token may be empty.
TEST(MixSegmentTest, TestUserDict) {
  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
                     "../test/testdata/userdict.utf8");
  vector<string> words;
  string res;

  segment.cut("令狐冲是云计算方面的专家", words);
  ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);

  segment.cut("小明先就职于IBM,后在日本京都大学深造", words);
  res << words;
  // "IBM" is expected to come out letter by letter with this dictionary set.
  ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);

  segment.cut("IBM,3.14", words);
  res << words;
  ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);

  // Only the first verse is expected to survive as a single token here; the
  // second verse is split into smaller words. Compare with TestMultiUserDict,
  // where a second user dictionary is loaded as well.
  segment.cut("忽如一夜春风来,千树万树梨花开", words);
  res = limonp::join(words.begin(), words.end(), "/");
  ASSERT_EQ("忽如一夜春风来/,/千树/万树/梨花/开", res);
}
// Exercises MixSegment when the user-dictionary argument names two files
// joined by ':' (userdict.utf8 and userdict.2.utf8).
//
// With both dictionaries loaded, each verse of the input is expected to be
// kept as a single token, separated only by the full-width comma. The
// expected string is the tokens joined with "/", so it must contain every
// character of the input, including that comma.
TEST(MixSegmentTest, TestMultiUserDict) {
  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
                     "../test/testdata/userdict.utf8:../test/testdata/userdict.2.utf8");
  vector<string> words;
  string res;

  segment.cut("忽如一夜春风来,千树万树梨花开", words);
  res = limonp::join(words.begin(), words.end(), "/");
  ASSERT_EQ("忽如一夜春风来/,/千树万树梨花开", res);
}
TEST(MPSegmentTest, Test1) {