mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add multi user dict unittest
This commit is contained in:
parent
9f359f3783
commit
5bf7454ad2
1
test/testdata/userdict.2.utf8
vendored
Normal file
1
test/testdata/userdict.2.utf8
vendored
Normal file
@ -0,0 +1 @@
|
||||
千树万树梨花开
|
1
test/testdata/userdict.utf8
vendored
1
test/testdata/userdict.utf8
vendored
@ -4,3 +4,4 @@ A
|
||||
B
|
||||
iPhone6
|
||||
蓝翔 nz
|
||||
忽如一夜春风来
|
||||
|
@ -84,31 +84,37 @@ TEST(MixSegmentTest, UserDict) {
|
||||
ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
|
||||
}
|
||||
}
|
||||
TEST(MixSegmentTest, UserDict2) {
|
||||
MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
|
||||
{
|
||||
const char* str = "令狐冲是云计算方面的专家";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
string res;
|
||||
ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
|
||||
}
|
||||
{
|
||||
const char* str = "小明先就职于IBM,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
string res;
|
||||
res << words;
|
||||
ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
|
||||
}
|
||||
{
|
||||
const char* str = "IBM,3.14";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
string res;
|
||||
res << words;
|
||||
ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
|
||||
}
|
||||
// Checks MixSegment::cut on a segmenter constructed from three file paths,
// the last of which names a single user dictionary file
// (../test/testdata/userdict.utf8). Each case cuts one sentence into `words`
// and compares a string rendering of that vector with an expected literal.
// NOTE(review): the cases reuse `words` and `res` without clearing them, so
// this presumably relies on cut() and `res << words` replacing the previous
// contents rather than appending — confirm against MixSegment and limonp.
TEST(MixSegmentTest, TestUserDict) {
|
||||
MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
|
||||
"../test/testdata/userdict.utf8");
|
||||
vector<string> words;
|
||||
string res;
|
||||
|
||||
// Case 1: the rendering produced by `res << words` is compared directly.
segment.cut("令狐冲是云计算方面的专家", words);
|
||||
ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
|
||||
|
||||
// Case 2: "IBM" is expected to come out as three separate letters here.
segment.cut("小明先就职于IBM,后在日本京都大学深造", words);
|
||||
res << words;
|
||||
ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
|
||||
|
||||
// Case 3: letters are split again, while "3.14" is expected as one token.
segment.cut("IBM,3.14", words);
|
||||
res << words;
|
||||
ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
|
||||
|
||||
// Case 4: words are joined with "/" via limonp::join instead of `<<`. The
// first clause is expected as one word; the second is expected to be split
// into four pieces with only this user dictionary configured.
segment.cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树/万树/梨花/开", res);
|
||||
}
|
||||
|
||||
// Checks MixSegment::cut when the third constructor argument names two user
// dictionary files in one string, separated by ':'
// (../test/testdata/userdict.utf8 and ../test/testdata/userdict.2.utf8).
// NOTE(review): presumably MixSegment splits that argument on ':' and loads
// every listed file — confirm against the MixSegment constructor.
TEST(MixSegmentTest, TestMultiUserDict) {
|
||||
MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
|
||||
"../test/testdata/userdict.utf8:../test/testdata/userdict.2.utf8");
|
||||
vector<string> words;
|
||||
string res;
|
||||
|
||||
// With both files named, each clause of the sentence is expected to come out
// as a single word; the words are joined with "/" for the comparison.
segment.cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树万树梨花开", res);
|
||||
}
|
||||
|
||||
TEST(MPSegmentTest, Test1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user