diff --git a/README.md b/README.md index b84f1c1..534bdca 100644 --- a/README.md +++ b/README.md @@ -344,7 +344,9 @@ http://cppjieba-webdemo.herokuapp.com/ ## 客服 -`wuyanyi09@foxmail.com` +`i@yanyiwu.com` + +![image](http://yanyiwu.com/weedfs/2/5a7d1b5c0d/yanyiwu_personal_qrcodes.jpg) ## 鸣谢 @@ -353,7 +355,7 @@ https://github.com/fxsjy/jieba ## 作者 -- aszxqw https://github.com/aszxqw i@yanyiwu.com +- yanyiwu https://github.com/aszxqw i@yanyiwu.com - aholic https://github.com/aholic ruochen.xu@gmail.com [CppJieba]:https://github.com/aszxqw/cppjieba diff --git a/src/QuerySegment.hpp b/src/QuerySegment.hpp index e942e6c..dfd2072 100644 --- a/src/QuerySegment.hpp +++ b/src/QuerySegment.hpp @@ -24,15 +24,15 @@ namespace CppJieba public: QuerySegment(){}; - QuerySegment(const string& dict, const string& model, size_t maxWordLen) + QuerySegment(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { - init(dict, model, maxWordLen); + init(dict, model, maxWordLen, userDict); }; virtual ~QuerySegment(){}; public: - bool init(const string& dict, const string& model, size_t maxWordLen) + bool init(const string& dict, const string& model, size_t maxWordLen, const string& userDict = "") { - LIMONP_CHECK(_mixSeg.init(dict, model)); + LIMONP_CHECK(_mixSeg.init(dict, model, userDict)); LIMONP_CHECK(_fullSeg.init(_mixSeg.getDictTrie())); assert(maxWordLen); _maxWordLen = maxWordLen; diff --git a/test/testdata/userdict.utf8 b/test/testdata/userdict.utf8 index 0f76e49..1f40407 100644 --- a/test/testdata/userdict.utf8 +++ b/test/testdata/userdict.utf8 @@ -2,5 +2,5 @@ 韩玉鉴赏 A B -iphone6 +iPhone6 蓝翔 nz diff --git a/test/unittest/TPosTagger.cpp b/test/unittest/TPosTagger.cpp index 81417c9..89e0b37 100644 --- a/test/unittest/TPosTagger.cpp +++ b/test/unittest/TPosTagger.cpp @@ -8,8 +8,8 @@ static const char * const ANS_TEST1 = "[\"我:r\", \"是:v\", \"蓝翔:x\", \" static const char * const QUERY_TEST2 = "我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。"; static const char * const ANS_TEST2 = "[\"我:r\", \"是:v\", \"蓝翔:nz\", \"技工:n\", \"拖拉机:n\", \"学院:n\", \"手扶拖拉机:n\", \"专业:n\", \"的:uj\", \"。:x\", \"不用:v\", \"多久:m\", \",:x\", \"我:r\", \"就:d\", \"会:v\", \"升职:v\", \"加薪:nr\", \",:x\", \"当上:t\", \"总经理:n\", \",:x\", \"出任:v\", \"CEO:eng\", \",:x\", \"迎娶:v\", \"白富:x\", \"美:ns\", \",:x\", \"走上:v\", \"人生:n\", \"巅峰:n\", \"。:x\"]"; -static const char * const QUERY_TEST3 = "iphone6手机的最大特点是很容易弯曲。"; -static const char * const ANS_TEST3 = "[\"iphone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]"; +static const char * const QUERY_TEST3 = "iPhone6手机的最大特点是很容易弯曲。"; +static const char * const ANS_TEST3 = "[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]"; //static const char * const ANS_TEST3 = ""; TEST(PosTaggerTest, Test) diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp index 1955267..dcdc5c1 100644 --- a/test/unittest/TSegments.cpp +++ b/test/unittest/TSegments.cpp @@ -185,3 +185,32 @@ TEST(QuerySegment, Test1) } +TEST(QuerySegment, Test2) +{ + QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3, "../test/testdata/userdict.utf8"); + + { + const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"; + vector words; + + ASSERT_TRUE(segment.cut(str, words)); + + string s1, s2; + s1 << words; + s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \",\", \"后\", \"在\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]"; + ASSERT_EQ(s1, s2); + } + + { + const char* str = "小明硕士毕业于中国科学院计算所iPhone6"; + vector words; + + ASSERT_TRUE(segment.cut(str, words)); + + string s1, s2; + s1 << words; + s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \"iPhone6\"]"; + ASSERT_EQ(s1, s2); + } + +}