diff --git a/ChangeLog.md b/ChangeLog.md index cdb01bf..535ce00 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,10 @@ # CppJieba ChangeLog +## next version + +1. 新增可动态增加词典的API: insertUserWord +2. cut函数增加默认参数,默认使用Mix切词算法。关于切词算法详见README.md + ## v3.0.1 1. 提升兼容性,修复在某些特定环境下的编译错误问题。 diff --git a/src/Application.hpp b/src/Application.hpp index c51a62a..3e8f104 100644 --- a/src/Application.hpp +++ b/src/Application.hpp @@ -36,7 +36,7 @@ class Application { stopWordsPath) { } void cut(const string& sentence, vector<string>& words, - CutMethod method) const { + CutMethod method = METHOD_MIX) const { switch(method) { case METHOD_MP: mpSeg_.cut(sentence, words); @@ -57,6 +57,9 @@ class Application { LogError("argument method is illegal."); } } + void insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { + dictTrie_.insertUserWord(word, tag); + } void tag(const string& str, vector<pair<string, string> >& res) const { tagger_.tag(str, res); } diff --git a/test/unittest/TApplication.cpp b/test/unittest/TApplication.cpp index 00f1bcf..3023a51 100644 --- a/test/unittest/TApplication.cpp +++ b/test/unittest/TApplication.cpp @@ -11,6 +11,10 @@ TEST(ApplicationTest, Test1) { "../dict/stop_words.utf8"); vector<string> words; string result; + + app.cut("他来到了网易杭研大厦", words); + result << words; + ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result); app.cut("我来自北京邮电大学。", words, METHOD_MP); result << words; @@ -46,3 +50,11 @@ TEST(ApplicationTest, Test1) { result << keywordres; ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]"); } + +//TEST(ApplicationTest, InsertUserWord) { +// CppJieba::Application app("../dict/jieba.dict.utf8", +// "../dict/hmm_model.utf8", +// "../dict/user.dict.utf8", +// "../dict/idf.utf8", +// "../dict/stop_words.utf8"); +//}