From e0db0705294cdc53e784a566a1d95b5e5840cf5a Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Fri, 26 Jun 2015 12:22:11 +0800 Subject: [PATCH] =?UTF-8?q?=E5=BC=80=E6=94=BEinsertUserWord=E6=8E=A5?= =?UTF-8?q?=E5=8F=A3=EF=BC=9B=E5=A2=9E=E5=8A=A0cut=E7=9A=84=E9=BB=98?= =?UTF-8?q?=E8=AE=A4=E5=8F=82=E6=95=B0=EF=BC=8C=E9=BB=98=E8=AE=A4=E5=88=87?= =?UTF-8?q?=E8=AF=8D=E7=AE=97=E6=B3=95=E4=B8=BAMix?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 5 +++++ src/Application.hpp | 5 ++++- test/unittest/TApplication.cpp | 12 ++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index cdb01bf..535ce00 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,5 +1,10 @@ # CppJieba ChangeLog +## next version + +1. 新增可动态增加词典的API: insertUserWord +2. cut函数增加默认参数,默认使用Mix切词算法。关于切词算法详见README.md + ## v3.0.1 1. 提升兼容性,修复在某些特定环境下的编译错误问题。 diff --git a/src/Application.hpp b/src/Application.hpp index c51a62a..3e8f104 100644 --- a/src/Application.hpp +++ b/src/Application.hpp @@ -36,7 +36,7 @@ class Application { stopWordsPath) { } void cut(const string& sentence, vector& words, - CutMethod method) const { + CutMethod method = METHOD_MIX) const { switch(method) { case METHOD_MP: mpSeg_.cut(sentence, words); @@ -57,6 +57,9 @@ class Application { LogError("argument method is illegal."); } } + void insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { + dictTrie_.insertUserWord(word, tag); + } void tag(const string& str, vector >& res) const { tagger_.tag(str, res); } diff --git a/test/unittest/TApplication.cpp b/test/unittest/TApplication.cpp index 00f1bcf..3023a51 100644 --- a/test/unittest/TApplication.cpp +++ b/test/unittest/TApplication.cpp @@ -11,6 +11,10 @@ TEST(ApplicationTest, Test1) { "../dict/stop_words.utf8"); vector words; string result; + + app.cut("他来到了网易杭研大厦", words); + result << words; + ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result); app.cut("我来自北京邮电大学。", words, METHOD_MP); result << words; @@ -46,3 +50,11 @@ TEST(ApplicationTest, Test1) { result << keywordres; ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]"); } + +//TEST(ApplicationTest, InsertUserWord) { +// CppJieba::Application app("../dict/jieba.dict.utf8", +// "../dict/hmm_model.utf8", +// "../dict/user.dict.utf8", +// "../dict/idf.utf8", +// "../dict/stop_words.utf8"); +//}