开放insertUserWord接口;增加cut的默认参数,默认切词算法为Mix

This commit is contained in:
yanyiwu 2015-06-26 12:22:11 +08:00
parent 1d27559209
commit e0db070529
3 changed files with 21 additions and 1 deletions

View File

@ -1,5 +1,10 @@
# CppJieba ChangeLog
## next version
1. 新增可动态增加词典的API: insertUserWord
2. cut函数增加默认参数,默认使用Mix切词算法。关于切词算法,详见README.md。
## v3.0.1
1. 提升兼容性,修复在某些特定环境下的编译错误问题。

View File

@ -36,7 +36,7 @@ class Application {
stopWordsPath) {
}
void cut(const string& sentence, vector<string>& words,
CutMethod method) const {
CutMethod method = METHOD_MIX) const {
switch(method) {
case METHOD_MP:
mpSeg_.cut(sentence, words);
@ -57,6 +57,9 @@ class Application {
LogError("argument method is illegal.");
}
}
// Adds a word to the dictionary at runtime by forwarding `word` and `tag`
// to dictTrie_.insertUserWord.
//   word: the word to insert.
//   tag:  label passed along with the word; defaults to UNKNOWN_TAG when the
//         caller omits it (presumably a part-of-speech tag -- TODO confirm
//         against DictTrie).
// Unlike cut() and tag(), this method is not declared const.
// NOTE(review): nothing is returned to the caller; if dictTrie_.insertUserWord
// reports success/failure, that result is dropped here -- confirm whether
// callers need it.
void insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
dictTrie_.insertUserWord(word, tag);
}
// Forwards `str` and `res` unchanged to tagger_.tag.
//   str: input text handed to the tagger.
//   res: passed by non-const reference; presumably receives the
//        (word, tag) pairs produced by the tagger -- TODO confirm whether
//        it is cleared or appended to.
void tag(const string& str, vector<pair<string, string> >& res) const {
tagger_.tag(str, res);
}

View File

@ -11,6 +11,10 @@ TEST(ApplicationTest, Test1) {
"../dict/stop_words.utf8");
vector<string> words;
string result;
app.cut("他来到了网易杭研大厦", words);
result << words;
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
app.cut("我来自北京邮电大学。", words, METHOD_MP);
result << words;
@ -46,3 +50,11 @@ TEST(ApplicationTest, Test1) {
result << keywordres;
ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
}
//TEST(ApplicationTest, InsertUserWord) {
// CppJieba::Application app("../dict/jieba.dict.utf8",
// "../dict/hmm_model.utf8",
// "../dict/user.dict.utf8",
// "../dict/idf.utf8",
// "../dict/stop_words.utf8");
//}