mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
开放insertUserWord接口;增加cut的默认参数,默认切词算法为Mix
This commit is contained in:
parent
1d27559209
commit
e0db070529
@ -1,5 +1,10 @@
|
|||||||
# CppJieba ChangeLog
|
# CppJieba ChangeLog
|
||||||
|
|
||||||
|
## next version
|
||||||
|
|
||||||
|
1. 新增可动态增加词典的API: insertUserWord
|
||||||
|
2. cut函数增加默认参数,默认使用Mix切词算法。关于切词算法详见README.md
|
||||||
|
|
||||||
## v3.0.1
|
## v3.0.1
|
||||||
|
|
||||||
1. 提升兼容性,修复在某些特定环境下的编译错误问题。
|
1. 提升兼容性,修复在某些特定环境下的编译错误问题。
|
||||||
|
@ -36,7 +36,7 @@ class Application {
|
|||||||
stopWordsPath) {
|
stopWordsPath) {
|
||||||
}
|
}
|
||||||
void cut(const string& sentence, vector<string>& words,
|
void cut(const string& sentence, vector<string>& words,
|
||||||
CutMethod method) const {
|
CutMethod method = METHOD_MIX) const {
|
||||||
switch(method) {
|
switch(method) {
|
||||||
case METHOD_MP:
|
case METHOD_MP:
|
||||||
mpSeg_.cut(sentence, words);
|
mpSeg_.cut(sentence, words);
|
||||||
@ -57,6 +57,9 @@ class Application {
|
|||||||
LogError("argument method is illegal.");
|
LogError("argument method is illegal.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
void insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||||
|
dictTrie_.insertUserWord(word, tag);
|
||||||
|
}
|
||||||
void tag(const string& str, vector<pair<string, string> >& res) const {
|
void tag(const string& str, vector<pair<string, string> >& res) const {
|
||||||
tagger_.tag(str, res);
|
tagger_.tag(str, res);
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,10 @@ TEST(ApplicationTest, Test1) {
|
|||||||
vector<string> words;
|
vector<string> words;
|
||||||
string result;
|
string result;
|
||||||
|
|
||||||
|
app.cut("他来到了网易杭研大厦", words);
|
||||||
|
result << words;
|
||||||
|
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||||
|
|
||||||
app.cut("我来自北京邮电大学。", words, METHOD_MP);
|
app.cut("我来自北京邮电大学。", words, METHOD_MP);
|
||||||
result << words;
|
result << words;
|
||||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
||||||
@ -46,3 +50,11 @@ TEST(ApplicationTest, Test1) {
|
|||||||
result << keywordres;
|
result << keywordres;
|
||||||
ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TEST(ApplicationTest, InsertUserWord) {
|
||||||
|
// CppJieba::Application app("../dict/jieba.dict.utf8",
|
||||||
|
// "../dict/hmm_model.utf8",
|
||||||
|
// "../dict/user.dict.utf8",
|
||||||
|
// "../dict/idf.utf8",
|
||||||
|
// "../dict/stop_words.utf8");
|
||||||
|
//}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user