mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
细粒度分词功能
This commit is contained in:
parent
3c60c35906
commit
1babe57ebc
@ -67,6 +67,10 @@ class Application {
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
levelSeg_.cut(sentence, words);
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<string>& words, size_t max_word_len) const {
|
||||
mpSeg_.cut(sentence, words, max_word_len);
|
||||
}
|
||||
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
return dictTrie_.insertUserWord(word, tag);
|
||||
}
|
||||
|
@ -20,6 +20,9 @@ TEST(ApplicationTest, Test1) {
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
||||
|
||||
app.cut("南京市长江大桥", words, 3);
|
||||
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", result << words);
|
||||
|
||||
app.cut("我来自北京邮电大学。。。学号123456", words, METHOD_HMM);
|
||||
result << words;
|
||||
ASSERT_EQ("[\"我来\", \"自北京\", \"邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\"]", result);
|
||||
|
@ -101,6 +101,9 @@ TEST(MPSegmentTest, Test1) {
|
||||
// MaxWordLen
|
||||
ASSERT_TRUE(segment.cut("南京市长江大桥", words, 3));
|
||||
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", s << words);
|
||||
|
||||
ASSERT_TRUE(segment.cut("南京市长江大桥", words, 0));
|
||||
ASSERT_EQ("[\"南\", \"京\", \"市\", \"长\", \"江\", \"大\", \"桥\"]", s << words);
|
||||
}
|
||||
|
||||
//TEST(MPSegmentTest, Test2) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user