细粒度分词功能

This commit is contained in:
yanyiwu 2015-08-30 16:35:21 +08:00
parent 3c60c35906
commit 1babe57ebc
3 changed files with 10 additions and 0 deletions

View File

@ -67,6 +67,10 @@ class Application {
vector<pair<string, size_t> >& words) const {
levelSeg_.cut(sentence, words);
}
// Segments `sentence` and stores the resulting words in `words` by forwarding
// the call, together with `max_word_len`, to mpSeg_.cut.
// NOTE(review): max_word_len presumably caps the length of each emitted word
// (the tests accompanying this change pass 3 and expect words of at most three
// characters) -- confirm against the mpSeg_ implementation.
// NOTE(review): the MPSegment test wraps segment.cut(...) in ASSERT_TRUE, so
// mpSeg_.cut appears to return a status that is discarded here -- confirm
// whether callers need it propagated.
void cut(const string& sentence,
vector<string>& words, size_t max_word_len) const {
mpSeg_.cut(sentence, words, max_word_len);
}
// Inserts a user-supplied `word` with the given `tag` by delegating to
// dictTrie_.insertUserWord, and returns that call's result unchanged.
// `tag` defaults to UNKNOWN_TAG when the caller omits it.
// NOTE(review): the boolean presumably signals whether the insertion
// succeeded -- confirm against dictTrie_'s implementation.
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dictTrie_.insertUserWord(word, tag);
}

View File

@ -20,6 +20,9 @@ TEST(ApplicationTest, Test1) {
result << words;
ASSERT_EQ("[\"\", \"来自\", \"北京邮电大学\", \"\"]", result);
app.cut("南京市长江大桥", words, 3);
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", result << words);
app.cut("我来自北京邮电大学。。。学号123456", words, METHOD_HMM);
result << words;
ASSERT_EQ("[\"我来\", \"自北京\", \"邮电大学\", \"\", \"\", \"\", \"学号\", \"123456\"]", result);

View File

@ -101,6 +101,9 @@ TEST(MPSegmentTest, Test1) {
// MaxWordLen
ASSERT_TRUE(segment.cut("南京市长江大桥", words, 3));
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", s << words);
ASSERT_TRUE(segment.cut("南京市长江大桥", words, 0));
ASSERT_EQ("[\"\", \"\", \"\", \"\", \"\", \"\", \"\"]", s << words);
}
//TEST(MPSegmentTest, Test2) {