FullSegment: emit single runes that have no dictionary entry

Candidates whose DictUnit pointer is NULL used to be skipped outright.
They are now emitted as one-rune words when they are the only candidate
at their position and no earlier candidate reaches past that position.
The new test case expects "CEO" to come out as "C", "E", "O".

In the NULL branch wordLen is set to 1 before the shared maxIdx update;
without that the update would reuse whatever length the previous
candidate left in wordLen.

Note: the post-image blob id on the FullSegment.hpp index line was
recorded before the wordLen assignment and the comments were added.

diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp
index a8edb40..c3a3ca4 100644
--- a/src/FullSegment.hpp
+++ b/src/FullSegment.hpp
@@ -49,11 +49,20 @@ class FullSegment: public SegmentBase {
       for (size_t j = 0; j < dags[i].nexts.size(); j++) {
         const DictUnit* du = dags[i].nexts[j].second;
         if (du == NULL) {
-          continue;
-        }
-        wordLen = du->word.size();
-        if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
-          res.push_back(du->word);
+          // A NULL DictUnit means this candidate has no dictionary entry;
+          // it spans one rune. Record that length so the maxIdx update
+          // below does not reuse the previous candidate's wordLen.
+          wordLen = 1;
+          // Emit the bare rune only if it is the sole candidate here and
+          // no earlier candidate already reaches past this position.
+          if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
+            res.push_back(Unicode(1, dags[i].rune));
+          }
+        } else {
+          wordLen = du->word.size();
+          if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
+            res.push_back(du->word);
+          }
         }
         maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
       }
diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp
index 655518f..8871dbb 100644
--- a/test/unittest/TSegments.cpp
+++ b/test/unittest/TSegments.cpp
@@ -154,14 +154,17 @@ TEST(HMMSegmentTest, Test1) {
 
 TEST(FullSegment, Test1) {
   FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
-  const char* str = "我来自北京邮电大学";
   vector<string> words;
-
-  ASSERT_EQ(segment.cut(str, words), true);
-
   string s;
+
+  ASSERT_TRUE(segment.cut("我来自北京邮电大学", words));
   s << words;
   ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
+
+
+  ASSERT_TRUE(segment.cut("上市公司CEO", words));
+  s << words;
+  ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
 }
 
 TEST(QuerySegment, Test1) {