From 3c60c35906061d01e294ba087f108cda1b9dd9d9 Mon Sep 17 00:00:00 2001 From: yanyiwu Date: Sun, 30 Aug 2015 13:09:37 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8DFullSegment=E5=AF=B9=E4=BA=8E?= =?UTF-8?q?=E6=9C=89=E4=BA=9B=E5=8D=95=E5=AD=97=E6=B2=A1=E6=9C=89=E8=BE=93?= =?UTF-8?q?=E5=87=BA=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/FullSegment.hpp | 13 ++++++++----- test/unittest/TSegments.cpp | 11 +++++++---- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/FullSegment.hpp b/src/FullSegment.hpp index a8edb40..c3a3ca4 100644 --- a/src/FullSegment.hpp +++ b/src/FullSegment.hpp @@ -49,11 +49,14 @@ class FullSegment: public SegmentBase { for (size_t j = 0; j < dags[i].nexts.size(); j++) { const DictUnit* du = dags[i].nexts[j].second; if (du == NULL) { - continue; - } - wordLen = du->word.size(); - if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) { - res.push_back(du->word); + if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) { + res.push_back(Unicode(1, dags[i].rune)); + } + } else { + wordLen = du->word.size(); + if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) { + res.push_back(du->word); + } } maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx; } diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp index 655518f..8871dbb 100644 --- a/test/unittest/TSegments.cpp +++ b/test/unittest/TSegments.cpp @@ -154,14 +154,17 @@ TEST(HMMSegmentTest, Test1) { TEST(FullSegment, Test1) { FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8"); - const char* str = "我来自北京邮电大学"; vector words; - - ASSERT_EQ(segment.cut(str, words), true); - string s; + + ASSERT_TRUE(segment.cut("我来自北京邮电大学", words)); s << words; ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]"); + + + ASSERT_TRUE(segment.cut("上市公司CEO", words)); + s << words; + ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]"); } TEST(QuerySegment, Test1) {