diff --git a/src/MixSegment.hpp b/src/MixSegment.hpp index 02e7563..7b72289 100644 --- a/src/MixSegment.hpp +++ b/src/MixSegment.hpp @@ -34,12 +34,13 @@ class MixSegment: public SegmentBase { } void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector& res, bool hmm) const { + if (!hmm) { + mpSeg_.cut(begin, end, res); + return; + } vector words; words.reserve(end - begin); mpSeg_.cut(begin, end, words); - if (!hmm) { - return; - } vector hmmRes; hmmRes.reserve(end - begin); diff --git a/test/unittest/TSegments.cpp b/test/unittest/TSegments.cpp index 97a36f2..0d02176 100644 --- a/test/unittest/TSegments.cpp +++ b/test/unittest/TSegments.cpp @@ -12,15 +12,42 @@ using namespace CppJieba; TEST(MixSegmentTest, Test1) { MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");; - const char* str = "我来自北京邮电大学。。。学号123456,用AK47"; - const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。", "学号", "123456",",","用","AK47"}; - const char* str2 = "B超 T恤"; - const char* res2[] = {"B超"," ", "T恤"}; + string sentence; vector words; - segment.cut(str, words); - ASSERT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); - segment.cut(str2, words); - ASSERT_EQ(words, vector(res2, res2 + sizeof(res2)/sizeof(res2[0]))); + string actual; + string expected; + + { + sentence = "我来自北京邮电大学。。。学号123456,用AK47"; + expected = "我/来自/北京邮电大学/。/。/。/学号/123456/,/用/AK47"; + segment.cut(sentence, words); + actual = join(words.begin(), words.end(), "/"); + ASSERT_EQ(actual, expected); + } + + { + sentence = "B超 T恤"; + expected = "B超/ /T恤"; + segment.cut(sentence, words); + actual = join(words.begin(), words.end(), "/"); + ASSERT_EQ(actual, expected); + } + + { + sentence = "他来到了网易杭研大厦"; + expected = "他/来到/了/网易/杭/研/大厦"; + segment.cut(sentence, words, false); + actual = join(words.begin(), words.end(), "/"); + ASSERT_EQ(actual, expected); + } + + { + sentence = "他来到了网易杭研大厦"; + expected = "他/来到/了/网易/杭研/大厦"; + segment.cut(sentence, words); + actual = join(words.begin(), words.end(), "/"); + ASSERT_EQ(actual, expected); + } } TEST(MixSegmentTest, NoUserDict) {