mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
修复FullSegment对于有些单字没有输出的bug (Fix a FullSegment bug where some single characters were not output)
This commit is contained in:
parent
001a69d8c6
commit
3c60c35906
@ -49,11 +49,14 @@ class FullSegment: public SegmentBase {
|
||||
for (size_t j = 0; j < dags[i].nexts.size(); j++) {
|
||||
const DictUnit* du = dags[i].nexts[j].second;
|
||||
if (du == NULL) {
|
||||
continue;
|
||||
}
|
||||
wordLen = du->word.size();
|
||||
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
||||
res.push_back(du->word);
|
||||
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
||||
res.push_back(Unicode(1, dags[i].rune));
|
||||
}
|
||||
} else {
|
||||
wordLen = du->word.size();
|
||||
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
||||
res.push_back(du->word);
|
||||
}
|
||||
}
|
||||
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
|
||||
}
|
||||
|
@ -154,14 +154,17 @@ TEST(HMMSegmentTest, Test1) {
|
||||
|
||||
TEST(FullSegment, Test1) {
|
||||
FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
||||
const char* str = "我来自北京邮电大学";
|
||||
vector<string> words;
|
||||
|
||||
ASSERT_EQ(segment.cut(str, words), true);
|
||||
|
||||
string s;
|
||||
|
||||
ASSERT_TRUE(segment.cut("我来自北京邮电大学", words));
|
||||
s << words;
|
||||
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
|
||||
|
||||
|
||||
ASSERT_TRUE(segment.cut("上市公司CEO", words));
|
||||
s << words;
|
||||
ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
|
||||
}
|
||||
|
||||
TEST(QuerySegment, Test1) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user