mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
修复FullSegment对于有些单字没有输出的bug
This commit is contained in:
parent
001a69d8c6
commit
3c60c35906
@@ -49,12 +49,15 @@ class FullSegment: public SegmentBase {
|
|||||||
for (size_t j = 0; j < dags[i].nexts.size(); j++) {
|
for (size_t j = 0; j < dags[i].nexts.size(); j++) {
|
||||||
const DictUnit* du = dags[i].nexts[j].second;
|
const DictUnit* du = dags[i].nexts[j].second;
|
||||||
if (du == NULL) {
|
if (du == NULL) {
|
||||||
continue;
|
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
||||||
|
res.push_back(Unicode(1, dags[i].rune));
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
wordLen = du->word.size();
|
wordLen = du->word.size();
|
||||||
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
||||||
res.push_back(du->word);
|
res.push_back(du->word);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
|
maxIdx = uIdx + wordLen > maxIdx ? uIdx + wordLen : maxIdx;
|
||||||
}
|
}
|
||||||
uIdx++;
|
uIdx++;
|
||||||
|
@@ -154,14 +154,17 @@ TEST(HMMSegmentTest, Test1) {
|
|||||||
|
|
||||||
TEST(FullSegment, Test1) {
|
TEST(FullSegment, Test1) {
|
||||||
FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
||||||
const char* str = "我来自北京邮电大学";
|
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
|
|
||||||
ASSERT_EQ(segment.cut(str, words), true);
|
|
||||||
|
|
||||||
string s;
|
string s;
|
||||||
|
|
||||||
|
ASSERT_TRUE(segment.cut("我来自北京邮电大学", words));
|
||||||
s << words;
|
s << words;
|
||||||
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
|
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
|
||||||
|
|
||||||
|
|
||||||
|
ASSERT_TRUE(segment.cut("上市公司CEO", words));
|
||||||
|
s << words;
|
||||||
|
ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(QuerySegment, Test1) {
|
TEST(QuerySegment, Test1) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user