mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
WordRange construct
This commit is contained in:
parent
339e3ca772
commit
6ff6fe1430
@ -63,13 +63,13 @@ class FullSegment: public SegmentBase {
|
|||||||
const DictUnit* du = dags[i].nexts[j].second;
|
const DictUnit* du = dags[i].nexts[j].second;
|
||||||
if (du == NULL) {
|
if (du == NULL) {
|
||||||
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
if (dags[i].nexts.size() == 1 && maxIdx <= uIdx) {
|
||||||
unicode::WordRange wr = {begin + i, begin + nextoffset};
|
unicode::WordRange wr(begin + i, begin + nextoffset);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
wordLen = du->word.size();
|
wordLen = du->word.size();
|
||||||
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
if (wordLen >= 2 || (dags[i].nexts.size() == 1 && maxIdx <= uIdx)) {
|
||||||
unicode::WordRange wr = {begin + i, begin + nextoffset};
|
unicode::WordRange wr(begin + i, begin + nextoffset);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -57,7 +57,7 @@ class HMMSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
right ++;
|
right ++;
|
||||||
} while (false);
|
} while (false);
|
||||||
unicode::WordRange wr = {left, right - 1};
|
unicode::WordRange wr(left, right - 1);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
left = right;
|
left = right;
|
||||||
} else {
|
} else {
|
||||||
@ -114,7 +114,7 @@ class HMMSegment: public SegmentBase {
|
|||||||
for (size_t i = 0; i < status.size(); i++) {
|
for (size_t i = 0; i < status.size(); i++) {
|
||||||
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
|
||||||
right = begin + i + 1;
|
right = begin + i + 1;
|
||||||
unicode::WordRange wr = {left, right - 1};
|
unicode::WordRange wr(left, right - 1);
|
||||||
res.push_back(wr);
|
res.push_back(wr);
|
||||||
left = right;
|
left = right;
|
||||||
}
|
}
|
||||||
|
@ -99,11 +99,11 @@ class MPSegment: public SegmentBase {
|
|||||||
const DictUnit* p = dags[i].pInfo;
|
const DictUnit* p = dags[i].pInfo;
|
||||||
if (p) {
|
if (p) {
|
||||||
assert(p->word.size() >= 1);
|
assert(p->word.size() >= 1);
|
||||||
unicode::WordRange wr = {begin + i, begin + i + p->word.size() - 1};
|
unicode::WordRange wr(begin + i, begin + i + p->word.size() - 1);
|
||||||
words.push_back(wr);
|
words.push_back(wr);
|
||||||
i += p->word.size();
|
i += p->word.size();
|
||||||
} else { //single chinese word
|
} else { //single chinese word
|
||||||
unicode::WordRange wr = {begin + i, begin + i};
|
unicode::WordRange wr(begin + i, begin + i);
|
||||||
words.push_back(wr);
|
words.push_back(wr);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,9 @@ typedef limonp::LocalVector<struct RuneStr> RuneStrArray;
|
|||||||
struct WordRange {
|
struct WordRange {
|
||||||
RuneStrArray::const_iterator left;
|
RuneStrArray::const_iterator left;
|
||||||
RuneStrArray::const_iterator right;
|
RuneStrArray::const_iterator right;
|
||||||
|
WordRange(RuneStrArray::const_iterator l, RuneStrArray::const_iterator r)
|
||||||
|
: left(l), right(r) {
|
||||||
|
}
|
||||||
size_t Length() const {
|
size_t Length() const {
|
||||||
return right - left + 1;
|
return right - left + 1;
|
||||||
}
|
}
|
||||||
@ -44,12 +47,6 @@ struct WordRange {
|
|||||||
}
|
}
|
||||||
}; // struct WordRange
|
}; // struct WordRange
|
||||||
|
|
||||||
//struct RuneWordStr {
|
|
||||||
// Unicode word;
|
|
||||||
// const char* str;
|
|
||||||
// size_t len;
|
|
||||||
//}; // struct RuneWordStr
|
|
||||||
|
|
||||||
struct RuneStrLite {
|
struct RuneStrLite {
|
||||||
uint32_t rune;
|
uint32_t rune;
|
||||||
uint32_t len;
|
uint32_t len;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user