mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
use offset instead of str in RuneStr
This commit is contained in:
parent
e7a45d2dde
commit
b6703aba90
@ -37,7 +37,7 @@ class FullSegment: public SegmentBase {
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetStringsFromWordRanges(wrs, words);
|
||||
GetStringsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
|
@ -35,7 +35,7 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetStringsFromWordRanges(wrs, words);
|
||||
GetStringsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
||||
RuneStrArray::const_iterator left = begin;
|
||||
|
@ -38,7 +38,7 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetStringsFromWordRanges(wrs, words);
|
||||
GetStringsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin,
|
||||
RuneStrArray::const_iterator end,
|
||||
|
@ -31,7 +31,7 @@ class MixSegment: public SegmentBase {
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetStringsFromWordRanges(wrs, words);
|
||||
GetStringsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||
|
@ -37,7 +37,7 @@ class QuerySegment: public SegmentBase {
|
||||
}
|
||||
words.clear();
|
||||
words.reserve(wrs.size());
|
||||
GetStringsFromWordRanges(wrs, words);
|
||||
GetStringsFromWordRanges(sentence, wrs, words);
|
||||
}
|
||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||
//use mix Cut first
|
||||
|
@ -9,16 +9,19 @@
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
typedef uint32_t Rune;
|
||||
|
||||
struct RuneStr {
|
||||
Rune rune;
|
||||
const char* str;
|
||||
uint32_t offset;
|
||||
uint32_t len;
|
||||
RuneStr(): rune(0), str(NULL), len(0) {
|
||||
RuneStr(): rune(0), offset(0), len(0) {
|
||||
}
|
||||
RuneStr(Rune r, const char* s, uint32_t l)
|
||||
: rune(r), str(s), len(l) {
|
||||
RuneStr(Rune r, uint32_t o, uint32_t l)
|
||||
: rune(r), offset(o), len(l) {
|
||||
}
|
||||
}; // struct RuneStr
|
||||
|
||||
@ -118,14 +121,14 @@ inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes)
|
||||
if (rp.len == 0) {
|
||||
return false;
|
||||
}
|
||||
RuneStr x(rp.rune, s + i, rp.len);
|
||||
RuneStr x(rp.rune, i, rp.len);
|
||||
runes.push_back(x);
|
||||
i += rp.len;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool DecodeRunesInString(const std::string& s, RuneStrArray& runes) {
|
||||
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
|
||||
return DecodeRunesInString(s.c_str(), s.size(), runes);
|
||||
}
|
||||
|
||||
@ -142,16 +145,16 @@ inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool IsSingleWord(const std::string& str) {
|
||||
inline bool IsSingleWord(const string& str) {
|
||||
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
|
||||
return rp.len == str.size();
|
||||
}
|
||||
|
||||
inline bool DecodeRunesInString(const std::string& s, Unicode& unicode) {
|
||||
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
|
||||
return DecodeRunesInString(s.c_str(), s.size(), unicode);
|
||||
}
|
||||
|
||||
inline Unicode DecodeRunesInString(const std::string& s) {
|
||||
inline Unicode DecodeRunesInString(const string& s) {
|
||||
Unicode result;
|
||||
DecodeRunesInString(s, result);
|
||||
return result;
|
||||
@ -159,20 +162,21 @@ inline Unicode DecodeRunesInString(const std::string& s) {
|
||||
|
||||
|
||||
// [left, right]
|
||||
inline std::string GetStringFromRunes(RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||
assert(right->str >= left->str);
|
||||
return std::string(left->str, right->str - left->str + right->len);
|
||||
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||
assert(right->offset >= left->offset);
|
||||
uint32_t len = right->offset - left->offset + right->len;
|
||||
return s.substr(left->offset, len);
|
||||
}
|
||||
|
||||
inline void GetStringsFromWordRanges(const std::vector<WordRange>& wrs, std::vector<std::string>& words) {
|
||||
inline void GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
|
||||
for (size_t i = 0; i < wrs.size(); i++) {
|
||||
words.push_back(GetStringFromRunes(wrs[i].left, wrs[i].right));
|
||||
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
|
||||
}
|
||||
}
|
||||
|
||||
inline std::vector<std::string> GetStringsFromWordRanges(const std::vector<WordRange>& wrs) {
|
||||
std::vector<std::string> result;
|
||||
GetStringsFromWordRanges(wrs, result);
|
||||
inline vector<string> GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
|
||||
vector<string> result;
|
||||
GetStringsFromWordRanges(s, wrs, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@ TEST(PreFilterTest, Test1) {
|
||||
while (filter.HasNext()) {
|
||||
PreFilter::Range range;
|
||||
range = filter.Next();
|
||||
words.push_back(GetStringFromRunes(range.begin, range.end - 1));
|
||||
words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
|
||||
}
|
||||
res = limonp::Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(res, expected);
|
||||
@ -35,7 +35,7 @@ TEST(PreFilterTest, Test1) {
|
||||
while (filter.HasNext()) {
|
||||
PreFilter::Range range;
|
||||
range = filter.Next();
|
||||
words.push_back(GetStringFromRunes(range.begin, range.end - 1));
|
||||
words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
|
||||
}
|
||||
res = limonp::Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(res, expected);
|
||||
|
Loading…
x
Reference in New Issue
Block a user