Use an offset into the source string instead of a char pointer (str) in RuneStr

This commit is contained in:
yanyiwu 2016-04-17 22:50:32 +08:00
parent e7a45d2dde
commit b6703aba90
7 changed files with 29 additions and 25 deletions

View File

@ -37,7 +37,7 @@ class FullSegment: public SegmentBase {
}
words.clear();
words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words);
GetStringsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,

View File

@ -35,7 +35,7 @@ class HMMSegment: public SegmentBase {
}
words.clear();
words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words);
GetStringsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
RuneStrArray::const_iterator left = begin;

View File

@ -38,7 +38,7 @@ class MPSegment: public SegmentBase {
}
words.clear();
words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words);
GetStringsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end,

View File

@ -31,7 +31,7 @@ class MixSegment: public SegmentBase {
}
words.clear();
words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words);
GetStringsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {

View File

@ -37,7 +37,7 @@ class QuerySegment: public SegmentBase {
}
words.clear();
words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words);
GetStringsFromWordRanges(sentence, wrs, words);
}
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
//use mix Cut first

View File

@ -9,16 +9,19 @@
namespace cppjieba {
using std::string;
using std::vector;
typedef uint32_t Rune;
struct RuneStr {
Rune rune;
const char* str;
uint32_t offset;
uint32_t len;
RuneStr(): rune(0), str(NULL), len(0) {
RuneStr(): rune(0), offset(0), len(0) {
}
RuneStr(Rune r, const char* s, uint32_t l)
: rune(r), str(s), len(l) {
RuneStr(Rune r, uint32_t o, uint32_t l)
: rune(r), offset(o), len(l) {
}
}; // struct RuneStr
@ -118,14 +121,14 @@ inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes)
if (rp.len == 0) {
return false;
}
RuneStr x(rp.rune, s + i, rp.len);
RuneStr x(rp.rune, i, rp.len);
runes.push_back(x);
i += rp.len;
}
return true;
}
inline bool DecodeRunesInString(const std::string& s, RuneStrArray& runes) {
inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
return DecodeRunesInString(s.c_str(), s.size(), runes);
}
@ -142,37 +145,38 @@ inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
return true;
}
inline bool IsSingleWord(const std::string& str) {
inline bool IsSingleWord(const string& str) {
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
return rp.len == str.size();
}
inline bool DecodeRunesInString(const std::string& s, Unicode& unicode) {
inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
return DecodeRunesInString(s.c_str(), s.size(), unicode);
}
inline Unicode DecodeRunesInString(const std::string& s) {
inline Unicode DecodeRunesInString(const string& s) {
Unicode result;
DecodeRunesInString(s, result);
return result;
}
//[left, right]
inline std::string GetStringFromRunes(RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->str >= left->str);
return std::string(left->str, right->str - left->str + right->len);
// [left, right]
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->offset >= left->offset);
uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, len);
}
inline void GetStringsFromWordRanges(const std::vector<WordRange>& wrs, std::vector<std::string>& words) {
inline void GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetStringFromRunes(wrs[i].left, wrs[i].right));
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
}
}
inline std::vector<std::string> GetStringsFromWordRanges(const std::vector<WordRange>& wrs) {
std::vector<std::string> result;
GetStringsFromWordRanges(wrs, result);
inline vector<string> GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
vector<string> result;
GetStringsFromWordRanges(s, wrs, result);
return result;
}

View File

@ -20,7 +20,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) {
PreFilter::Range range;
range = filter.Next();
words.push_back(GetStringFromRunes(range.begin, range.end - 1));
words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
}
res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected);
@ -35,7 +35,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) {
PreFilter::Range range;
range = filter.Next();
words.push_back(GetStringFromRunes(range.begin, range.end - 1));
words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
}
res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected);