use offset instead of str in RuneStr

This commit is contained in:
yanyiwu 2016-04-17 22:50:32 +08:00
parent e7a45d2dde
commit b6703aba90
7 changed files with 29 additions and 25 deletions

View File

@ -37,7 +37,7 @@ class FullSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(sentence, wrs, words);
} }
void Cut(RuneStrArray::const_iterator begin, void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,

View File

@ -35,7 +35,7 @@ class HMMSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(sentence, wrs, words);
} }
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
RuneStrArray::const_iterator left = begin; RuneStrArray::const_iterator left = begin;

View File

@ -38,7 +38,7 @@ class MPSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(sentence, wrs, words);
} }
void Cut(RuneStrArray::const_iterator begin, void Cut(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,

View File

@ -31,7 +31,7 @@ class MixSegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(sentence, wrs, words);
} }
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {

View File

@ -37,7 +37,7 @@ class QuerySegment: public SegmentBase {
} }
words.clear(); words.clear();
words.reserve(wrs.size()); words.reserve(wrs.size());
GetStringsFromWordRanges(wrs, words); GetStringsFromWordRanges(sentence, wrs, words);
} }
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const { void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
//use mix Cut first //use mix Cut first

View File

@ -9,16 +9,19 @@
namespace cppjieba { namespace cppjieba {
using std::string;
using std::vector;
typedef uint32_t Rune; typedef uint32_t Rune;
struct RuneStr { struct RuneStr {
Rune rune; Rune rune;
const char* str; uint32_t offset;
uint32_t len; uint32_t len;
RuneStr(): rune(0), str(NULL), len(0) { RuneStr(): rune(0), offset(0), len(0) {
} }
RuneStr(Rune r, const char* s, uint32_t l) RuneStr(Rune r, uint32_t o, uint32_t l)
: rune(r), str(s), len(l) { : rune(r), offset(o), len(l) {
} }
}; // struct RuneStr }; // struct RuneStr
@ -118,14 +121,14 @@ inline bool DecodeRunesInString(const char* s, size_t len, RuneStrArray& runes)
if (rp.len == 0) { if (rp.len == 0) {
return false; return false;
} }
RuneStr x(rp.rune, s + i, rp.len); RuneStr x(rp.rune, i, rp.len);
runes.push_back(x); runes.push_back(x);
i += rp.len; i += rp.len;
} }
return true; return true;
} }
inline bool DecodeRunesInString(const std::string& s, RuneStrArray& runes) { inline bool DecodeRunesInString(const string& s, RuneStrArray& runes) {
return DecodeRunesInString(s.c_str(), s.size(), runes); return DecodeRunesInString(s.c_str(), s.size(), runes);
} }
@ -142,37 +145,38 @@ inline bool DecodeRunesInString(const char* s, size_t len, Unicode& unicode) {
return true; return true;
} }
inline bool IsSingleWord(const std::string& str) { inline bool IsSingleWord(const string& str) {
RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size()); RuneStrLite rp = DecodeRuneInString(str.c_str(), str.size());
return rp.len == str.size(); return rp.len == str.size();
} }
inline bool DecodeRunesInString(const std::string& s, Unicode& unicode) { inline bool DecodeRunesInString(const string& s, Unicode& unicode) {
return DecodeRunesInString(s.c_str(), s.size(), unicode); return DecodeRunesInString(s.c_str(), s.size(), unicode);
} }
inline Unicode DecodeRunesInString(const std::string& s) { inline Unicode DecodeRunesInString(const string& s) {
Unicode result; Unicode result;
DecodeRunesInString(s, result); DecodeRunesInString(s, result);
return result; return result;
} }
//[left, right] // [left, right]
inline std::string GetStringFromRunes(RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) { inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
assert(right->str >= left->str); assert(right->offset >= left->offset);
return std::string(left->str, right->str - left->str + right->len); uint32_t len = right->offset - left->offset + right->len;
return s.substr(left->offset, len);
} }
inline void GetStringsFromWordRanges(const std::vector<WordRange>& wrs, std::vector<std::string>& words) { inline void GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
for (size_t i = 0; i < wrs.size(); i++) { for (size_t i = 0; i < wrs.size(); i++) {
words.push_back(GetStringFromRunes(wrs[i].left, wrs[i].right)); words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
} }
} }
inline std::vector<std::string> GetStringsFromWordRanges(const std::vector<WordRange>& wrs) { inline vector<string> GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
std::vector<std::string> result; vector<string> result;
GetStringsFromWordRanges(wrs, result); GetStringsFromWordRanges(s, wrs, result);
return result; return result;
} }

View File

@ -20,7 +20,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(GetStringFromRunes(range.begin, range.end - 1)); words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
} }
res = limonp::Join(words.begin(), words.end(), "/"); res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected); ASSERT_EQ(res, expected);
@ -35,7 +35,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(GetStringFromRunes(range.begin, range.end - 1)); words.push_back(GetStringFromRunes(s, range.begin, range.end - 1));
} }
res = limonp::Join(words.begin(), words.end(), "/"); res = limonp::Join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected); ASSERT_EQ(res, expected);