mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
override Cut functions, add location information into Word results;
This commit is contained in:
parent
b6703aba90
commit
6fa843b527
@ -2,7 +2,11 @@
|
|||||||
|
|
||||||
## next version
|
## next version
|
||||||
|
|
||||||
|
api changes:
|
||||||
|
|
||||||
|
+ overload Cut functions, adding location (offset) information to the Word results;
|
||||||
+ remove LevelSegment;
|
+ remove LevelSegment;
|
||||||
|
+ remove Jieba::Locate;
|
||||||
|
|
||||||
## v4.6.0
|
## v4.6.0
|
||||||
|
|
||||||
|
@ -27,6 +27,12 @@ class FullSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
void Cut(const string& sentence,
|
void Cut(const string& sentence,
|
||||||
vector<string>& words) const {
|
vector<string>& words) const {
|
||||||
|
vector<Word> tmp;
|
||||||
|
Cut(sentence, tmp);
|
||||||
|
GetStringsFromWords(tmp, words);
|
||||||
|
}
|
||||||
|
void Cut(const string& sentence,
|
||||||
|
vector<Word>& words) const {
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
vector<WordRange> wrs;
|
vector<WordRange> wrs;
|
||||||
@ -37,7 +43,7 @@ class FullSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
words.clear();
|
words.clear();
|
||||||
words.reserve(wrs.size());
|
words.reserve(wrs.size());
|
||||||
GetStringsFromWordRanges(sentence, wrs, words);
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
}
|
}
|
||||||
void Cut(RuneStrArray::const_iterator begin,
|
void Cut(RuneStrArray::const_iterator begin,
|
||||||
RuneStrArray::const_iterator end,
|
RuneStrArray::const_iterator end,
|
||||||
|
@ -25,6 +25,12 @@ class HMMSegment: public SegmentBase {
|
|||||||
|
|
||||||
void Cut(const string& sentence,
|
void Cut(const string& sentence,
|
||||||
vector<string>& words) const {
|
vector<string>& words) const {
|
||||||
|
vector<Word> tmp;
|
||||||
|
Cut(sentence, tmp);
|
||||||
|
GetStringsFromWords(tmp, words);
|
||||||
|
}
|
||||||
|
void Cut(const string& sentence,
|
||||||
|
vector<Word>& words) const {
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
vector<WordRange> wrs;
|
vector<WordRange> wrs;
|
||||||
@ -35,7 +41,7 @@ class HMMSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
words.clear();
|
words.clear();
|
||||||
words.reserve(wrs.size());
|
words.reserve(wrs.size());
|
||||||
GetStringsFromWordRanges(sentence, wrs, words);
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
}
|
}
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res) const {
|
||||||
RuneStrArray::const_iterator left = begin;
|
RuneStrArray::const_iterator left = begin;
|
||||||
|
@ -32,35 +32,33 @@ class Jieba {
|
|||||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
mix_seg_.Cut(sentence, words, hmm);
|
mix_seg_.Cut(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
|
// Word-returning overload of Cut: forwards `sentence`, the output vector
// `words` and the `hmm` flag unchanged to mix_seg_.Cut.
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
|
mix_seg_.Cut(sentence, words, hmm);
|
||||||
|
}
|
||||||
void CutAll(const string& sentence, vector<string>& words) const {
|
void CutAll(const string& sentence, vector<string>& words) const {
|
||||||
full_seg_.Cut(sentence, words);
|
full_seg_.Cut(sentence, words);
|
||||||
}
|
}
|
||||||
|
// Word-returning overload of CutAll: forwards `sentence` and the output
// vector `words` to full_seg_.Cut.
void CutAll(const string& sentence, vector<Word>& words) const {
|
||||||
|
full_seg_.Cut(sentence, words);
|
||||||
|
}
|
||||||
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
query_seg_.Cut(sentence, words, hmm);
|
query_seg_.Cut(sentence, words, hmm);
|
||||||
}
|
}
|
||||||
|
// Word-returning overload of CutForSearch: forwards `sentence`, the output
// vector `words` and the `hmm` flag unchanged to query_seg_.Cut.
void CutForSearch(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
|
query_seg_.Cut(sentence, words, hmm);
|
||||||
|
}
|
||||||
void CutHMM(const string& sentence, vector<string>& words) const {
|
void CutHMM(const string& sentence, vector<string>& words) const {
|
||||||
hmm_seg_.Cut(sentence, words);
|
hmm_seg_.Cut(sentence, words);
|
||||||
}
|
}
|
||||||
//void CutLevel(const string& sentence, vector<string>& words) const {
|
// Word-returning overload of CutHMM: forwards `sentence` and the output
// vector `words` to hmm_seg_.Cut.
void CutHMM(const string& sentence, vector<Word>& words) const {
|
||||||
// level_seg_.Cut(sentence, words);
|
hmm_seg_.Cut(sentence, words);
|
||||||
//}
|
}
|
||||||
//void CutLevel(const string& sentence, vector<pair<string, size_t> >& words) const {
|
|
||||||
// level_seg_.Cut(sentence, words);
|
|
||||||
//}
|
|
||||||
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
|
||||||
mp_seg_.Cut(sentence, words, max_word_len);
|
mp_seg_.Cut(sentence, words, max_word_len);
|
||||||
}
|
}
|
||||||
//static void Locate(const vector<string>& words, vector<LocWord>& loc_words) {
|
// Word-returning overload of CutSmall: forwards `sentence`, the output
// vector `words` and `max_word_len` unchanged to mp_seg_.Cut.
void CutSmall(const string& sentence, vector<Word>& words, size_t max_word_len) const {
|
||||||
// loc_words.resize(words.size());
|
mp_seg_.Cut(sentence, words, max_word_len);
|
||||||
// size_t begin = 0;
|
}
|
||||||
// for (size_t i = 0; i < words.size(); i++) {
|
|
||||||
// size_t len = TransCode::Decode(words[i]).size();
|
|
||||||
// loc_words[i].word = words[i];
|
|
||||||
// loc_words[i].begin = begin;
|
|
||||||
// loc_words[i].end = loc_words[i].begin + len;
|
|
||||||
// begin = loc_words[i].end;
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
|
|
||||||
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
||||||
pos_tagger_.Tag(sentence, words);
|
pos_tagger_.Tag(sentence, words);
|
||||||
|
@ -28,6 +28,13 @@ class MPSegment: public SegmentBase {
|
|||||||
void Cut(const string& sentence,
|
void Cut(const string& sentence,
|
||||||
vector<string>& words,
|
vector<string>& words,
|
||||||
size_t max_word_len = MAX_WORD_LENGTH) const {
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
|
vector<Word> tmp;
|
||||||
|
Cut(sentence, tmp, max_word_len);
|
||||||
|
GetStringsFromWords(tmp, words);
|
||||||
|
}
|
||||||
|
void Cut(const string& sentence,
|
||||||
|
vector<Word>& words,
|
||||||
|
size_t max_word_len = MAX_WORD_LENGTH) const {
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
vector<WordRange> wrs;
|
vector<WordRange> wrs;
|
||||||
@ -38,7 +45,7 @@ class MPSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
words.clear();
|
words.clear();
|
||||||
words.reserve(wrs.size());
|
words.reserve(wrs.size());
|
||||||
GetStringsFromWordRanges(sentence, wrs, words);
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
}
|
}
|
||||||
void Cut(RuneStrArray::const_iterator begin,
|
void Cut(RuneStrArray::const_iterator begin,
|
||||||
RuneStrArray::const_iterator end,
|
RuneStrArray::const_iterator end,
|
||||||
|
@ -21,6 +21,11 @@ class MixSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
|
vector<Word> tmp;
|
||||||
|
Cut(sentence, tmp, hmm);
|
||||||
|
GetStringsFromWords(tmp, words);
|
||||||
|
}
|
||||||
|
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
vector<WordRange> wrs;
|
vector<WordRange> wrs;
|
||||||
@ -31,7 +36,7 @@ class MixSegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
words.clear();
|
words.clear();
|
||||||
words.reserve(wrs.size());
|
words.reserve(wrs.size());
|
||||||
GetStringsFromWordRanges(sentence, wrs, words);
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||||
|
@ -27,6 +27,11 @@ class QuerySegment: public SegmentBase {
|
|||||||
~QuerySegment() {
|
~QuerySegment() {
|
||||||
}
|
}
|
||||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||||
|
vector<Word> tmp;
|
||||||
|
Cut(sentence, tmp, hmm);
|
||||||
|
GetStringsFromWords(tmp, words);
|
||||||
|
}
|
||||||
|
void Cut(const string& sentence, vector<Word>& words, bool hmm = true) const {
|
||||||
PreFilter pre_filter(symbols_, sentence);
|
PreFilter pre_filter(symbols_, sentence);
|
||||||
PreFilter::Range range;
|
PreFilter::Range range;
|
||||||
vector<WordRange> wrs;
|
vector<WordRange> wrs;
|
||||||
@ -37,7 +42,7 @@ class QuerySegment: public SegmentBase {
|
|||||||
}
|
}
|
||||||
words.clear();
|
words.clear();
|
||||||
words.reserve(wrs.size());
|
words.reserve(wrs.size());
|
||||||
GetStringsFromWordRanges(sentence, wrs, words);
|
GetWordsFromWordRanges(sentence, wrs, words);
|
||||||
}
|
}
|
||||||
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
void Cut(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end, vector<WordRange>& res, bool hmm) const {
|
||||||
//use mix Cut first
|
//use mix Cut first
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <ostream>
|
||||||
#include "limonp/LocalVector.hpp"
|
#include "limonp/LocalVector.hpp"
|
||||||
|
|
||||||
namespace cppjieba {
|
namespace cppjieba {
|
||||||
@ -14,6 +15,18 @@ using std::vector;
|
|||||||
|
|
||||||
typedef uint32_t Rune;
|
typedef uint32_t Rune;
|
||||||
|
|
||||||
|
// A segmented word together with the position it starts at in the string
// it was cut from.
struct Word {
|
||||||
|
// Text of the word.
string word;
|
||||||
|
// Start index of the word in the source string. GetWordFromRunes fills it
// with the same value it passes to substr(), so it counts chars of the
// encoded string rather than decoded runes.
uint32_t offset;
|
||||||
|
// Stores copies of `w` and `o`. Declaring this constructor means the struct
// has no implicit default constructor.
Word(const string& w, uint32_t o)
|
||||||
|
: word(w), offset(o) {
|
||||||
|
}
|
||||||
|
}; // struct Word
|
||||||
|
|
||||||
|
// Streams `w` as {"word": "<word>", "offset": <offset>} and returns `os` so
// calls can be chained.
// NOTE(review): w.word is written as-is; quotes or backslashes inside the word
// are not escaped, so the output is JSON-like but not guaranteed valid JSON.
inline std::ostream& operator << (std::ostream& os, const Word& w) {
|
||||||
|
return os << "{\"word\": \"" << w.word << "\", \"offset\": " << w.offset << "}";
|
||||||
|
}
|
||||||
|
|
||||||
struct RuneStr {
|
struct RuneStr {
|
||||||
Rune rune;
|
Rune rune;
|
||||||
uint32_t offset;
|
uint32_t offset;
|
||||||
@ -162,24 +175,37 @@ inline Unicode DecodeRunesInString(const string& s) {
|
|||||||
|
|
||||||
|
|
||||||
// [left, right]
|
// [left, right]
|
||||||
|
// Builds a Word from the inclusive rune range [left, right] of `s`.
// Both iterators are dereferenced, so `right` must point at the last rune of
// the word, not one past it.
// Returns a Word whose text is the matching substring of `s` and whose offset
// is left->offset.
inline Word GetWordFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||||
|
// Guards the unsigned subtraction below against a reversed range.
assert(right->offset >= left->offset);
|
||||||
|
// Distance between the two rune starts plus the length of the last rune,
// i.e. the span from the start of `left` to the end of `right`.
uint32_t len = right->offset - left->offset + right->len;
|
||||||
|
return Word(s.substr(left->offset, len), left->offset);
|
||||||
|
}
|
||||||
|
|
||||||
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
inline string GetStringFromRunes(const string& s, RuneStrArray::const_iterator left, RuneStrArray::const_iterator right) {
|
||||||
assert(right->offset >= left->offset);
|
assert(right->offset >= left->offset);
|
||||||
uint32_t len = right->offset - left->offset + right->len;
|
uint32_t len = right->offset - left->offset + right->len;
|
||||||
return s.substr(left->offset, len);
|
return s.substr(left->offset, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<string>& words) {
|
inline void GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs, vector<Word>& words) {
|
||||||
for (size_t i = 0; i < wrs.size(); i++) {
|
for (size_t i = 0; i < wrs.size(); i++) {
|
||||||
words.push_back(GetStringFromRunes(s, wrs[i].left, wrs[i].right));
|
words.push_back(GetWordFromRunes(s, wrs[i].left, wrs[i].right));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline vector<string> GetStringsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
|
inline vector<Word> GetWordsFromWordRanges(const string& s, const vector<WordRange>& wrs) {
|
||||||
vector<string> result;
|
vector<Word> result;
|
||||||
GetStringsFromWordRanges(s, wrs, result);
|
GetWordsFromWordRanges(s, wrs, result);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies the text of every Word in `words` into `strs`, dropping the offsets.
// `strs` is resized to words.size() and each slot is overwritten, so whatever
// it held before the call is replaced.
inline void GetStringsFromWords(const vector<Word>& words, vector<string>& strs) {
|
||||||
|
strs.resize(words.size());
|
||||||
|
for (size_t i = 0; i < words.size(); ++i) {
|
||||||
|
strs[i] = words[i].word;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace cppjieba
|
} // namespace cppjieba
|
||||||
|
|
||||||
#endif // CPPJIEBA_UNICODE_H
|
#endif // CPPJIEBA_UNICODE_H
|
||||||
|
@ -37,35 +37,45 @@ TEST(JiebaTest, Test1) {
|
|||||||
result << words;
|
result << words;
|
||||||
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||||
|
|
||||||
//jieba.CutLevel("南京市长江大桥", words);
|
}
|
||||||
//result << words;
|
TEST(JiebaTest, WordTest) {
|
||||||
//ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", result);
|
cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
|
||||||
|
"../dict/hmm_model.utf8",
|
||||||
|
"../dict/user.dict.utf8");
|
||||||
|
vector<Word> words;
|
||||||
|
string result;
|
||||||
|
|
||||||
//vector<pair<string, size_t> > word_levels;
|
jieba.Cut("他来到了网易杭研大厦", words);
|
||||||
//jieba.CutLevel("南京市长江大桥", word_levels);
|
result << words;
|
||||||
//result << word_levels;
|
ASSERT_EQ("[\"{\"word\": \"\xE4\xBB\x96\", \"offset\": 0}\", \"{\"word\": \"\xE6\x9D\xA5\xE5\x88\xB0\", \"offset\": 3}\", \"{\"word\": \"\xE4\xBA\x86\", \"offset\": 9}\", \"{\"word\": \"\xE7\xBD\x91\xE6\x98\x93\", \"offset\": 12}\", \"{\"word\": \"\xE6\x9D\xAD\xE7\xA0\x94\", \"offset\": 18}\", \"{\"word\": \"\xE5\xA4\xA7\xE5\x8E\xA6\", \"offset\": 24}\"]", result);
|
||||||
//ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
|
||||||
|
|
||||||
//vector<Jieba::LocWord> loc_words;
|
jieba.Cut("我来自北京邮电大学。", words, false);
|
||||||
//jieba.Cut("南京市长江大桥", words);
|
result << words;
|
||||||
//jieba.Locate(words, loc_words);
|
//ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", result);
|
||||||
//ASSERT_EQ(loc_words.size(), 2u);
|
ASSERT_EQ("[\"{\"word\": \"\xE6\x88\x91\", \"offset\": 0}\", \"{\"word\": \"\xE6\x9D\xA5\xE8\x87\xAA\", \"offset\": 3}\", \"{\"word\": \"\xE5\x8C\x97\xE4\xBA\xAC\xE9\x82\xAE\xE7\x94\xB5\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 9}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 27}\"]", result);
|
||||||
//ASSERT_EQ(loc_words[0].word, "南京市");
|
|
||||||
//ASSERT_EQ(loc_words[0].begin, 0u);
|
|
||||||
//ASSERT_EQ(loc_words[0].end, 3u);
|
|
||||||
//ASSERT_EQ(loc_words[1].word, "长江大桥");
|
|
||||||
//ASSERT_EQ(loc_words[1].begin, 3u);
|
|
||||||
//ASSERT_EQ(loc_words[1].end, 7u);
|
|
||||||
|
|
||||||
//vector<pair<string, string> > tagres;
|
jieba.CutSmall("南京市长江大桥", words, 3);
|
||||||
//jieba.Tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
//ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", result << words);
|
||||||
//result << tagres;
|
ASSERT_EQ("[\"{\"word\": \"\xE5\x8D\x97\xE4\xBA\xAC\xE5\xB8\x82\", \"offset\": 0}\", \"{\"word\": \"\xE9\x95\xBF\xE6\xB1\x9F\", \"offset\": 9}\", \"{\"word\": \"\xE5\xA4\xA7\xE6\xA1\xA5\", \"offset\": 15}\"]", result << words);
|
||||||
//ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
|
|
||||||
|
|
||||||
//vector<pair<string, double> > keywordres;
|
jieba.CutHMM("我来自北京邮电大学。。。学号123456", words);
|
||||||
//jieba.Extract("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。", keywordres, 5);
|
result << words;
|
||||||
//result << keywordres;
|
ASSERT_EQ("[\"{\"word\": \"\xE6\x88\x91\xE6\x9D\xA5\", \"offset\": 0}\", \"{\"word\": \"\xE8\x87\xAA\xE5\x8C\x97\xE4\xBA\xAC\", \"offset\": 6}\", \"{\"word\": \"\xE9\x82\xAE\xE7\x94\xB5\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 15}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 27}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 30}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 33}\", \"{\"word\": \"\xE5\xAD\xA6\xE5\x8F\xB7\", \"offset\": 36}\", \"{\"word\": \"123456\", \"offset\": 42}\"]", result);
|
||||||
//ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
|
||||||
|
jieba.Cut("我来自北京邮电大学。。。学号123456,用AK47", words);
|
||||||
|
result << words;
|
||||||
|
//ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\", \"。\", \"。\", \"学号\", \"123456\", \",\", \"用\", \"AK47\"]", result);
|
||||||
|
ASSERT_EQ("[\"{\"word\": \"\xE6\x88\x91\", \"offset\": 0}\", \"{\"word\": \"\xE6\x9D\xA5\xE8\x87\xAA\", \"offset\": 3}\", \"{\"word\": \"\xE5\x8C\x97\xE4\xBA\xAC\xE9\x82\xAE\xE7\x94\xB5\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 9}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 27}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 30}\", \"{\"word\": \"\xE3\x80\x82\", \"offset\": 33}\", \"{\"word\": \"\xE5\xAD\xA6\xE5\x8F\xB7\", \"offset\": 36}\", \"{\"word\": \"123456\", \"offset\": 42}\", \"{\"word\": \"\xEF\xBC\x8C\", \"offset\": 48}\", \"{\"word\": \"\xE7\x94\xA8\", \"offset\": 51}\", \"{\"word\": \"AK47\", \"offset\": 54}\"]", result);
|
||||||
|
|
||||||
|
jieba.CutAll("我来自北京邮电大学", words);
|
||||||
|
result << words;
|
||||||
|
//ASSERT_EQ(result, "[\"我\", \"来自\", \"北京\", \"北京邮电\", \"北京邮电大学\", \"邮电\", \"邮电大学\", \"电大\", \"大学\"]");
|
||||||
|
ASSERT_EQ("[\"{\"word\": \"\xE6\x88\x91\", \"offset\": 0}\", \"{\"word\": \"\xE6\x9D\xA5\xE8\x87\xAA\", \"offset\": 3}\", \"{\"word\": \"\xE5\x8C\x97\xE4\xBA\xAC\", \"offset\": 9}\", \"{\"word\": \"\xE5\x8C\x97\xE4\xBA\xAC\xE9\x82\xAE\xE7\x94\xB5\", \"offset\": 9}\", \"{\"word\": \"\xE5\x8C\x97\xE4\xBA\xAC\xE9\x82\xAE\xE7\x94\xB5\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 9}\", \"{\"word\": \"\xE9\x82\xAE\xE7\x94\xB5\", \"offset\": 15}\", \"{\"word\": \"\xE9\x82\xAE\xE7\x94\xB5\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 15}\", \"{\"word\": \"\xE7\x94\xB5\xE5\xA4\xA7\", \"offset\": 18}\", \"{\"word\": \"\xE5\xA4\xA7\xE5\xAD\xA6\", \"offset\": 21}\"]", result);
|
||||||
|
|
||||||
|
jieba.CutForSearch("他来到了网易杭研大厦", words);
|
||||||
|
result << words;
|
||||||
|
//ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
|
||||||
|
ASSERT_EQ("[\"{\"word\": \"\xE4\xBB\x96\", \"offset\": 0}\", \"{\"word\": \"\xE6\x9D\xA5\xE5\x88\xB0\", \"offset\": 3}\", \"{\"word\": \"\xE4\xBA\x86\", \"offset\": 9}\", \"{\"word\": \"\xE7\xBD\x91\xE6\x98\x93\", \"offset\": 12}\", \"{\"word\": \"\xE6\x9D\xAD\xE7\xA0\x94\", \"offset\": 18}\", \"{\"word\": \"\xE5\xA4\xA7\xE5\x8E\xA6\", \"offset\": 24}\"]", result);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(JiebaTest, InsertUserWord) {
|
TEST(JiebaTest, InsertUserWord) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user