mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
[code style] uppercase function name
This commit is contained in:
parent
f17c2d10e2
commit
83cc67cb15
@ -127,8 +127,8 @@ class DictTrie {
|
||||
const string& word,
|
||||
double weight,
|
||||
const string& tag) {
|
||||
if (!TransCode::decode(word, node_info.word)) {
|
||||
LogError("decode %s failed.", word.c_str());
|
||||
if (!TransCode::Decode(word, node_info.word)) {
|
||||
LogError("Decode %s failed.", word.c_str());
|
||||
return false;
|
||||
}
|
||||
node_info.weight = weight;
|
||||
|
@ -36,7 +36,7 @@ class FullSegment: public SegmentBase {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, uwords);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
TransCode::Encode(uwords, words);
|
||||
}
|
||||
void Cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
|
@ -118,7 +118,7 @@ struct HMMModel {
|
||||
LogError("emitProb illegal.");
|
||||
return false;
|
||||
}
|
||||
if (!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
|
||||
if (!TransCode::Decode(tmp2[0], unicode) || unicode.size() != 1) {
|
||||
LogError("TransCode failed.");
|
||||
return false;
|
||||
}
|
||||
|
@ -33,7 +33,7 @@ class HMMSegment: public SegmentBase {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, uwords);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
TransCode::Encode(uwords, words);
|
||||
}
|
||||
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
Unicode::const_iterator left = begin;
|
||||
@ -41,7 +41,7 @@ class HMMSegment: public SegmentBase {
|
||||
while (right != end) {
|
||||
if (*right < 0x80) {
|
||||
if (left != right) {
|
||||
Cut(left, right, res);
|
||||
InternalCut(left, right, res);
|
||||
}
|
||||
left = right;
|
||||
do {
|
||||
@ -62,7 +62,7 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
if (left != right) {
|
||||
Cut(left, right, res);
|
||||
InternalCut(left, right, res);
|
||||
}
|
||||
}
|
||||
private:
|
||||
@ -102,7 +102,7 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
return begin;
|
||||
}
|
||||
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void InternalCut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<size_t> status;
|
||||
Viterbi(begin, end, status);
|
||||
|
||||
|
@ -31,9 +31,9 @@ class KeywordExtractor {
|
||||
~KeywordExtractor() {
|
||||
}
|
||||
|
||||
bool extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
bool Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
vector<pair<string, double> > topWords;
|
||||
if (!extract(sentence, topWords, topN)) {
|
||||
if (!Extract(sentence, topWords, topN)) {
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < topWords.size(); i++) {
|
||||
@ -42,7 +42,7 @@ class KeywordExtractor {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
bool Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
vector<string> words;
|
||||
segment_.Cut(sentence, words);
|
||||
|
||||
@ -122,7 +122,7 @@ class KeywordExtractor {
|
||||
|
||||
bool IsSingleWord(const string& str) const {
|
||||
Unicode unicode;
|
||||
TransCode::decode(str, unicode);
|
||||
TransCode::Decode(str, unicode);
|
||||
if (unicode.size() == 1)
|
||||
return true;
|
||||
return false;
|
||||
|
@ -51,12 +51,12 @@ class LevelSegment: public SegmentBase{
|
||||
vector<pair<string, size_t> >& words) const {
|
||||
words.clear();
|
||||
Unicode unicode;
|
||||
TransCode::decode(sentence, unicode);
|
||||
TransCode::Decode(sentence, unicode);
|
||||
vector<pair<Unicode, size_t> > unicodeWords;
|
||||
Cut(unicode.begin(), unicode.end(), unicodeWords);
|
||||
words.resize(unicodeWords.size());
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
TransCode::encode(unicodeWords[i].first, words[i].first);
|
||||
TransCode::Encode(unicodeWords[i].first, words[i].first);
|
||||
words[i].second = unicodeWords[i].second;
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@ class MPSegment: public SegmentBase {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, uwords, max_word_len);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
TransCode::Encode(uwords, words);
|
||||
}
|
||||
void Cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
|
@ -30,7 +30,7 @@ class MixSegment: public SegmentBase {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, uwords, hmm);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
TransCode::Encode(uwords, words);
|
||||
}
|
||||
|
||||
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
|
||||
|
@ -25,7 +25,7 @@ class PosTagger {
|
||||
~PosTagger() {
|
||||
}
|
||||
|
||||
bool tag(const string& src, vector<pair<string, string> >& res) const {
|
||||
bool Tag(const string& src, vector<pair<string, string> >& res) const {
|
||||
vector<string> CutRes;
|
||||
segment_.Cut(src, CutRes);
|
||||
|
||||
@ -34,8 +34,8 @@ class PosTagger {
|
||||
const DictTrie * dict = segment_.GetDictTrie();
|
||||
assert(dict != NULL);
|
||||
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||
if (!TransCode::decode(*itr, unico)) {
|
||||
LogError("decode failed.");
|
||||
if (!TransCode::Decode(*itr, unico)) {
|
||||
LogError("Decode failed.");
|
||||
return false;
|
||||
}
|
||||
tmp = dict->Find(unico.begin(), unico.end());
|
||||
|
@ -26,7 +26,7 @@ class PreFilter {
|
||||
PreFilter(const unordered_set<Rune>& symbols,
|
||||
const string& sentence)
|
||||
: symbols_(symbols) {
|
||||
TransCode::decode(sentence, sentence_);
|
||||
TransCode::Decode(sentence, sentence_);
|
||||
cursor_ = sentence_.begin();
|
||||
}
|
||||
~PreFilter() {
|
||||
|
@ -35,7 +35,7 @@ class QuerySegment: public SegmentBase {
|
||||
range = pre_filter.Next();
|
||||
Cut(range.begin, range.end, uwords, hmm);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
TransCode::Encode(uwords, words);
|
||||
}
|
||||
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
|
||||
//use mix Cut first
|
||||
|
@ -17,7 +17,7 @@ typedef uint16_t Rune;
|
||||
typedef limonp::LocalVector<Rune> Unicode;
|
||||
|
||||
namespace TransCode {
|
||||
inline bool decode(const string& str, Unicode& res) {
|
||||
inline bool Decode(const string& str, Unicode& res) {
|
||||
#ifdef CPPJIEBA_GBK
|
||||
return gbkTrans(str, res);
|
||||
#else
|
||||
@ -25,7 +25,7 @@ inline bool decode(const string& str, Unicode& res) {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res) {
|
||||
inline void Encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res) {
|
||||
#ifdef CPPJIEBA_GBK
|
||||
gbkTrans(begin, end, res);
|
||||
#else
|
||||
@ -33,34 +33,34 @@ inline void encode(Unicode::const_iterator begin, Unicode::const_iterator end, s
|
||||
#endif
|
||||
}
|
||||
|
||||
inline void encode(const Unicode& uni, string& res) {
|
||||
encode(uni.begin(), uni.end(), res);
|
||||
inline void Encode(const Unicode& uni, string& res) {
|
||||
Encode(uni.begin(), uni.end(), res);
|
||||
}
|
||||
|
||||
// The compiler is expected to optimize this function to avoid a return-value copy.
|
||||
inline string encode(Unicode::const_iterator begin, Unicode::const_iterator end) {
|
||||
inline string Encode(Unicode::const_iterator begin, Unicode::const_iterator end) {
|
||||
string res;
|
||||
res.reserve(end - begin);
|
||||
encode(begin, end, res);
|
||||
Encode(begin, end, res);
|
||||
return res;
|
||||
}
|
||||
|
||||
inline string encode(const Unicode& unicode) {
|
||||
return encode(unicode.begin(), unicode.end());
|
||||
inline string Encode(const Unicode& unicode) {
|
||||
return Encode(unicode.begin(), unicode.end());
|
||||
}
|
||||
|
||||
// The compiler is expected to optimize this function to avoid a return-value copy.
|
||||
inline Unicode decode(const string& str) {
|
||||
inline Unicode Decode(const string& str) {
|
||||
Unicode unicode;
|
||||
unicode.reserve(str.size());
|
||||
decode(str, unicode);
|
||||
Decode(str, unicode);
|
||||
return unicode;
|
||||
}
|
||||
|
||||
inline void encode(const vector<Unicode>& input, vector<string>& output) {
|
||||
inline void Encode(const vector<Unicode>& input, vector<string>& output) {
|
||||
output.resize(input.size());
|
||||
for (size_t i = 0; i < output.size(); i++) {
|
||||
encode(input[i], output[i]);
|
||||
Encode(input[i], output[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -35,13 +35,13 @@ int main(int argc, char** argv) {
|
||||
|
||||
//cout << "[demo] TAGGING" << endl;
|
||||
//vector<pair<string, string> > tagres;
|
||||
//jieba.tag(s, tagres);
|
||||
//jieba.Tag(s, tagres);
|
||||
//cout << s << endl;
|
||||
//cout << tagres << endl;;
|
||||
|
||||
//cout << "[demo] KEYWORD" << endl;
|
||||
//vector<pair<string, double> > keywordres;
|
||||
//jieba.extract(s, keywordres, 5);
|
||||
//jieba.Extract(s, keywordres, 5);
|
||||
//cout << s << endl;
|
||||
//cout << keywordres << endl;
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
|
||||
using namespace cppjieba;
|
||||
|
||||
void cut(size_t times = 50) {
|
||||
void Cut(size_t times = 50) {
|
||||
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
|
||||
vector<string> res;
|
||||
string doc;
|
||||
@ -21,15 +21,15 @@ void cut(size_t times = 50) {
|
||||
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
||||
fflush(stdout);
|
||||
res.clear();
|
||||
seg.cut(doc, res);
|
||||
seg.Cut(doc, res);
|
||||
}
|
||||
printf("\n");
|
||||
long endTime = clock();
|
||||
ColorPrintln(GREEN, "cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
ColorPrintln(GREEN, "Cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
void extract(size_t times = 400) {
|
||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
|
||||
void Extract(size_t times = 400) {
|
||||
KeywordExtractor Extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
|
||||
vector<string> words;
|
||||
string doc;
|
||||
ifstream ifs("../test/testdata/review.100");
|
||||
@ -40,15 +40,15 @@ void extract(size_t times = 400) {
|
||||
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
||||
fflush(stdout);
|
||||
words.clear();
|
||||
extractor.extract(doc, words, 5);
|
||||
Extractor.Extract(doc, words, 5);
|
||||
}
|
||||
printf("\n");
|
||||
long endTime = clock();
|
||||
ColorPrintln(GREEN, "extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
cut();
|
||||
extract();
|
||||
Cut();
|
||||
Extract();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -47,12 +47,12 @@ TEST(JiebaTest, Test1) {
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
|
||||
|
||||
//vector<pair<string, string> > tagres;
|
||||
//jieba.tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||
//jieba.Tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
|
||||
//result << tagres;
|
||||
//ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
|
||||
|
||||
//vector<pair<string, double> > keywordres;
|
||||
//jieba.extract("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。", keywordres, 5);
|
||||
//jieba.Extract("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。", keywordres, 5);
|
||||
//result << keywordres;
|
||||
//ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
||||
}
|
||||
|
@ -4,14 +4,14 @@
|
||||
using namespace cppjieba;
|
||||
|
||||
TEST(KeywordExtractorTest, Test1) {
|
||||
KeywordExtractor extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
|
||||
KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
|
||||
|
||||
{
|
||||
string s("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。");
|
||||
string res;
|
||||
vector<pair<string, double> > wordweights;
|
||||
size_t topN = 5;
|
||||
extractor.extract(s, wordweights, topN);
|
||||
Extractor.Extract(s, wordweights, topN);
|
||||
res << wordweights;
|
||||
ASSERT_EQ(res, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
|
||||
}
|
||||
@ -21,21 +21,21 @@ TEST(KeywordExtractorTest, Test1) {
|
||||
string res;
|
||||
vector<pair<string, double> > wordweights;
|
||||
size_t topN = 5;
|
||||
extractor.extract(s, wordweights, topN);
|
||||
Extractor.Extract(s, wordweights, topN);
|
||||
res << wordweights;
|
||||
ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]");
|
||||
}
|
||||
}
|
||||
|
||||
TEST(KeywordExtractorTest, Test2) {
|
||||
KeywordExtractor extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8");
|
||||
KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8");
|
||||
|
||||
{
|
||||
string s("蓝翔优秀毕业生");
|
||||
string res;
|
||||
vector<pair<string, double> > wordweights;
|
||||
size_t topN = 5;
|
||||
extractor.extract(s, wordweights, topN);
|
||||
Extractor.Extract(s, wordweights, topN);
|
||||
res << wordweights;
|
||||
ASSERT_EQ(res, "[\"蓝翔:11.7392\", \"毕业生:8.13549\", \"优秀:6.78347\"]");
|
||||
}
|
||||
@ -45,7 +45,7 @@ TEST(KeywordExtractorTest, Test2) {
|
||||
string res;
|
||||
vector<pair<string, double> > wordweights;
|
||||
size_t topN = 5;
|
||||
extractor.extract(s, wordweights, topN);
|
||||
Extractor.Extract(s, wordweights, topN);
|
||||
res << wordweights;
|
||||
ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]");
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ TEST(PosTaggerTest, Test) {
|
||||
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
|
||||
{
|
||||
vector<pair<string, string> > res;
|
||||
tagger.tag(QUERY_TEST1, res);
|
||||
tagger.Tag(QUERY_TEST1, res);
|
||||
string s;
|
||||
s << res;
|
||||
ASSERT_TRUE(s == ANS_TEST1);
|
||||
@ -26,14 +26,14 @@ TEST(PosTagger, TestUserDict) {
|
||||
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
|
||||
{
|
||||
vector<pair<string, string> > res;
|
||||
tagger.tag(QUERY_TEST2, res);
|
||||
tagger.Tag(QUERY_TEST2, res);
|
||||
string s;
|
||||
s << res;
|
||||
ASSERT_EQ(s, ANS_TEST2);
|
||||
}
|
||||
{
|
||||
vector<pair<string, string> > res;
|
||||
tagger.tag(QUERY_TEST3, res);
|
||||
tagger.Tag(QUERY_TEST3, res);
|
||||
string s;
|
||||
s << res;
|
||||
ASSERT_EQ(s, ANS_TEST3);
|
||||
|
@ -18,7 +18,7 @@ TEST(PreFilterTest, Test1) {
|
||||
while (filter.HasNext()) {
|
||||
PreFilter::Range range;
|
||||
range = filter.Next();
|
||||
words.push_back(TransCode::encode(range.begin, range.end));
|
||||
words.push_back(TransCode::Encode(range.begin, range.end));
|
||||
}
|
||||
res = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(res, expected);
|
||||
@ -32,7 +32,7 @@ TEST(PreFilterTest, Test1) {
|
||||
while (filter.HasNext()) {
|
||||
PreFilter::Range range;
|
||||
range = filter.Next();
|
||||
words.push_back(TransCode::encode(range.begin, range.end));
|
||||
words.push_back(TransCode::Encode(range.begin, range.end));
|
||||
}
|
||||
res = join(words.begin(), words.end(), "/");
|
||||
for (size_t i = 0; i < words.size(); i++) {
|
||||
|
@ -19,7 +19,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
{
|
||||
sentence = "我来自北京邮电大学。。。学号123456,用AK47";
|
||||
expected = "我/来自/北京邮电大学/。/。/。/学号/123456/,/用/AK47";
|
||||
segment.cut(sentence, words);
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
@ -27,7 +27,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
{
|
||||
sentence = "B超 T恤";
|
||||
expected = "B超/ /T恤";
|
||||
segment.cut(sentence, words);
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
@ -35,7 +35,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
{
|
||||
sentence = "他来到了网易杭研大厦";
|
||||
expected = "他/来到/了/网易/杭/研/大厦";
|
||||
segment.cut(sentence, words, false);
|
||||
segment.Cut(sentence, words, false);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
@ -43,7 +43,7 @@ TEST(MixSegmentTest, Test1) {
|
||||
{
|
||||
sentence = "他来到了网易杭研大厦";
|
||||
expected = "他/来到/了/网易/杭研/大厦";
|
||||
segment.cut(sentence, words);
|
||||
segment.Cut(sentence, words);
|
||||
actual = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(actual, expected);
|
||||
}
|
||||
@ -53,7 +53,7 @@ TEST(MixSegmentTest, NoUserDict) {
|
||||
MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
|
||||
const char* str = "令狐冲是云计算方面的专家";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
string res;
|
||||
ASSERT_EQ("[\"令狐冲\", \"是\", \"云\", \"计算\", \"方面\", \"的\", \"专家\"]", res << words);
|
||||
|
||||
@ -63,14 +63,14 @@ TEST(MixSegmentTest, UserDict) {
|
||||
{
|
||||
const char* str = "令狐冲是云计算方面的专家";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
string res;
|
||||
ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
|
||||
}
|
||||
{
|
||||
const char* str = "小明先就职于IBM,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
string res;
|
||||
res << words;
|
||||
ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"IBM\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
|
||||
@ -78,7 +78,7 @@ TEST(MixSegmentTest, UserDict) {
|
||||
{
|
||||
const char* str = "IBM,3.14";
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
string res;
|
||||
res << words;
|
||||
ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
|
||||
@ -90,18 +90,18 @@ TEST(MixSegmentTest, TestUserDict) {
|
||||
vector<string> words;
|
||||
string res;
|
||||
|
||||
segment.cut("令狐冲是云计算方面的专家", words);
|
||||
segment.Cut("令狐冲是云计算方面的专家", words);
|
||||
ASSERT_EQ("[\"令狐冲\", \"是\", \"云计算\", \"方面\", \"的\", \"专家\"]", res << words);
|
||||
|
||||
segment.cut("小明先就职于IBM,后在日本京都大学深造", words);
|
||||
segment.Cut("小明先就职于IBM,后在日本京都大学深造", words);
|
||||
res << words;
|
||||
ASSERT_EQ("[\"小明\", \"先\", \"就职\", \"于\", \"I\", \"B\", \"M\", \",\", \"后\", \"在\", \"日本\", \"京都大学\", \"深造\"]", res);
|
||||
|
||||
segment.cut("IBM,3.14", words);
|
||||
segment.Cut("IBM,3.14", words);
|
||||
res << words;
|
||||
ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
|
||||
|
||||
segment.cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
segment.Cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树/万树/梨花/开", res);
|
||||
}
|
||||
@ -112,7 +112,7 @@ TEST(MixSegmentTest, TestMultiUserDict) {
|
||||
vector<string> words;
|
||||
string res;
|
||||
|
||||
segment.cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
segment.Cut("忽如一夜春风来,千树万树梨花开", words);
|
||||
res = limonp::join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("忽如一夜春风来/,/千树万树梨花开", res);
|
||||
}
|
||||
@ -121,27 +121,27 @@ TEST(MPSegmentTest, Test1) {
|
||||
MPSegment segment("../dict/jieba.dict.utf8");;
|
||||
string s;
|
||||
vector<string> words;
|
||||
segment.cut("我来自北京邮电大学。", words);
|
||||
segment.Cut("我来自北京邮电大学。", words);
|
||||
ASSERT_EQ("[\"我\", \"来自\", \"北京邮电大学\", \"。\"]", s << words);
|
||||
|
||||
segment.cut("B超 T恤", words);
|
||||
segment.Cut("B超 T恤", words);
|
||||
ASSERT_EQ(s << words, "[\"B超\", \" \", \"T恤\"]");
|
||||
|
||||
segment.cut("南京市长江大桥", words);
|
||||
segment.Cut("南京市长江大桥", words);
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\"]", s << words);
|
||||
|
||||
// MaxWordLen
|
||||
segment.cut("南京市长江大桥", words, 3);
|
||||
segment.Cut("南京市长江大桥", words, 3);
|
||||
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", s << words);
|
||||
|
||||
segment.cut("南京市长江大桥", words, 0);
|
||||
segment.Cut("南京市长江大桥", words, 0);
|
||||
ASSERT_EQ("[\"南\", \"京\", \"市\", \"长\", \"江\", \"大\", \"桥\"]", s << words);
|
||||
|
||||
segment.cut("湖南长沙市天心区", words);
|
||||
segment.Cut("湖南长沙市天心区", words);
|
||||
s = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("湖南长沙市/天心区", s);
|
||||
|
||||
segment.cut("湖南长沙市天心区", words, 3);
|
||||
segment.Cut("湖南长沙市天心区", words, 3);
|
||||
s = join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ("湖南/长沙市/天心区", s);
|
||||
}
|
||||
@ -164,7 +164,7 @@ TEST(MPSegmentTest, Test1) {
|
||||
// res += line;
|
||||
// res += '\n';
|
||||
//
|
||||
// segment.cut(line, words);
|
||||
// segment.Cut(line, words);
|
||||
// string s;
|
||||
// s << words;
|
||||
// res += s;
|
||||
@ -182,7 +182,7 @@ TEST(HMMSegmentTest, Test1) {
|
||||
const char* str = "我来自北京邮电大学。。。学号123456";
|
||||
const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", "学号", "123456"};
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
||||
@ -190,7 +190,7 @@ TEST(HMMSegmentTest, Test1) {
|
||||
const char* str = "IBM,1.2,123";
|
||||
const char* res[] = {"IBM", ",", "1.2", ",", "123"};
|
||||
vector<string> words;
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
}
|
||||
@ -200,12 +200,12 @@ TEST(FullSegment, Test1) {
|
||||
vector<string> words;
|
||||
string s;
|
||||
|
||||
segment.cut("我来自北京邮电大学", words);
|
||||
segment.Cut("我来自北京邮电大学", words);
|
||||
s << words;
|
||||
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
|
||||
|
||||
|
||||
segment.cut("上市公司CEO", words);
|
||||
segment.Cut("上市公司CEO", words);
|
||||
s << words;
|
||||
ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
|
||||
}
|
||||
@ -215,7 +215,7 @@ TEST(QuerySegment, Test1) {
|
||||
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
|
||||
string s1, s2;
|
||||
s1 << words;
|
||||
@ -231,7 +231,7 @@ TEST(QuerySegment, Test2) {
|
||||
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
|
||||
string s1, s2;
|
||||
s1 << words;
|
||||
@ -243,7 +243,7 @@ TEST(QuerySegment, Test2) {
|
||||
const char* str = "小明硕士毕业于中国科学院计算所iPhone6";
|
||||
vector<string> words;
|
||||
|
||||
segment.cut(str, words);
|
||||
segment.Cut(str, words);
|
||||
|
||||
string s1, s2;
|
||||
s1 << words;
|
||||
@ -257,10 +257,10 @@ TEST(LevelSegmentTest, Test0) {
|
||||
string s;
|
||||
LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
|
||||
vector<pair<string, size_t> > words;
|
||||
segment.cut("南京市长江大桥", words);
|
||||
segment.Cut("南京市长江大桥", words);
|
||||
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words);
|
||||
|
||||
vector<string> res;
|
||||
segment.cut("南京市长江大桥", res);
|
||||
segment.Cut("南京市长江大桥", res);
|
||||
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res);
|
||||
}
|
||||
|
@ -15,7 +15,7 @@ TEST(TrieTest, Empty) {
|
||||
TEST(TrieTest, Construct) {
|
||||
vector<Unicode> keys;
|
||||
vector<const DictUnit*> values;
|
||||
keys.push_back(TransCode::decode("你"));
|
||||
keys.push_back(TransCode::Decode("你"));
|
||||
values.push_back((const DictUnit*)(NULL));
|
||||
Trie trie(keys, values);
|
||||
}
|
||||
@ -32,7 +32,7 @@ TEST(DictTrieTest, Test1) {
|
||||
ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001);
|
||||
string word("来到");
|
||||
Unicode uni;
|
||||
ASSERT_TRUE(TransCode::decode(word, uni));
|
||||
ASSERT_TRUE(TransCode::Decode(word, uni));
|
||||
DictUnit nodeInfo;
|
||||
nodeInfo.word = uni;
|
||||
nodeInfo.tag = "v";
|
||||
@ -45,13 +45,13 @@ TEST(DictTrieTest, Test1) {
|
||||
LocalVector<pair<size_t, const DictUnit*> > res;
|
||||
const char * words[] = {"清", "清华", "清华大学"};
|
||||
for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
|
||||
ASSERT_TRUE(TransCode::decode(words[i], uni));
|
||||
ASSERT_TRUE(TransCode::Decode(words[i], uni));
|
||||
res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
|
||||
//resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
|
||||
}
|
||||
vector<pair<size_t, const DictUnit*> > vec;
|
||||
vector<struct Dag> dags;
|
||||
ASSERT_TRUE(TransCode::decode(word, uni));
|
||||
ASSERT_TRUE(TransCode::Decode(word, uni));
|
||||
trie.Find(uni.begin(), uni.end(), dags);
|
||||
ASSERT_EQ(dags.size(), uni.size());
|
||||
ASSERT_NE(dags.size(), 0u);
|
||||
@ -65,7 +65,7 @@ TEST(DictTrieTest, UserDict) {
|
||||
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
|
||||
string word = "云计算";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
|
||||
ASSERT_TRUE(unit);
|
||||
string res ;
|
||||
@ -77,7 +77,7 @@ TEST(DictTrieTest, UserDictWithMaxWeight) {
|
||||
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
|
||||
string word = "云计算";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
|
||||
ASSERT_TRUE(unit);
|
||||
string res ;
|
||||
@ -91,7 +91,7 @@ TEST(DictTrieTest, Dag) {
|
||||
{
|
||||
string word = "清华大学";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.Find(unicode.begin(), unicode.end(), res);
|
||||
|
||||
@ -105,7 +105,7 @@ TEST(DictTrieTest, Dag) {
|
||||
{
|
||||
string word = "北京邮电大学";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.Find(unicode.begin(), unicode.end(), res);
|
||||
|
||||
@ -119,7 +119,7 @@ TEST(DictTrieTest, Dag) {
|
||||
{
|
||||
string word = "长江大桥";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.Find(unicode.begin(), unicode.end(), res);
|
||||
|
||||
@ -133,7 +133,7 @@ TEST(DictTrieTest, Dag) {
|
||||
{
|
||||
string word = "长江大桥";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.Find(unicode.begin(), unicode.end(), res, 3);
|
||||
|
||||
@ -147,7 +147,7 @@ TEST(DictTrieTest, Dag) {
|
||||
{
|
||||
string word = "长江大桥";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
ASSERT_TRUE(TransCode::Decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.Find(unicode.begin(), unicode.end(), res, 4);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user