[code style] uppercase function name

This commit is contained in:
yanyiwu 2015-10-29 12:39:10 +08:00
parent f17c2d10e2
commit 83cc67cb15
20 changed files with 98 additions and 98 deletions

View File

@ -127,8 +127,8 @@ class DictTrie {
const string& word, const string& word,
double weight, double weight,
const string& tag) { const string& tag) {
if (!TransCode::decode(word, node_info.word)) { if (!TransCode::Decode(word, node_info.word)) {
LogError("decode %s failed.", word.c_str()); LogError("Decode %s failed.", word.c_str());
return false; return false;
} }
node_info.weight = weight; node_info.weight = weight;

View File

@ -36,7 +36,7 @@ class FullSegment: public SegmentBase {
range = pre_filter.Next(); range = pre_filter.Next();
Cut(range.begin, range.end, uwords); Cut(range.begin, range.end, uwords);
} }
TransCode::encode(uwords, words); TransCode::Encode(uwords, words);
} }
void Cut(Unicode::const_iterator begin, void Cut(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,

View File

@ -118,7 +118,7 @@ struct HMMModel {
LogError("emitProb illegal."); LogError("emitProb illegal.");
return false; return false;
} }
if (!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) { if (!TransCode::Decode(tmp2[0], unicode) || unicode.size() != 1) {
LogError("TransCode failed."); LogError("TransCode failed.");
return false; return false;
} }

View File

@ -33,7 +33,7 @@ class HMMSegment: public SegmentBase {
range = pre_filter.Next(); range = pre_filter.Next();
Cut(range.begin, range.end, uwords); Cut(range.begin, range.end, uwords);
} }
TransCode::encode(uwords, words); TransCode::Encode(uwords, words);
} }
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
Unicode::const_iterator left = begin; Unicode::const_iterator left = begin;
@ -41,7 +41,7 @@ class HMMSegment: public SegmentBase {
while (right != end) { while (right != end) {
if (*right < 0x80) { if (*right < 0x80) {
if (left != right) { if (left != right) {
Cut(left, right, res); InternalCut(left, right, res);
} }
left = right; left = right;
do { do {
@ -62,7 +62,7 @@ class HMMSegment: public SegmentBase {
} }
} }
if (left != right) { if (left != right) {
Cut(left, right, res); InternalCut(left, right, res);
} }
} }
private: private:
@ -102,7 +102,7 @@ class HMMSegment: public SegmentBase {
} }
return begin; return begin;
} }
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { void InternalCut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
vector<size_t> status; vector<size_t> status;
Viterbi(begin, end, status); Viterbi(begin, end, status);

View File

@ -31,9 +31,9 @@ class KeywordExtractor {
~KeywordExtractor() { ~KeywordExtractor() {
} }
bool extract(const string& sentence, vector<string>& keywords, size_t topN) const { bool Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<pair<string, double> > topWords; vector<pair<string, double> > topWords;
if (!extract(sentence, topWords, topN)) { if (!Extract(sentence, topWords, topN)) {
return false; return false;
} }
for (size_t i = 0; i < topWords.size(); i++) { for (size_t i = 0; i < topWords.size(); i++) {
@ -42,7 +42,7 @@ class KeywordExtractor {
return true; return true;
} }
bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const { bool Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<string> words; vector<string> words;
segment_.Cut(sentence, words); segment_.Cut(sentence, words);
@ -122,7 +122,7 @@ class KeywordExtractor {
bool IsSingleWord(const string& str) const { bool IsSingleWord(const string& str) const {
Unicode unicode; Unicode unicode;
TransCode::decode(str, unicode); TransCode::Decode(str, unicode);
if (unicode.size() == 1) if (unicode.size() == 1)
return true; return true;
return false; return false;

View File

@ -51,12 +51,12 @@ class LevelSegment: public SegmentBase{
vector<pair<string, size_t> >& words) const { vector<pair<string, size_t> >& words) const {
words.clear(); words.clear();
Unicode unicode; Unicode unicode;
TransCode::decode(sentence, unicode); TransCode::Decode(sentence, unicode);
vector<pair<Unicode, size_t> > unicodeWords; vector<pair<Unicode, size_t> > unicodeWords;
Cut(unicode.begin(), unicode.end(), unicodeWords); Cut(unicode.begin(), unicode.end(), unicodeWords);
words.resize(unicodeWords.size()); words.resize(unicodeWords.size());
for (size_t i = 0; i < words.size(); i++) { for (size_t i = 0; i < words.size(); i++) {
TransCode::encode(unicodeWords[i].first, words[i].first); TransCode::Encode(unicodeWords[i].first, words[i].first);
words[i].second = unicodeWords[i].second; words[i].second = unicodeWords[i].second;
} }
} }

View File

@ -38,7 +38,7 @@ class MPSegment: public SegmentBase {
range = pre_filter.Next(); range = pre_filter.Next();
Cut(range.begin, range.end, uwords, max_word_len); Cut(range.begin, range.end, uwords, max_word_len);
} }
TransCode::encode(uwords, words); TransCode::Encode(uwords, words);
} }
void Cut(Unicode::const_iterator begin, void Cut(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,

View File

@ -30,7 +30,7 @@ class MixSegment: public SegmentBase {
range = pre_filter.Next(); range = pre_filter.Next();
Cut(range.begin, range.end, uwords, hmm); Cut(range.begin, range.end, uwords, hmm);
} }
TransCode::encode(uwords, words); TransCode::Encode(uwords, words);
} }
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const { void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {

View File

@ -25,7 +25,7 @@ class PosTagger {
~PosTagger() { ~PosTagger() {
} }
bool tag(const string& src, vector<pair<string, string> >& res) const { bool Tag(const string& src, vector<pair<string, string> >& res) const {
vector<string> CutRes; vector<string> CutRes;
segment_.Cut(src, CutRes); segment_.Cut(src, CutRes);
@ -34,8 +34,8 @@ class PosTagger {
const DictTrie * dict = segment_.GetDictTrie(); const DictTrie * dict = segment_.GetDictTrie();
assert(dict != NULL); assert(dict != NULL);
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) { for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
if (!TransCode::decode(*itr, unico)) { if (!TransCode::Decode(*itr, unico)) {
LogError("decode failed."); LogError("Decode failed.");
return false; return false;
} }
tmp = dict->Find(unico.begin(), unico.end()); tmp = dict->Find(unico.begin(), unico.end());

View File

@ -26,7 +26,7 @@ class PreFilter {
PreFilter(const unordered_set<Rune>& symbols, PreFilter(const unordered_set<Rune>& symbols,
const string& sentence) const string& sentence)
: symbols_(symbols) { : symbols_(symbols) {
TransCode::decode(sentence, sentence_); TransCode::Decode(sentence, sentence_);
cursor_ = sentence_.begin(); cursor_ = sentence_.begin();
} }
~PreFilter() { ~PreFilter() {

View File

@ -35,7 +35,7 @@ class QuerySegment: public SegmentBase {
range = pre_filter.Next(); range = pre_filter.Next();
Cut(range.begin, range.end, uwords, hmm); Cut(range.begin, range.end, uwords, hmm);
} }
TransCode::encode(uwords, words); TransCode::Encode(uwords, words);
} }
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const { void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
//use mix Cut first //use mix Cut first

View File

@ -17,7 +17,7 @@ typedef uint16_t Rune;
typedef limonp::LocalVector<Rune> Unicode; typedef limonp::LocalVector<Rune> Unicode;
namespace TransCode { namespace TransCode {
inline bool decode(const string& str, Unicode& res) { inline bool Decode(const string& str, Unicode& res) {
#ifdef CPPJIEBA_GBK #ifdef CPPJIEBA_GBK
return gbkTrans(str, res); return gbkTrans(str, res);
#else #else
@ -25,7 +25,7 @@ inline bool decode(const string& str, Unicode& res) {
#endif #endif
} }
inline void encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res) { inline void Encode(Unicode::const_iterator begin, Unicode::const_iterator end, string& res) {
#ifdef CPPJIEBA_GBK #ifdef CPPJIEBA_GBK
gbkTrans(begin, end, res); gbkTrans(begin, end, res);
#else #else
@ -33,34 +33,34 @@ inline void encode(Unicode::const_iterator begin, Unicode::const_iterator end, s
#endif #endif
} }
inline void encode(const Unicode& uni, string& res) { inline void Encode(const Unicode& uni, string& res) {
encode(uni.begin(), uni.end(), res); Encode(uni.begin(), uni.end(), res);
} }
// compiler is expected to optimize this function to avoid return value copy // compiler is expected to optimize this function to avoid return value copy
inline string encode(Unicode::const_iterator begin, Unicode::const_iterator end) { inline string Encode(Unicode::const_iterator begin, Unicode::const_iterator end) {
string res; string res;
res.reserve(end - begin); res.reserve(end - begin);
encode(begin, end, res); Encode(begin, end, res);
return res; return res;
} }
inline string encode(const Unicode& unicode) { inline string Encode(const Unicode& unicode) {
return encode(unicode.begin(), unicode.end()); return Encode(unicode.begin(), unicode.end());
} }
// compiler is expected to optimize this function to avoid return value copy // compiler is expected to optimize this function to avoid return value copy
inline Unicode decode(const string& str) { inline Unicode Decode(const string& str) {
Unicode unicode; Unicode unicode;
unicode.reserve(str.size()); unicode.reserve(str.size());
decode(str, unicode); Decode(str, unicode);
return unicode; return unicode;
} }
inline void encode(const vector<Unicode>& input, vector<string>& output) { inline void Encode(const vector<Unicode>& input, vector<string>& output) {
output.resize(input.size()); output.resize(input.size());
for (size_t i = 0; i < output.size(); i++) { for (size_t i = 0; i < output.size(); i++) {
encode(input[i], output[i]); Encode(input[i], output[i]);
} }
} }

View File

@ -35,13 +35,13 @@ int main(int argc, char** argv) {
//cout << "[demo] TAGGING" << endl; //cout << "[demo] TAGGING" << endl;
//vector<pair<string, string> > tagres; //vector<pair<string, string> > tagres;
//jieba.tag(s, tagres); //jieba.Tag(s, tagres);
//cout << s << endl; //cout << s << endl;
//cout << tagres << endl;; //cout << tagres << endl;;
//cout << "[demo] KEYWORD" << endl; //cout << "[demo] KEYWORD" << endl;
//vector<pair<string, double> > keywordres; //vector<pair<string, double> > keywordres;
//jieba.extract(s, keywordres, 5); //jieba.Extract(s, keywordres, 5);
//cout << s << endl; //cout << s << endl;
//cout << keywordres << endl; //cout << keywordres << endl;

View File

@ -9,7 +9,7 @@
using namespace cppjieba; using namespace cppjieba;
void cut(size_t times = 50) { void Cut(size_t times = 50) {
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
vector<string> res; vector<string> res;
string doc; string doc;
@ -21,15 +21,15 @@ void cut(size_t times = 50) {
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
fflush(stdout); fflush(stdout);
res.clear(); res.clear();
seg.cut(doc, res); seg.Cut(doc, res);
} }
printf("\n"); printf("\n");
long endTime = clock(); long endTime = clock();
ColorPrintln(GREEN, "cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC); ColorPrintln(GREEN, "Cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
} }
void extract(size_t times = 400) { void Extract(size_t times = 400) {
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); KeywordExtractor Extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
vector<string> words; vector<string> words;
string doc; string doc;
ifstream ifs("../test/testdata/review.100"); ifstream ifs("../test/testdata/review.100");
@ -40,15 +40,15 @@ void extract(size_t times = 400) {
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
fflush(stdout); fflush(stdout);
words.clear(); words.clear();
extractor.extract(doc, words, 5); Extractor.Extract(doc, words, 5);
} }
printf("\n"); printf("\n");
long endTime = clock(); long endTime = clock();
ColorPrintln(GREEN, "extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC); ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
} }
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
cut(); Cut();
extract(); Extract();
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }

View File

@ -47,12 +47,12 @@ TEST(JiebaTest, Test1) {
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result); ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", result);
//vector<pair<string, string> > tagres; //vector<pair<string, string> > tagres;
//jieba.tag("iPhone6手机的最大特点是很容易弯曲。", tagres); //jieba.Tag("iPhone6手机的最大特点是很容易弯曲。", tagres);
//result << tagres; //result << tagres;
//ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result); //ASSERT_EQ("[\"iPhone6:eng\", \"手机:n\", \"的:uj\", \"最大:a\", \"特点:n\", \"是:v\", \"很:zg\", \"容易:a\", \"弯曲:v\", \"。:x\"]", result);
//vector<pair<string, double> > keywordres; //vector<pair<string, double> > keywordres;
//jieba.extract("我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。", keywordres, 5); //jieba.Extract("我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。", keywordres, 5);
//result << keywordres; //result << keywordres;
//ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]"); //ASSERT_EQ(result, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
} }

View File

@ -4,14 +4,14 @@
using namespace cppjieba; using namespace cppjieba;
TEST(KeywordExtractorTest, Test1) { TEST(KeywordExtractorTest, Test1) {
KeywordExtractor extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
{ {
string s("我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。"); string s("我是拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上CEO走上人生巅峰。");
string res; string res;
vector<pair<string, double> > wordweights; vector<pair<string, double> > wordweights;
size_t topN = 5; size_t topN = 5;
extractor.extract(s, wordweights, topN); Extractor.Extract(s, wordweights, topN);
res << wordweights; res << wordweights;
ASSERT_EQ(res, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]"); ASSERT_EQ(res, "[\"CEO:11.7392\", \"升职:10.8562\", \"加薪:10.6426\", \"手扶拖拉机:10.0089\", \"巅峰:9.49396\"]");
} }
@ -21,21 +21,21 @@ TEST(KeywordExtractorTest, Test1) {
string res; string res;
vector<pair<string, double> > wordweights; vector<pair<string, double> > wordweights;
size_t topN = 5; size_t topN = 5;
extractor.extract(s, wordweights, topN); Extractor.Extract(s, wordweights, topN);
res << wordweights; res << wordweights;
ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]"); ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]");
} }
} }
TEST(KeywordExtractorTest, Test2) { TEST(KeywordExtractorTest, Test2) {
KeywordExtractor extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8"); KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8");
{ {
string s("蓝翔优秀毕业生"); string s("蓝翔优秀毕业生");
string res; string res;
vector<pair<string, double> > wordweights; vector<pair<string, double> > wordweights;
size_t topN = 5; size_t topN = 5;
extractor.extract(s, wordweights, topN); Extractor.Extract(s, wordweights, topN);
res << wordweights; res << wordweights;
ASSERT_EQ(res, "[\"蓝翔:11.7392\", \"毕业生:8.13549\", \"优秀:6.78347\"]"); ASSERT_EQ(res, "[\"蓝翔:11.7392\", \"毕业生:8.13549\", \"优秀:6.78347\"]");
} }
@ -45,7 +45,7 @@ TEST(KeywordExtractorTest, Test2) {
string res; string res;
vector<pair<string, double> > wordweights; vector<pair<string, double> > wordweights;
size_t topN = 5; size_t topN = 5;
extractor.extract(s, wordweights, topN); Extractor.Extract(s, wordweights, topN);
res << wordweights; res << wordweights;
ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]"); ASSERT_EQ(res, "[\"iPhone6:11.7392\", \"一部:6.47592\"]");
} }

View File

@ -16,7 +16,7 @@ TEST(PosTaggerTest, Test) {
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
{ {
vector<pair<string, string> > res; vector<pair<string, string> > res;
tagger.tag(QUERY_TEST1, res); tagger.Tag(QUERY_TEST1, res);
string s; string s;
s << res; s << res;
ASSERT_TRUE(s == ANS_TEST1); ASSERT_TRUE(s == ANS_TEST1);
@ -26,14 +26,14 @@ TEST(PosTagger, TestUserDict) {
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8"); PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
{ {
vector<pair<string, string> > res; vector<pair<string, string> > res;
tagger.tag(QUERY_TEST2, res); tagger.Tag(QUERY_TEST2, res);
string s; string s;
s << res; s << res;
ASSERT_EQ(s, ANS_TEST2); ASSERT_EQ(s, ANS_TEST2);
} }
{ {
vector<pair<string, string> > res; vector<pair<string, string> > res;
tagger.tag(QUERY_TEST3, res); tagger.Tag(QUERY_TEST3, res);
string s; string s;
s << res; s << res;
ASSERT_EQ(s, ANS_TEST3); ASSERT_EQ(s, ANS_TEST3);

View File

@ -18,7 +18,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(TransCode::encode(range.begin, range.end)); words.push_back(TransCode::Encode(range.begin, range.end));
} }
res = join(words.begin(), words.end(), "/"); res = join(words.begin(), words.end(), "/");
ASSERT_EQ(res, expected); ASSERT_EQ(res, expected);
@ -32,7 +32,7 @@ TEST(PreFilterTest, Test1) {
while (filter.HasNext()) { while (filter.HasNext()) {
PreFilter::Range range; PreFilter::Range range;
range = filter.Next(); range = filter.Next();
words.push_back(TransCode::encode(range.begin, range.end)); words.push_back(TransCode::Encode(range.begin, range.end));
} }
res = join(words.begin(), words.end(), "/"); res = join(words.begin(), words.end(), "/");
for (size_t i = 0; i < words.size(); i++) { for (size_t i = 0; i < words.size(); i++) {

View File

@ -19,7 +19,7 @@ TEST(MixSegmentTest, Test1) {
{ {
sentence = "我来自北京邮电大学。。。学号123456用AK47"; sentence = "我来自北京邮电大学。。。学号123456用AK47";
expected = "我/来自/北京邮电大学/。/。/。/学号/123456//用/AK47"; expected = "我/来自/北京邮电大学/。/。/。/学号/123456//用/AK47";
segment.cut(sentence, words); segment.Cut(sentence, words);
actual = join(words.begin(), words.end(), "/"); actual = join(words.begin(), words.end(), "/");
ASSERT_EQ(actual, expected); ASSERT_EQ(actual, expected);
} }
@ -27,7 +27,7 @@ TEST(MixSegmentTest, Test1) {
{ {
sentence = "B超 T恤"; sentence = "B超 T恤";
expected = "B超/ /T恤"; expected = "B超/ /T恤";
segment.cut(sentence, words); segment.Cut(sentence, words);
actual = join(words.begin(), words.end(), "/"); actual = join(words.begin(), words.end(), "/");
ASSERT_EQ(actual, expected); ASSERT_EQ(actual, expected);
} }
@ -35,7 +35,7 @@ TEST(MixSegmentTest, Test1) {
{ {
sentence = "他来到了网易杭研大厦"; sentence = "他来到了网易杭研大厦";
expected = "他/来到/了/网易/杭/研/大厦"; expected = "他/来到/了/网易/杭/研/大厦";
segment.cut(sentence, words, false); segment.Cut(sentence, words, false);
actual = join(words.begin(), words.end(), "/"); actual = join(words.begin(), words.end(), "/");
ASSERT_EQ(actual, expected); ASSERT_EQ(actual, expected);
} }
@ -43,7 +43,7 @@ TEST(MixSegmentTest, Test1) {
{ {
sentence = "他来到了网易杭研大厦"; sentence = "他来到了网易杭研大厦";
expected = "他/来到/了/网易/杭研/大厦"; expected = "他/来到/了/网易/杭研/大厦";
segment.cut(sentence, words); segment.Cut(sentence, words);
actual = join(words.begin(), words.end(), "/"); actual = join(words.begin(), words.end(), "/");
ASSERT_EQ(actual, expected); ASSERT_EQ(actual, expected);
} }
@ -53,7 +53,7 @@ TEST(MixSegmentTest, NoUserDict) {
MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8"); MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
const char* str = "令狐冲是云计算方面的专家"; const char* str = "令狐冲是云计算方面的专家";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string res; string res;
ASSERT_EQ("[\"令狐冲\", \"\", \"\", \"计算\", \"方面\", \"\", \"专家\"]", res << words); ASSERT_EQ("[\"令狐冲\", \"\", \"\", \"计算\", \"方面\", \"\", \"专家\"]", res << words);
@ -63,14 +63,14 @@ TEST(MixSegmentTest, UserDict) {
{ {
const char* str = "令狐冲是云计算方面的专家"; const char* str = "令狐冲是云计算方面的专家";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string res; string res;
ASSERT_EQ("[\"令狐冲\", \"\", \"云计算\", \"方面\", \"\", \"专家\"]", res << words); ASSERT_EQ("[\"令狐冲\", \"\", \"云计算\", \"方面\", \"\", \"专家\"]", res << words);
} }
{ {
const char* str = "小明先就职于IBM,后在日本京都大学深造"; const char* str = "小明先就职于IBM,后在日本京都大学深造";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string res; string res;
res << words; res << words;
ASSERT_EQ("[\"小明\", \"\", \"就职\", \"\", \"IBM\", \",\", \"\", \"\", \"日本\", \"京都大学\", \"深造\"]", res); ASSERT_EQ("[\"小明\", \"\", \"就职\", \"\", \"IBM\", \",\", \"\", \"\", \"日本\", \"京都大学\", \"深造\"]", res);
@ -78,7 +78,7 @@ TEST(MixSegmentTest, UserDict) {
{ {
const char* str = "IBM,3.14"; const char* str = "IBM,3.14";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string res; string res;
res << words; res << words;
ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res); ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
@ -90,18 +90,18 @@ TEST(MixSegmentTest, TestUserDict) {
vector<string> words; vector<string> words;
string res; string res;
segment.cut("令狐冲是云计算方面的专家", words); segment.Cut("令狐冲是云计算方面的专家", words);
ASSERT_EQ("[\"令狐冲\", \"\", \"云计算\", \"方面\", \"\", \"专家\"]", res << words); ASSERT_EQ("[\"令狐冲\", \"\", \"云计算\", \"方面\", \"\", \"专家\"]", res << words);
segment.cut("小明先就职于IBM,后在日本京都大学深造", words); segment.Cut("小明先就职于IBM,后在日本京都大学深造", words);
res << words; res << words;
ASSERT_EQ("[\"小明\", \"\", \"就职\", \"\", \"I\", \"B\", \"M\", \",\", \"\", \"\", \"日本\", \"京都大学\", \"深造\"]", res); ASSERT_EQ("[\"小明\", \"\", \"就职\", \"\", \"I\", \"B\", \"M\", \",\", \"\", \"\", \"日本\", \"京都大学\", \"深造\"]", res);
segment.cut("IBM,3.14", words); segment.Cut("IBM,3.14", words);
res << words; res << words;
ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res); ASSERT_EQ("[\"I\", \"B\", \"M\", \",\", \"3.14\"]", res);
segment.cut("忽如一夜春风来,千树万树梨花开", words); segment.Cut("忽如一夜春风来,千树万树梨花开", words);
res = limonp::join(words.begin(), words.end(), "/"); res = limonp::join(words.begin(), words.end(), "/");
ASSERT_EQ("忽如一夜春风来//千树/万树/梨花/开", res); ASSERT_EQ("忽如一夜春风来//千树/万树/梨花/开", res);
} }
@ -112,7 +112,7 @@ TEST(MixSegmentTest, TestMultiUserDict) {
vector<string> words; vector<string> words;
string res; string res;
segment.cut("忽如一夜春风来,千树万树梨花开", words); segment.Cut("忽如一夜春风来,千树万树梨花开", words);
res = limonp::join(words.begin(), words.end(), "/"); res = limonp::join(words.begin(), words.end(), "/");
ASSERT_EQ("忽如一夜春风来//千树万树梨花开", res); ASSERT_EQ("忽如一夜春风来//千树万树梨花开", res);
} }
@ -121,27 +121,27 @@ TEST(MPSegmentTest, Test1) {
MPSegment segment("../dict/jieba.dict.utf8");; MPSegment segment("../dict/jieba.dict.utf8");;
string s; string s;
vector<string> words; vector<string> words;
segment.cut("我来自北京邮电大学。", words); segment.Cut("我来自北京邮电大学。", words);
ASSERT_EQ("[\"\", \"来自\", \"北京邮电大学\", \"\"]", s << words); ASSERT_EQ("[\"\", \"来自\", \"北京邮电大学\", \"\"]", s << words);
segment.cut("B超 T恤", words); segment.Cut("B超 T恤", words);
ASSERT_EQ(s << words, "[\"B超\", \" \", \"T恤\"]"); ASSERT_EQ(s << words, "[\"B超\", \" \", \"T恤\"]");
segment.cut("南京市长江大桥", words); segment.Cut("南京市长江大桥", words);
ASSERT_EQ("[\"南京市\", \"长江大桥\"]", s << words); ASSERT_EQ("[\"南京市\", \"长江大桥\"]", s << words);
// MaxWordLen // MaxWordLen
segment.cut("南京市长江大桥", words, 3); segment.Cut("南京市长江大桥", words, 3);
ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", s << words); ASSERT_EQ("[\"南京市\", \"长江\", \"大桥\"]", s << words);
segment.cut("南京市长江大桥", words, 0); segment.Cut("南京市长江大桥", words, 0);
ASSERT_EQ("[\"\", \"\", \"\", \"\", \"\", \"\", \"\"]", s << words); ASSERT_EQ("[\"\", \"\", \"\", \"\", \"\", \"\", \"\"]", s << words);
segment.cut("湖南长沙市天心区", words); segment.Cut("湖南长沙市天心区", words);
s = join(words.begin(), words.end(), "/"); s = join(words.begin(), words.end(), "/");
ASSERT_EQ("湖南长沙市/天心区", s); ASSERT_EQ("湖南长沙市/天心区", s);
segment.cut("湖南长沙市天心区", words, 3); segment.Cut("湖南长沙市天心区", words, 3);
s = join(words.begin(), words.end(), "/"); s = join(words.begin(), words.end(), "/");
ASSERT_EQ("湖南/长沙市/天心区", s); ASSERT_EQ("湖南/长沙市/天心区", s);
} }
@ -164,7 +164,7 @@ TEST(MPSegmentTest, Test1) {
// res += line; // res += line;
// res += '\n'; // res += '\n';
// //
// segment.cut(line, words); // segment.Cut(line, words);
// string s; // string s;
// s << words; // s << words;
// res += s; // res += s;
@ -182,7 +182,7 @@ TEST(HMMSegmentTest, Test1) {
const char* str = "我来自北京邮电大学。。。学号123456"; const char* str = "我来自北京邮电大学。。。学号123456";
const char* res[] = {"我来", "自北京", "邮电大学", "", "", "", "学号", "123456"}; const char* res[] = {"我来", "自北京", "邮电大学", "", "", "", "学号", "123456"};
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0]))); ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
} }
@ -190,7 +190,7 @@ TEST(HMMSegmentTest, Test1) {
const char* str = "IBM,1.2,123"; const char* str = "IBM,1.2,123";
const char* res[] = {"IBM", ",", "1.2", ",", "123"}; const char* res[] = {"IBM", ",", "1.2", ",", "123"};
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0]))); ASSERT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
} }
} }
@ -200,12 +200,12 @@ TEST(FullSegment, Test1) {
vector<string> words; vector<string> words;
string s; string s;
segment.cut("我来自北京邮电大学", words); segment.Cut("我来自北京邮电大学", words);
s << words; s << words;
ASSERT_EQ(s, "[\"\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]"); ASSERT_EQ(s, "[\"\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\"]");
segment.cut("上市公司CEO", words); segment.Cut("上市公司CEO", words);
s << words; s << words;
ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]"); ASSERT_EQ(s, "[\"上市\", \"公司\", \"C\", \"E\", \"O\"]");
} }
@ -215,7 +215,7 @@ TEST(QuerySegment, Test1) {
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"; const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string s1, s2; string s1, s2;
s1 << words; s1 << words;
@ -231,7 +231,7 @@ TEST(QuerySegment, Test2) {
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"; const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string s1, s2; string s1, s2;
s1 << words; s1 << words;
@ -243,7 +243,7 @@ TEST(QuerySegment, Test2) {
const char* str = "小明硕士毕业于中国科学院计算所iPhone6"; const char* str = "小明硕士毕业于中国科学院计算所iPhone6";
vector<string> words; vector<string> words;
segment.cut(str, words); segment.Cut(str, words);
string s1, s2; string s1, s2;
s1 << words; s1 << words;
@ -257,10 +257,10 @@ TEST(LevelSegmentTest, Test0) {
string s; string s;
LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8"); LevelSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
vector<pair<string, size_t> > words; vector<pair<string, size_t> > words;
segment.cut("南京市长江大桥", words); segment.Cut("南京市长江大桥", words);
ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words); ASSERT_EQ("[\"南京市:0\", \"长江大桥:0\", \"南京:1\", \"长江:1\", \"大桥:1\"]", s << words);
vector<string> res; vector<string> res;
segment.cut("南京市长江大桥", res); segment.Cut("南京市长江大桥", res);
ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res); ASSERT_EQ("[\"南京市\", \"长江大桥\", \"南京\", \"长江\", \"大桥\"]", s << res);
} }

View File

@ -15,7 +15,7 @@ TEST(TrieTest, Empty) {
TEST(TrieTest, Construct) { TEST(TrieTest, Construct) {
vector<Unicode> keys; vector<Unicode> keys;
vector<const DictUnit*> values; vector<const DictUnit*> values;
keys.push_back(TransCode::decode("")); keys.push_back(TransCode::Decode(""));
values.push_back((const DictUnit*)(NULL)); values.push_back((const DictUnit*)(NULL));
Trie trie(keys, values); Trie trie(keys, values);
} }
@ -32,7 +32,7 @@ TEST(DictTrieTest, Test1) {
ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001); ASSERT_LT(trie.GetMinWeight() + 15.6479, 0.001);
string word("来到"); string word("来到");
Unicode uni; Unicode uni;
ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(TransCode::Decode(word, uni));
DictUnit nodeInfo; DictUnit nodeInfo;
nodeInfo.word = uni; nodeInfo.word = uni;
nodeInfo.tag = "v"; nodeInfo.tag = "v";
@ -45,13 +45,13 @@ TEST(DictTrieTest, Test1) {
LocalVector<pair<size_t, const DictUnit*> > res; LocalVector<pair<size_t, const DictUnit*> > res;
const char * words[] = {"", "清华", "清华大学"}; const char * words[] = {"", "清华", "清华大学"};
for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) { for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
ASSERT_TRUE(TransCode::decode(words[i], uni)); ASSERT_TRUE(TransCode::Decode(words[i], uni));
res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end()))); res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
//resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end()); //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
} }
vector<pair<size_t, const DictUnit*> > vec; vector<pair<size_t, const DictUnit*> > vec;
vector<struct Dag> dags; vector<struct Dag> dags;
ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(TransCode::Decode(word, uni));
trie.Find(uni.begin(), uni.end(), dags); trie.Find(uni.begin(), uni.end(), dags);
ASSERT_EQ(dags.size(), uni.size()); ASSERT_EQ(dags.size(), uni.size());
ASSERT_NE(dags.size(), 0u); ASSERT_NE(dags.size(), 0u);
@ -65,7 +65,7 @@ TEST(DictTrieTest, UserDict) {
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8"); DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
string word = "云计算"; string word = "云计算";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end()); const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
ASSERT_TRUE(unit); ASSERT_TRUE(unit);
string res ; string res ;
@ -77,7 +77,7 @@ TEST(DictTrieTest, UserDictWithMaxWeight) {
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax); DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
string word = "云计算"; string word = "云计算";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end()); const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
ASSERT_TRUE(unit); ASSERT_TRUE(unit);
string res ; string res ;
@ -91,7 +91,7 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "清华大学"; string word = "清华大学";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -105,7 +105,7 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "北京邮电大学"; string word = "北京邮电大学";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -119,7 +119,7 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res); trie.Find(unicode.begin(), unicode.end(), res);
@ -133,7 +133,7 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res, 3); trie.Find(unicode.begin(), unicode.end(), res, 3);
@ -147,7 +147,7 @@ TEST(DictTrieTest, Dag) {
{ {
string word = "长江大桥"; string word = "长江大桥";
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::Decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.Find(unicode.begin(), unicode.end(), res, 4); trie.Find(unicode.begin(), unicode.end(), res, 4);