add keyword extractor into load_test

This commit is contained in:
wyy 2014-04-19 23:56:43 +08:00
parent cbe9642972
commit 2645a4e837

View File

@ -6,42 +6,51 @@
#include "../src/MPSegment.hpp" #include "../src/MPSegment.hpp"
#include "../src/HMMSegment.hpp" #include "../src/HMMSegment.hpp"
#include "../src/MixSegment.hpp" #include "../src/MixSegment.hpp"
#include "../src/KeywordExtractor.hpp"
using namespace CppJieba; using namespace CppJieba;
// NOTE(review): this is a side-by-side diff rendering; on lines that carry two
// statements the left one appears to be the previous revision and the right one
// the revision introduced by this commit. Lines with a single statement belong
// to only one of the two revisions.
//
// New revision, cut(times): benchmark for word segmentation. It builds a
// MixSegment from the jieba dictionary and the HMM model, loads
// ../test/testdata/weicheng.utf8 into one string, segments that string `times`
// times (default 20) and prints the processor time measured with clock().
//
// Previous revision, cut(seg, filePath, times): received the segmenter and the
// input path from the caller, logged and returned when the file could not be
// opened, and left the timing to main().
void cut(const ISegment * seg, const char * const filePath, size_t times = 30) void cut(size_t times = 20)
{ {
ifstream ifile(filePath); MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
// NOTE(review): assert is compiled out when NDEBUG is defined, so in such a
// build a segmenter that failed to initialise would not be detected here.
if(!ifile) assert(seg);
{
LogFatal("open file[%s] failed.", filePath);
return;
}
LogInfo("open file[%s].", filePath);
vector<string> res; vector<string> res;
string doc; string doc;
loadFile2Str(filePath, doc); loadFile2Str("../test/testdata/weicheng.utf8", doc);
// New revision starts the clock after the document is loaded, so only the
// segmentation loop is timed.
for(uint i = 0; i < times; i ++) long beginTime = clock();
for(size_t i = 0; i < times; i ++)
{ {
// "\r" without a newline rewrites the same console line; fflush pushes the
// partial line out immediately.
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
fflush(stdout); fflush(stdout);
// The result vector is emptied before every pass and reused.
res.clear(); res.clear();
seg->cut(doc, res); seg.cut(doc, res);
} }
printf("\n"); long endTime = clock();
// clock() ticks are converted to seconds by dividing by CLOCKS_PER_SEC.
printf("\ncut: [%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
}
// extract(times): benchmark for keyword extraction, present only in the new
// revision of this diff. It builds a KeywordExtractor from the jieba
// dictionary, the HMM model, the IDF table and the stop-word list, loads
// ../test/testdata/review.100 into one string, runs extraction on it `times`
// times (default 400) and prints the processor time measured with clock().
void extract(size_t times = 400)
{
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
// NOTE(review): assert is compiled out when NDEBUG is defined, so in such a
// build an extractor that failed to initialise would not be detected here.
assert(extractor);
vector<string> words;
string doc;
// NOTE(review): the result of loadFile2Str is not checked; presumably a missing
// file leaves `doc` empty and the loop then times empty input - confirm.
loadFile2Str("../test/testdata/review.100", doc);
// The clock starts after construction and file loading, so only the
// extraction loop is timed.
long beginTime = clock();
for(size_t i = 0; i < times; i ++)
{
// "\r" without a newline rewrites the same console line; fflush pushes the
// partial line out immediately.
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
fflush(stdout);
// The output vector is emptied before every pass and reused.
words.clear();
// Third argument is 5; presumably the number of keywords requested - confirm
// against KeywordExtractor.
extractor.extract(doc, words, 5);
}
long endTime = clock();
// clock() ticks are converted to seconds by dividing by CLOCKS_PER_SEC.
printf("\nextract: [%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
// NOTE(review): in this side-by-side rendering the left brace below appears to
// close the previous revision's cut(), the right one closes extract().
} }
// NOTE(review): side-by-side diff rendering; on lines that carry two statements
// the left one appears to be the previous revision and the right one the
// revision introduced by this commit.
//
// New revision: main runs the two benchmarks in sequence, cut() and then
// extract(), each with its default iteration count, and returns EXIT_SUCCESS.
// Each benchmark constructs its own objects and prints its own timing.
//
// Previous revision: main constructed the MixSegment itself, printed a message
// and returned EXIT_FAILURE when construction failed, and timed the call to
// cut() with clock().
int main(int argc, char ** argv) int main(int argc, char ** argv)
{ {
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); cut();
if(!seg) extract();
{
cout<<"seg init failed."<<endl;
return EXIT_FAILURE;
}
long beginTime = clock();
cut(&seg, "../test/testdata/weicheng.utf8");
long endTime = clock();
printf("[%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
return EXIT_SUCCESS; return EXIT_SUCCESS;
} }