mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add keyword extractor into load_test
This commit is contained in:
parent
cbe9642972
commit
2645a4e837
@ -6,42 +6,51 @@
|
||||
#include "../src/MPSegment.hpp"
|
||||
#include "../src/HMMSegment.hpp"
|
||||
#include "../src/MixSegment.hpp"
|
||||
#include "../src/KeywordExtractor.hpp"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
void cut(const ISegment * seg, const char * const filePath, size_t times = 30)
|
||||
void cut(size_t times = 20)
|
||||
{
|
||||
ifstream ifile(filePath);
|
||||
if(!ifile)
|
||||
{
|
||||
LogFatal("open file[%s] failed.", filePath);
|
||||
return;
|
||||
}
|
||||
LogInfo("open file[%s].", filePath);
|
||||
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
|
||||
assert(seg);
|
||||
vector<string> res;
|
||||
string doc;
|
||||
loadFile2Str(filePath, doc);
|
||||
for(uint i = 0; i < times; i ++)
|
||||
loadFile2Str("../test/testdata/weicheng.utf8", doc);
|
||||
long beginTime = clock();
|
||||
for(size_t i = 0; i < times; i ++)
|
||||
{
|
||||
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
||||
fflush(stdout);
|
||||
res.clear();
|
||||
seg->cut(doc, res);
|
||||
seg.cut(doc, res);
|
||||
}
|
||||
printf("\n");
|
||||
long endTime = clock();
|
||||
printf("\ncut: [%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
void extract(size_t times = 400)
|
||||
{
|
||||
KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
|
||||
assert(extractor);
|
||||
vector<string> words;
|
||||
string doc;
|
||||
loadFile2Str("../test/testdata/review.100", doc);
|
||||
long beginTime = clock();
|
||||
for(size_t i = 0; i < times; i ++)
|
||||
{
|
||||
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
||||
fflush(stdout);
|
||||
words.clear();
|
||||
extractor.extract(doc, words, 5);
|
||||
}
|
||||
long endTime = clock();
|
||||
printf("\nextract: [%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
long beginTime = clock();
|
||||
cut(&seg, "../test/testdata/weicheng.utf8");
|
||||
long endTime = clock();
|
||||
printf("[%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
||||
cut();
|
||||
extract();
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user