From 2645a4e837e6ad0ca3d3a5ade77ee2e3baf84f53 Mon Sep 17 00:00:00 2001 From: wyy Date: Sat, 19 Apr 2014 23:56:43 +0800 Subject: [PATCH] add keyword extractor into load_test --- test/load_test.cpp | 53 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/test/load_test.cpp b/test/load_test.cpp index 241c68c..4b0dcd6 100644 --- a/test/load_test.cpp +++ b/test/load_test.cpp @@ -6,42 +6,51 @@ #include "../src/MPSegment.hpp" #include "../src/HMMSegment.hpp" #include "../src/MixSegment.hpp" +#include "../src/KeywordExtractor.hpp" using namespace CppJieba; -void cut(const ISegment * seg, const char * const filePath, size_t times = 30) +void cut(size_t times = 20) { - ifstream ifile(filePath); - if(!ifile) - { - LogFatal("open file[%s] failed.", filePath); - return; - } - LogInfo("open file[%s].", filePath); + MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); + assert(seg); vector res; string doc; - loadFile2Str(filePath, doc); - for(uint i = 0; i < times; i ++) + loadFile2Str("../test/testdata/weicheng.utf8", doc); + long beginTime = clock(); + for(size_t i = 0; i < times; i ++) { printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); fflush(stdout); res.clear(); - seg->cut(doc, res); + seg.cut(doc, res); } - printf("\n"); + long endTime = clock(); + printf("\ncut: [%.3lf seconds]time consumed.\n", double(endTime - beginTime)/CLOCKS_PER_SEC); +} + +void extract(size_t times = 400) +{ + KeywordExtractor extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); + assert(extractor); + vector words; + string doc; + loadFile2Str("../test/testdata/review.100", doc); + long beginTime = clock(); + for(size_t i = 0; i < times; i ++) + { + printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); + fflush(stdout); + words.clear(); + extractor.extract(doc, words, 5); + } + long endTime = clock(); + printf("\nextract: [%.3lf seconds]time consumed.\n", double(endTime 
- beginTime)/CLOCKS_PER_SEC); } int main(int argc, char ** argv) { - MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); - if(!seg) - { - cout<<"seg init failed."<