#include #include #include #include "cppjieba/MPSegment.hpp" #include "cppjieba/HMMSegment.hpp" #include "cppjieba/MixSegment.hpp" #include "cppjieba/KeywordExtractor.hpp" #include "limonp/Colors.hpp" #include "cppjieba/Jieba.hpp" #include "gtest/gtest.h" #include "test_paths.h" using namespace cppjieba; void Cut(size_t times = 50) { MixSegment seg(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8"); vector res; string doc; ifstream ifs(TEST_DATA_DIR "/weicheng.utf8"); assert(ifs); doc << ifs; long beginTime = clock(); for (size_t i = 0; i < times; i ++) { printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); fflush(stdout); res.clear(); seg.Cut(doc, res); } printf("\n"); long endTime = clock(); ColorPrintln(GREEN, "Cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC); } void Extract(size_t times = 400) { KeywordExtractor Extractor(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", DICT_DIR "/idf.utf8", DICT_DIR "/stop_words.utf8"); vector words; string doc; ifstream ifs(TEST_DATA_DIR "/review.100"); assert(ifs); doc << ifs; long beginTime = clock(); for (size_t i = 0; i < times; i ++) { printf("process [%3.0lf %%]\r", 100.0*(i+1)/times); fflush(stdout); words.clear(); Extractor.Extract(doc, words, 5); } printf("\n"); long endTime = clock(); ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC); } TEST(LoadTest, Test1) { Jieba jieba(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", DICT_DIR "/user.dict.utf8", DICT_DIR "/idf.utf8", DICT_DIR "/stop_words.utf8"); vector words; string result; jieba.Cut("他来到了网易杭研大厦", words); result << words; string expected = "[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]"; ASSERT_EQ(expected, result); } int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); Cut(); Extract(); return RUN_ALL_TESTS(); }