mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
- Added configuration for test paths in CMake to simplify file references. - Updated load_test.cpp and various unit tests to use defined path macros for dictionary and test data files. - Introduced test_paths.h.in to manage directory paths consistently across tests.
77 lines
2.2 KiB
C++
77 lines
2.2 KiB
C++
#include <iostream>
|
|
#include <ctime>
|
|
#include <fstream>
|
|
#include "cppjieba/MPSegment.hpp"
|
|
#include "cppjieba/HMMSegment.hpp"
|
|
#include "cppjieba/MixSegment.hpp"
|
|
#include "cppjieba/KeywordExtractor.hpp"
|
|
#include "limonp/Colors.hpp"
|
|
#include "cppjieba/Jieba.hpp"
|
|
#include "gtest/gtest.h"
|
|
#include "test_paths.h"
|
|
|
|
using namespace cppjieba;
|
|
|
|
void Cut(size_t times = 50) {
|
|
MixSegment seg(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
|
|
vector<string> res;
|
|
string doc;
|
|
ifstream ifs(TEST_DATA_DIR "/weicheng.utf8");
|
|
assert(ifs);
|
|
doc << ifs;
|
|
long beginTime = clock();
|
|
for (size_t i = 0; i < times; i ++) {
|
|
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
|
fflush(stdout);
|
|
res.clear();
|
|
seg.Cut(doc, res);
|
|
}
|
|
printf("\n");
|
|
long endTime = clock();
|
|
ColorPrintln(GREEN, "Cut: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
|
}
|
|
|
|
void Extract(size_t times = 400) {
|
|
KeywordExtractor Extractor(DICT_DIR "/jieba.dict.utf8",
|
|
DICT_DIR "/hmm_model.utf8",
|
|
DICT_DIR "/idf.utf8",
|
|
DICT_DIR "/stop_words.utf8");
|
|
vector<string> words;
|
|
string doc;
|
|
ifstream ifs(TEST_DATA_DIR "/review.100");
|
|
assert(ifs);
|
|
doc << ifs;
|
|
long beginTime = clock();
|
|
for (size_t i = 0; i < times; i ++) {
|
|
printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
|
|
fflush(stdout);
|
|
words.clear();
|
|
Extractor.Extract(doc, words, 5);
|
|
}
|
|
printf("\n");
|
|
long endTime = clock();
|
|
ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
|
|
}
|
|
|
|
// Smoke test: builds a Jieba instance from the dictionary files under
// DICT_DIR, cuts one fixed sentence and compares the serialized word list
// against the expected text.
TEST(LoadTest, Test1) {
  Jieba segmenter(DICT_DIR "/jieba.dict.utf8",
                  DICT_DIR "/hmm_model.utf8",
                  DICT_DIR "/user.dict.utf8",
                  DICT_DIR "/idf.utf8",
                  DICT_DIR "/stop_words.utf8");

  vector<string> tokens;
  segmenter.Cut("他来到了网易杭研大厦", tokens);

  // This operator<< overload is not declared in this file; judging by the
  // expected value below it renders the vector as a bracketed, quoted list.
  string rendered;
  rendered << tokens;

  const string expected = "[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]";
  ASSERT_EQ(expected, rendered);
}
|
|
|
|
int main(int argc, char** argv) {
|
|
testing::InitGoogleTest(&argc, argv);
|
|
Cut();
|
|
Extract();
|
|
return RUN_ALL_TESTS();
|
|
}
|