mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
- Removed default install path setting from CMakeLists.txt.
- Updated test CMakeLists to include Google Test and configure test paths.
- Refactored load_test and unittest files to use defined paths for dictionaries and test data.
- Added test paths header for better path management in tests.
- Ensured all tests are properly linked and configured for execution.
88 lines
3.3 KiB
C++
88 lines
3.3 KiB
C++
#include "cppjieba/KeywordExtractor.hpp"
|
||
#include "gtest/gtest.h"
|
||
#include "test_paths.h"
|
||
|
||
using namespace cppjieba;
|
||
|
||
// Keyword extraction with the four-file constructor (dictionary, HMM model,
// IDF table, stop words). Each case serializes the extraction result into a
// string and compares it against a fixed expected rendering.
TEST(KeywordExtractorTest, Test1) {
  KeywordExtractor extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
                             DICT_DIR "/hmm_model.utf8",
                             DICT_DIR "/idf.utf8",
                             DICT_DIR "/stop_words.utf8");

  // Case 1: one sentence, extracted into each of the three output container
  // types. `rendered` is shared by the three checks; every check expects only
  // the most recent serialization, so `<<` is relied upon to replace the
  // previous contents rather than append to them.
  {
    const string sentence("你好世界世界而且而且");
    const size_t limit = 5;
    string rendered;

    // Plain keyword strings.
    {
      vector<string> keywords;
      extractor.Extract(sentence, keywords, limit);
      rendered << keywords;
      ASSERT_EQ(rendered, "[\"世界\", \"你好\"]");
    }

    // (keyword, weight) pairs.
    {
      vector<pair<string, double> > weighted;
      extractor.Extract(sentence, weighted, limit);
      rendered << weighted;
      ASSERT_EQ(rendered, "[世界:8.73506, 你好:7.95788]");
    }

    // Full Word records: keyword, offsets and weight.
    {
      vector<KeywordExtractor::Word> records;
      extractor.Extract(sentence, records, limit);
      rendered << records;
      ASSERT_EQ(rendered, "[{\"word\": \"\xE4\xB8\x96\xE7\x95\x8C\", \"offset\": [6, 12], \"weight\": 8.73506}, {\"word\": \"\xE4\xBD\xA0\xE5\xA5\xBD\", \"offset\": [0], \"weight\": 7.95788}]");
    }
  }

  // Case 2: a longer sentence; five Word records are expected back for a
  // requested limit of five.
  {
    const string sentence("我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。");
    const size_t limit = 5;
    vector<KeywordExtractor::Word> records;
    string rendered;

    extractor.Extract(sentence, records, limit);
    rendered << records;
    ASSERT_EQ(rendered, "[{\"word\": \"CEO\", \"offset\": [93], \"weight\": 11.7392}, {\"word\": \"\xE5\x8D\x87\xE8\x81\x8C\", \"offset\": [72], \"weight\": 10.8562}, {\"word\": \"\xE5\x8A\xA0\xE8\x96\xAA\", \"offset\": [78], \"weight\": 10.6426}, {\"word\": \"\xE6\x89\x8B\xE6\x89\xB6\xE6\x8B\x96\xE6\x8B\x89\xE6\x9C\xBA\", \"offset\": [21], \"weight\": 10.0089}, {\"word\": \"\xE5\xB7\x85\xE5\xB3\xB0\", \"offset\": [111], \"weight\": 9.49396}]");
  }

  // Case 3: mixed Chinese / alphanumeric input.
  {
    const string sentence("一部iPhone6");
    const size_t limit = 5;
    vector<KeywordExtractor::Word> records;
    string rendered;

    extractor.Extract(sentence, records, limit);
    rendered << records;
    ASSERT_EQ(rendered, "[{\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 11.7392}, {\"word\": \"\xE4\xB8\x80\xE9\x83\xA8\", \"offset\": [0], \"weight\": 6.47592}]");
  }
}
|
||
|
||
// Keyword extraction with the five-file constructor: the same four resources
// as the four-argument form plus a user dictionary passed as the last
// argument. Results are serialized to a string and compared against fixed
// expected renderings.
TEST(KeywordExtractorTest, Test2) {
  KeywordExtractor extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
                             DICT_DIR "/hmm_model.utf8",
                             DICT_DIR "/idf.utf8",
                             DICT_DIR "/stop_words.utf8",
                             TEST_DATA_DIR "/userdict.utf8");

  // Case 1: three Word records are expected for this sentence.
  {
    const string sentence("蓝翔优秀毕业生");
    const size_t limit = 5;
    vector<KeywordExtractor::Word> records;
    string rendered;

    extractor.Extract(sentence, records, limit);
    rendered << records;
    ASSERT_EQ(rendered, "[{\"word\": \"\xE8\x93\x9D\xE7\xBF\x94\", \"offset\": [0], \"weight\": 11.7392}, {\"word\": \"\xE6\xAF\x95\xE4\xB8\x9A\xE7\x94\x9F\", \"offset\": [12], \"weight\": 8.13549}, {\"word\": \"\xE4\xBC\x98\xE7\xA7\x80\", \"offset\": [6], \"weight\": 6.78347}]");
  }

  // Case 2: mixed Chinese / alphanumeric input.
  {
    const string sentence("一部iPhone6");
    const size_t limit = 5;
    vector<KeywordExtractor::Word> records;
    string rendered;

    extractor.Extract(sentence, records, limit);
    rendered << records;
    ASSERT_EQ(rendered, "[{\"word\": \"iPhone6\", \"offset\": [6], \"weight\": 11.7392}, {\"word\": \"\xE4\xB8\x80\xE9\x83\xA8\", \"offset\": [0], \"weight\": 6.47592}]");
  }
}
|