Mirror of https://github.com/yanyiwu/cppjieba.git (synced 2025-07-18 00:00:12 +08:00)
build: enhance test configuration and path management
- Added configuration for test paths in CMake to simplify file references.
- Updated load_test.cpp and various unit tests to use defined path macros for dictionary and test data files.
- Introduced test_paths.h.in to manage directory paths consistently across tests.
commit 9cd64a1694 (parent aa410a69bb)
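The mechanism behind the change is CMake's configure_file(): the @CMAKE_CURRENT_SOURCE_DIR@ and @CMAKE_SOURCE_DIR@ placeholders in test_paths.h.in are expanded at configure time, so the generated test_paths.h carries absolute paths and the tests no longer depend on the working directory they are launched from. As a rough illustration (not part of the commit), the generated macros are consumed through ordinary adjacent-string-literal concatenation; the directory values below are hypothetical stand-ins for whatever configure_file substitutes on the build machine:

// Illustration only -- the real values are produced by configure_file().
#include <cstdio>

#define DICT_DIR      "/path/to/cppjieba/dict"            // hypothetical substituted value
#define TEST_DATA_DIR "/path/to/cppjieba/test/testdata"   // hypothetical substituted value

int main() {
  // Adjacent string literals are concatenated at compile time, so
  // DICT_DIR "/jieba.dict.utf8" is a single absolute path constant.
  std::printf("%s\n", DICT_DIR "/jieba.dict.utf8");
  std::printf("%s\n", TEST_DATA_DIR "/weicheng.utf8");
  return 0;
}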
@@ -1,4 +1,7 @@
 SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR})
 
+# Configure test paths
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/test_paths.h.in" "${CMAKE_BINARY_DIR}/test/test_paths.h")
+
 ADD_EXECUTABLE(load_test load_test.cpp)
 ADD_SUBDIRECTORY(unittest)

@@ -6,14 +6,17 @@
 #include "cppjieba/MixSegment.hpp"
 #include "cppjieba/KeywordExtractor.hpp"
 #include "limonp/Colors.hpp"
+#include "cppjieba/Jieba.hpp"
+#include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
 void Cut(size_t times = 50) {
-  MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
+  MixSegment seg(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
   vector<string> res;
   string doc;
-  ifstream ifs("../test/testdata/weicheng.utf8");
+  ifstream ifs(TEST_DATA_DIR "/weicheng.utf8");
   assert(ifs);
   doc << ifs;
   long beginTime = clock();
@@ -29,10 +32,13 @@ void Cut(size_t times = 50) {
 }
 
 void Extract(size_t times = 400) {
-  KeywordExtractor Extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
+  KeywordExtractor Extractor(DICT_DIR "/jieba.dict.utf8",
+                             DICT_DIR "/hmm_model.utf8",
+                             DICT_DIR "/idf.utf8",
+                             DICT_DIR "/stop_words.utf8");
   vector<string> words;
   string doc;
-  ifstream ifs("../test/testdata/review.100");
+  ifstream ifs(TEST_DATA_DIR "/review.100");
   assert(ifs);
   doc << ifs;
   long beginTime = clock();
@@ -47,8 +53,24 @@ void Extract(size_t times = 400) {
   ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC);
 }
 
-int main(int argc, char ** argv) {
+TEST(LoadTest, Test1) {
+  Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+              DICT_DIR "/hmm_model.utf8",
+              DICT_DIR "/user.dict.utf8",
+              DICT_DIR "/idf.utf8",
+              DICT_DIR "/stop_words.utf8");
+  vector<string> words;
+  string result;
+
+  jieba.Cut("他来到了网易杭研大厦", words);
+  result << words;
+  string expected = "[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]";
+  ASSERT_EQ(expected, result);
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
   Cut();
   Extract();
-  return EXIT_SUCCESS;
+  return RUN_ALL_TESTS();
 }

test/test_paths.h.in (new file, +7 lines)
@@ -0,0 +1,7 @@
+#ifndef TEST_PATHS_H
+#define TEST_PATHS_H
+
+#define TEST_DATA_DIR "@CMAKE_CURRENT_SOURCE_DIR@/testdata"
+#define DICT_DIR "@CMAKE_SOURCE_DIR@/dict"
+
+#endif // TEST_PATHS_H

@@ -1,3 +1,4 @@
+message(STATUS "MSVC value: ${MSVC}")
 if (MSVC)
     set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
     set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
@@ -19,6 +20,12 @@ SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
 
 ADD_DEFINITIONS(-DLOGGING_LEVEL=LL_WARNING)
 
+# Add include directories
+INCLUDE_DIRECTORIES(
+    ${CMAKE_CURRENT_BINARY_DIR}
+    ${CMAKE_BINARY_DIR}/test
+)
+
 ADD_EXECUTABLE(test.run
     gtest_main.cpp
     keyword_extractor_test.cpp

@@ -1,5 +1,6 @@
 #include "cppjieba/Jieba.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
@@ -37,11 +38,11 @@ TEST(JiebaTest, Test0) {
 }
 
 TEST(JiebaTest, Test1) {
-  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        "../dict/hmm_model.utf8",
-                        "../dict/user.dict.utf8",
-                        "../dict/idf.utf8",
-                        "../dict/stop_words.utf8");
+  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+                        DICT_DIR "/hmm_model.utf8",
+                        DICT_DIR "/user.dict.utf8",
+                        DICT_DIR "/idf.utf8",
+                        DICT_DIR "/stop_words.utf8");
   vector<string> words;
   string result;
 
@@ -71,14 +72,14 @@ TEST(JiebaTest, Test1) {
   jieba.CutForSearch("他来到了网易杭研大厦", words);
   result << words;
   ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result);
 
 }
 
 TEST(JiebaTest, WordTest) {
-  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        "../dict/hmm_model.utf8",
-                        "../dict/user.dict.utf8",
-                        "../dict/idf.utf8",
-                        "../dict/stop_words.utf8");
+  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+                        DICT_DIR "/hmm_model.utf8",
+                        DICT_DIR "/user.dict.utf8",
+                        DICT_DIR "/idf.utf8",
+                        DICT_DIR "/stop_words.utf8");
   vector<Word> words;
   string result;
 
@@ -116,11 +117,11 @@ TEST(JiebaTest, WordTest) {
 }
 
 TEST(JiebaTest, InsertUserWord) {
-  cppjieba::Jieba jieba("../dict/jieba.dict.utf8",
-                        "../dict/hmm_model.utf8",
-                        "../dict/user.dict.utf8",
-                        "../dict/idf.utf8",
-                        "../dict/stop_words.utf8");
+  cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8",
+                        DICT_DIR "/hmm_model.utf8",
+                        DICT_DIR "/user.dict.utf8",
+                        DICT_DIR "/idf.utf8",
+                        DICT_DIR "/stop_words.utf8");
   vector<string> words;
   string result;
 

@@ -1,10 +1,14 @@
 #include "cppjieba/KeywordExtractor.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
 TEST(KeywordExtractorTest, Test1) {
-  KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8");
+  KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+                             DICT_DIR "/hmm_model.utf8",
+                             DICT_DIR "/idf.utf8",
+                             DICT_DIR "/stop_words.utf8");
 
   {
     string s("你好世界世界而且而且");
@@ -55,7 +59,11 @@ TEST(KeywordExtractorTest, Test1) {
 }
 
 TEST(KeywordExtractorTest, Test2) {
-  KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8");
+  KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+                             DICT_DIR "/hmm_model.utf8",
+                             DICT_DIR "/idf.utf8",
+                             DICT_DIR "/stop_words.utf8",
+                             TEST_DATA_DIR "/userdict.utf8");
 
   {
     string s("蓝翔优秀毕业生");

@@ -1,5 +1,6 @@
 #include "cppjieba/MixSegment.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
@@ -13,7 +14,7 @@ static const char * const ANS_TEST3 = "[iPhone6:eng, 手机:n, 的:uj, 最大:a,
 //static const char * const ANS_TEST3 = "";
 
 TEST(PosTaggerTest, Test) {
-  MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");
+  MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
   {
     vector<pair<string, string> > res;
     tagger.Tag(QUERY_TEST1, res);
@@ -23,7 +24,7 @@ TEST(PosTaggerTest, Test) {
   }
 }
 TEST(PosTagger, TestUserDict) {
-  MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8");
+  MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", TEST_DATA_DIR "/userdict.utf8");
   {
     vector<pair<string, string> > res;
     tagger.Tag(QUERY_TEST2, res);

@@ -5,11 +5,12 @@
 #include "cppjieba/FullSegment.hpp"
 #include "cppjieba/QuerySegment.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
 TEST(MixSegmentTest, Test1) {
-  MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");;
+  MixSegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8");
   string sentence;
   vector<string> words;
   string actual;
@@ -49,16 +50,18 @@ TEST(MixSegmentTest, Test1) {
 }
 
 TEST(MixSegmentTest, NoUserDict) {
-  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");
+  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8");
   const char* str = "令狐冲是云计算方面的专家";
   vector<string> words;
   segment.Cut(str, words);
   string res;
   ASSERT_EQ("[\"令狐冲\", \"是\", \"云\", \"计算\", \"方面\", \"的\", \"专家\"]", res << words);
 
 }
 
 TEST(MixSegmentTest, UserDict) {
-  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/user.dict.utf8");
+  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+                     DICT_DIR "/hmm_model.utf8",
+                     DICT_DIR "/user.dict.utf8");
   {
     const char* str = "令狐冲是云计算方面的专家";
     vector<string> words;
@@ -83,9 +86,10 @@ TEST(MixSegmentTest, UserDict) {
     ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res);
   }
 }
 
 TEST(MixSegmentTest, TestUserDict) {
-  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
-    "../test/testdata/userdict.utf8");
+  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8",
+    TEST_DATA_DIR "/userdict.utf8");
   vector<string> words;
   string res;
 
@@ -123,8 +127,8 @@ TEST(MixSegmentTest, TestUserDict) {
 }
 
 TEST(MixSegmentTest, TestMultiUserDict) {
-  MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8",
-    "../test/testdata/userdict.utf8;../test/testdata/userdict.2.utf8");
+  MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8",
+    TEST_DATA_DIR "/userdict.utf8;" TEST_DATA_DIR "/userdict.2.utf8");
   vector<string> words;
   string res;
 
@@ -134,7 +138,7 @@ TEST(MixSegmentTest, TestMultiUserDict) {
 }
 
 TEST(MPSegmentTest, Test1) {
-  MPSegment segment("../dict/jieba.dict.utf8");;
+  MPSegment segment(DICT_DIR "/jieba.dict.utf8");
   string s;
   vector<string> words;
   segment.Cut("我来自北京邮电大学。", words);
@@ -163,7 +167,7 @@ TEST(MPSegmentTest, Test1) {
 }
 
 TEST(HMMSegmentTest, Test1) {
-  HMMSegment segment("../dict/hmm_model.utf8");;
+  HMMSegment segment(DICT_DIR "/hmm_model.utf8");
   {
     const char* str = "我来自北京邮电大学。。。学号123456";
     const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", "学号", "123456"};
@@ -182,7 +186,7 @@ TEST(HMMSegmentTest, Test1) {
 }
 
 TEST(FullSegment, Test1) {
-  FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8");
+  FullSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8");
   vector<string> words;
   string s;
 
@@ -197,7 +201,7 @@ TEST(FullSegment, Test1) {
 }
 
 TEST(QuerySegment, Test1) {
-  QuerySegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "");
+  QuerySegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", "");
   vector<string> words;
   string s1, s2;
 
@@ -218,7 +222,9 @@ TEST(QuerySegment, Test1) {
 }
 
 TEST(QuerySegment, Test2) {
-  QuerySegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8|../test/testdata/userdict.english");
+  QuerySegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+                       DICT_DIR "/hmm_model.utf8",
+                       TEST_DATA_DIR "/userdict.utf8|" TEST_DATA_DIR "/userdict.english");
   vector<string> words;
   string s1, s2;
 
@@ -242,14 +248,13 @@ TEST(QuerySegment, Test2) {
   s2 = "中国/科学/学院/科学院/中国科学院";
   ASSERT_EQ(s1, s2);
   }
 
 }
 
 TEST(MPSegmentTest, Unicode32) {
   string s("天气很好,🙋 我们去郊游。");
   vector<string> words;
 
-  MPSegment segment("../dict/jieba.dict.utf8");;
+  MPSegment segment(DICT_DIR "/jieba.dict.utf8");
   segment.Cut(s, words);
 
   ASSERT_EQ(Join(words.begin(), words.end(), "/"), "天气/很/好/,/🙋/ /我们/去/郊游/。");

@@ -1,13 +1,14 @@
 #include "cppjieba/TextRankExtractor.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
 TEST(TextRankExtractorTest, Test1) {
   TextRankExtractor Extractor(
-    "../test/testdata/extra_dict/jieba.dict.small.utf8",
-    "../dict/hmm_model.utf8",
-    "../dict/stop_words.utf8");
+    TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+    DICT_DIR "/hmm_model.utf8",
+    DICT_DIR "/stop_words.utf8");
   {
     string s("你好世界世界而且而且");
     string res;
@@ -59,10 +60,10 @@ TEST(TextRankExtractorTest, Test1) {
 
 TEST(TextRankExtractorTest, Test2) {
   TextRankExtractor Extractor(
-    "../test/testdata/extra_dict/jieba.dict.small.utf8",
-    "../dict/hmm_model.utf8",
-    "../dict/stop_words.utf8",
-    "../test/testdata/userdict.utf8");
+    TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8",
+    DICT_DIR "/hmm_model.utf8",
+    DICT_DIR "/stop_words.utf8",
+    TEST_DATA_DIR "/userdict.utf8");
 
   {
     string s("\xe8\x93\x9d\xe7\xbf\x94\xe4\xbc\x98\xe7\xa7\x80\xe6\xaf\x95\xe4\xb8\x9a\xe7\x94\x9f");

@@ -1,10 +1,11 @@
 #include "cppjieba/DictTrie.hpp"
 #include "cppjieba/MPSegment.hpp"
 #include "gtest/gtest.h"
+#include "test_paths.h"
 
 using namespace cppjieba;
 
-static const char* const DICT_FILE = "../test/testdata/extra_dict/jieba.dict.small.utf8";
+static const char* const DICT_FILE = TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8";
 
 TEST(TrieTest, Empty) {
   vector<Unicode> keys;
@@ -33,12 +34,6 @@ TEST(DictTrieTest, Test1) {
   string word("来到");
   cppjieba::RuneStrArray uni;
   ASSERT_TRUE(DecodeUTF8RunesInString(word, uni));
-  //DictUnit nodeInfo;
-  //nodeInfo.word = uni;
-  //nodeInfo.tag = "v";
-  //nodeInfo.weight = -8.87033;
-  //s1 << nodeInfo;
-  //s2 << (*trie.Find(uni.begin(), uni.end()));
   const DictUnit* du = trie.Find(uni.begin(), uni.end());
   ASSERT_TRUE(du != NULL);
   ASSERT_EQ(2u, du->word.size());
@@ -47,14 +42,12 @@ TEST(DictTrieTest, Test1) {
   ASSERT_EQ("v", du->tag);
   ASSERT_NEAR(-8.870, du->weight, 0.001);
 
-  //EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
   word = "清华大学";
   LocalVector<pair<size_t, const DictUnit*> > res;
   const char * words[] = {"清", "清华", "清华大学"};
   for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
     ASSERT_TRUE(DecodeUTF8RunesInString(words[i], uni));
     res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end())));
-    //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end());
   }
   vector<pair<size_t, const DictUnit*> > vec;
   vector<struct Dag> dags;
@@ -65,11 +58,10 @@ TEST(DictTrieTest, Test1) {
   s1 << res;
   s2 << dags[0].nexts;
   ASSERT_EQ(s1, s2);
 
 }
 
 TEST(DictTrieTest, UserDict) {
-  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
+  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8");
   string word = "云计算";
   cppjieba::RuneStrArray unicode;
   ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
@@ -93,7 +85,7 @@ TEST(DictTrieTest, UserDict) {
 }
 
 TEST(DictTrieTest, UserDictWithMaxWeight) {
-  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
+  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8", DictTrie::WordWeightMax);
   string word = "云计算";
   cppjieba::RuneStrArray unicode;
   ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode));
@@ -103,7 +95,7 @@ TEST(DictTrieTest, UserDictWithMaxWeight) {
 }
 
 TEST(DictTrieTest, Dag) {
-  DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8");
+  DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8");
 
   {
     string word = "清华大学";