diff --git a/CMakeLists.txt b/CMakeLists.txt index 28810e6..721ef87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,9 +4,6 @@ PROJECT(CPPJIEBA) INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/deps/limonp/include ${PROJECT_SOURCE_DIR}/include) -if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) - set (CMAKE_INSTALL_PREFIX "/usr/local/cppjieba" CACHE PATH "default install path" FORCE ) -endif() if(NOT DEFINED CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 11) @@ -29,12 +26,7 @@ endif() option(CPPJIEBA_BUILD_TESTS "Build cppjieba tests" ${CPPJIEBA_TOP_LEVEL_PROJECT}) if(CPPJIEBA_BUILD_TESTS) - ENABLE_TESTING() - - message(STATUS "MSVC value: ${MSVC}") ADD_SUBDIRECTORY(test) - ADD_TEST(NAME ./test/test.run COMMAND ./test/test.run) - ADD_TEST(NAME ./load_test COMMAND ./load_test) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e5345c5..f225e5b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,4 +1,38 @@ +ENABLE_TESTING() + SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}) +# Configure test paths +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/test_paths.h.in" "${CMAKE_BINARY_DIR}/test/test_paths.h") + +# Add include directories +INCLUDE_DIRECTORIES( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_BINARY_DIR}/test + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/deps/limonp/include + ${CMAKE_BINARY_DIR}/_deps/googletest-src/googletest/include +) + +# Add Google Test +include(FetchContent) +FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-1.12.1 +) +FetchContent_MakeAvailable(googletest) + +# Add UTF-8 support for MSVC +if(MSVC) + add_compile_options(/utf-8) +endif() + ADD_EXECUTABLE(load_test load_test.cpp) +TARGET_LINK_LIBRARIES(load_test gtest gtest_main) + ADD_SUBDIRECTORY(unittest) + +# Add test configurations +ADD_TEST(NAME load_test COMMAND load_test) +SET_TESTS_PROPERTIES(load_test PROPERTIES WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) diff --git a/test/load_test.cpp b/test/load_test.cpp index 0a4e51b..ef07979 100644 --- a/test/load_test.cpp +++ b/test/load_test.cpp @@ -6,14 +6,17 @@ #include "cppjieba/MixSegment.hpp" #include "cppjieba/KeywordExtractor.hpp" #include "limonp/Colors.hpp" +#include "cppjieba/Jieba.hpp" +#include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; void Cut(size_t times = 50) { - MixSegment seg("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); + MixSegment seg(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8"); vector res; string doc; - ifstream ifs("../test/testdata/weicheng.utf8"); + ifstream ifs(TEST_DATA_DIR "/weicheng.utf8"); assert(ifs); doc << ifs; long beginTime = clock(); @@ -29,10 +32,13 @@ void Cut(size_t times = 50) { } void Extract(size_t times = 400) { - KeywordExtractor Extractor("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); + KeywordExtractor Extractor(DICT_DIR "/jieba.dict.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); vector words; string doc; - ifstream ifs("../test/testdata/review.100"); + ifstream ifs(TEST_DATA_DIR "/review.100"); assert(ifs); doc << ifs; long beginTime = clock(); @@ -47,8 +53,24 @@ void Extract(size_t times = 400) { ColorPrintln(GREEN, "Extract: [%.3lf seconds]time consumed.", double(endTime - beginTime)/CLOCKS_PER_SEC); } -int main(int argc, char ** argv) { +TEST(LoadTest, Test1) { + Jieba jieba(DICT_DIR "/jieba.dict.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/user.dict.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); + vector words; + string result; + + jieba.Cut("他来到了网易杭研大厦", words); + result << words; + string expected = "[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]"; + ASSERT_EQ(expected, result); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); Cut(); Extract(); - return EXIT_SUCCESS; + return RUN_ALL_TESTS(); } diff --git a/test/test_paths.h.in b/test/test_paths.h.in new file mode 100644 index 0000000..c2d96b1 --- /dev/null +++ b/test/test_paths.h.in @@ -0,0 +1,7 @@ +#ifndef TEST_PATHS_H +#define TEST_PATHS_H + +#define TEST_DATA_DIR "@CMAKE_CURRENT_SOURCE_DIR@/testdata" +#define DICT_DIR "@CMAKE_SOURCE_DIR@/dict" + +#endif // TEST_PATHS_H \ No newline at end of file diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index 9871638..f2be7c0 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -1,3 +1,4 @@ +message(STATUS "MSVC value: ${MSVC}") if (MSVC) set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL") set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) @@ -13,12 +14,20 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(googletest) - -SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/test) +SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) ADD_DEFINITIONS(-DLOGGING_LEVEL=LL_WARNING) +# Add include directories +INCLUDE_DIRECTORIES( + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_BINARY_DIR}/test + ${PROJECT_SOURCE_DIR}/include + ${PROJECT_SOURCE_DIR}/deps/limonp/include + ${CMAKE_BINARY_DIR}/_deps/googletest-src/googletest/include +) + ADD_EXECUTABLE(test.run gtest_main.cpp keyword_extractor_test.cpp @@ -31,4 +40,8 @@ ADD_EXECUTABLE(test.run textrank_test.cpp ) -TARGET_LINK_LIBRARIES(test.run gtest) +TARGET_LINK_LIBRARIES(test.run gtest gtest_main) + +enable_testing() +ADD_TEST(NAME test.run COMMAND test.run) +SET_TESTS_PROPERTIES(test.run PROPERTIES WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}) diff --git a/test/unittest/jieba_test.cpp b/test/unittest/jieba_test.cpp index d5558ac..cb006ba 100644 --- a/test/unittest/jieba_test.cpp +++ b/test/unittest/jieba_test.cpp @@ -1,5 +1,6 @@ #include "cppjieba/Jieba.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; @@ -37,11 +38,11 @@ TEST(JiebaTest, Test0) { } TEST(JiebaTest, Test1) { - cppjieba::Jieba jieba("../dict/jieba.dict.utf8", - "../dict/hmm_model.utf8", - "../dict/user.dict.utf8", - "../dict/idf.utf8", - "../dict/stop_words.utf8"); + cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/user.dict.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); vector words; string result; @@ -71,14 +72,14 @@ TEST(JiebaTest, Test1) { jieba.CutForSearch("他来到了网易杭研大厦", words); result << words; ASSERT_EQ("[\"他\", \"来到\", \"了\", \"网易\", \"杭研\", \"大厦\"]", result); - } + TEST(JiebaTest, WordTest) { - cppjieba::Jieba jieba("../dict/jieba.dict.utf8", - "../dict/hmm_model.utf8", - "../dict/user.dict.utf8", - "../dict/idf.utf8", - "../dict/stop_words.utf8"); + cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/user.dict.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); vector words; string result; @@ -116,11 +117,11 @@ TEST(JiebaTest, WordTest) { } TEST(JiebaTest, InsertUserWord) { - cppjieba::Jieba jieba("../dict/jieba.dict.utf8", - "../dict/hmm_model.utf8", - "../dict/user.dict.utf8", - "../dict/idf.utf8", - "../dict/stop_words.utf8"); + cppjieba::Jieba jieba(DICT_DIR "/jieba.dict.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/user.dict.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); vector words; string result; diff --git a/test/unittest/keyword_extractor_test.cpp b/test/unittest/keyword_extractor_test.cpp index de298b6..e7183c6 100644 --- a/test/unittest/keyword_extractor_test.cpp +++ b/test/unittest/keyword_extractor_test.cpp @@ -1,10 +1,14 @@ #include "cppjieba/KeywordExtractor.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; TEST(KeywordExtractorTest, Test1) { - KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8"); + KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8"); { string s("你好世界世界而且而且"); @@ -55,7 +59,11 @@ TEST(KeywordExtractorTest, Test1) { } TEST(KeywordExtractorTest, Test2) { - KeywordExtractor Extractor("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/idf.utf8", "../dict/stop_words.utf8", "../test/testdata/userdict.utf8"); + KeywordExtractor Extractor(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/idf.utf8", + DICT_DIR "/stop_words.utf8", + TEST_DATA_DIR "/userdict.utf8"); { string s("蓝翔优秀毕业生"); diff --git a/test/unittest/pos_tagger_test.cpp b/test/unittest/pos_tagger_test.cpp index 745c1dd..d2aec1d 100644 --- a/test/unittest/pos_tagger_test.cpp +++ b/test/unittest/pos_tagger_test.cpp @@ -1,5 +1,6 @@ #include "cppjieba/MixSegment.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; @@ -13,7 +14,7 @@ static const char * const ANS_TEST3 = "[iPhone6:eng, 手机:n, 的:uj, 最大:a, //static const char * const ANS_TEST3 = ""; TEST(PosTaggerTest, Test) { - MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8"); + MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8"); { vector > res; tagger.Tag(QUERY_TEST1, res); @@ -23,7 +24,7 @@ TEST(PosTaggerTest, Test) { } } TEST(PosTagger, TestUserDict) { - MixSegment tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8"); + MixSegment tagger(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", TEST_DATA_DIR "/userdict.utf8"); { vector > res; tagger.Tag(QUERY_TEST2, res); diff --git a/test/unittest/segments_test.cpp b/test/unittest/segments_test.cpp index bdd5a19..ac1dee8 100644 --- a/test/unittest/segments_test.cpp +++ b/test/unittest/segments_test.cpp @@ -5,11 +5,12 @@ #include "cppjieba/FullSegment.hpp" #include "cppjieba/QuerySegment.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; TEST(MixSegmentTest, Test1) { - MixSegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8");; + MixSegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8"); string sentence; vector words; string actual; @@ -49,16 +50,18 @@ TEST(MixSegmentTest, Test1) { } TEST(MixSegmentTest, NoUserDict) { - MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8"); + MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8"); const char* str = "令狐冲是云计算方面的专家"; vector words; segment.Cut(str, words); string res; ASSERT_EQ("[\"令狐冲\", \"是\", \"云\", \"计算\", \"方面\", \"的\", \"专家\"]", res << words); - } + TEST(MixSegmentTest, UserDict) { - MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../dict/user.dict.utf8"); + MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/user.dict.utf8"); { const char* str = "令狐冲是云计算方面的专家"; vector words; @@ -83,9 +86,10 @@ TEST(MixSegmentTest, UserDict) { ASSERT_EQ("[\"IBM\", \",\", \"3.14\"]", res); } } + TEST(MixSegmentTest, TestUserDict) { - MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", - "../test/testdata/userdict.utf8"); + MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8", + TEST_DATA_DIR "/userdict.utf8"); vector words; string res; @@ -123,8 +127,8 @@ TEST(MixSegmentTest, TestUserDict) { } TEST(MixSegmentTest, TestMultiUserDict) { - MixSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", - "../test/testdata/userdict.utf8;../test/testdata/userdict.2.utf8"); + MixSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", DICT_DIR "/hmm_model.utf8", + TEST_DATA_DIR "/userdict.utf8;" TEST_DATA_DIR "/userdict.2.utf8"); vector words; string res; @@ -134,7 +138,7 @@ TEST(MixSegmentTest, TestMultiUserDict) { } TEST(MPSegmentTest, Test1) { - MPSegment segment("../dict/jieba.dict.utf8");; + MPSegment segment(DICT_DIR "/jieba.dict.utf8"); string s; vector words; segment.Cut("我来自北京邮电大学。", words); @@ -163,7 +167,7 @@ TEST(MPSegmentTest, Test1) { } TEST(HMMSegmentTest, Test1) { - HMMSegment segment("../dict/hmm_model.utf8");; + HMMSegment segment(DICT_DIR "/hmm_model.utf8"); { const char* str = "我来自北京邮电大学。。。学号123456"; const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", "学号", "123456"}; @@ -182,7 +186,7 @@ TEST(HMMSegmentTest, Test1) { } TEST(FullSegment, Test1) { - FullSegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8"); + FullSegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8"); vector words; string s; @@ -197,7 +201,7 @@ TEST(FullSegment, Test1) { } TEST(QuerySegment, Test1) { - QuerySegment segment("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", ""); + QuerySegment segment(DICT_DIR "/jieba.dict.utf8", DICT_DIR "/hmm_model.utf8", ""); vector words; string s1, s2; @@ -218,7 +222,9 @@ TEST(QuerySegment, Test1) { } TEST(QuerySegment, Test2) { - QuerySegment segment("../test/testdata/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", "../test/testdata/userdict.utf8|../test/testdata/userdict.english"); + QuerySegment segment(TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + TEST_DATA_DIR "/userdict.utf8|" TEST_DATA_DIR "/userdict.english"); vector words; string s1, s2; @@ -242,14 +248,13 @@ TEST(QuerySegment, Test2) { s2 = "中国/科学/学院/科学院/中国科学院"; ASSERT_EQ(s1, s2); } - } TEST(MPSegmentTest, Unicode32) { string s("天气很好,🙋 我们去郊游。"); vector words; - MPSegment segment("../dict/jieba.dict.utf8");; + MPSegment segment(DICT_DIR "/jieba.dict.utf8"); segment.Cut(s, words); ASSERT_EQ(Join(words.begin(), words.end(), "/"), "天气/很/好/,/🙋/ /我们/去/郊游/。"); diff --git a/test/unittest/textrank_test.cpp b/test/unittest/textrank_test.cpp index ef7ac27..41f9e68 100644 --- a/test/unittest/textrank_test.cpp +++ b/test/unittest/textrank_test.cpp @@ -1,13 +1,14 @@ #include "cppjieba/TextRankExtractor.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; TEST(TextRankExtractorTest, Test1) { TextRankExtractor Extractor( - "../test/testdata/extra_dict/jieba.dict.small.utf8", - "../dict/hmm_model.utf8", - "../dict/stop_words.utf8"); + TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/stop_words.utf8"); { string s("你好世界世界而且而且"); string res; @@ -59,10 +60,10 @@ TEST(TextRankExtractorTest, Test1) { TEST(TextRankExtractorTest, Test2) { TextRankExtractor Extractor( - "../test/testdata/extra_dict/jieba.dict.small.utf8", - "../dict/hmm_model.utf8", - "../dict/stop_words.utf8", - "../test/testdata/userdict.utf8"); + TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8", + DICT_DIR "/hmm_model.utf8", + DICT_DIR "/stop_words.utf8", + TEST_DATA_DIR "/userdict.utf8"); { string s("\xe8\x93\x9d\xe7\xbf\x94\xe4\xbc\x98\xe7\xa7\x80\xe6\xaf\x95\xe4\xb8\x9a\xe7\x94\x9f"); diff --git a/test/unittest/trie_test.cpp b/test/unittest/trie_test.cpp index 2e51993..3ad90fb 100644 --- a/test/unittest/trie_test.cpp +++ b/test/unittest/trie_test.cpp @@ -1,10 +1,11 @@ #include "cppjieba/DictTrie.hpp" #include "cppjieba/MPSegment.hpp" #include "gtest/gtest.h" +#include "test_paths.h" using namespace cppjieba; -static const char* const DICT_FILE = "../test/testdata/extra_dict/jieba.dict.small.utf8"; +static const char* const DICT_FILE = TEST_DATA_DIR "/extra_dict/jieba.dict.small.utf8"; TEST(TrieTest, Empty) { vector keys; @@ -33,12 +34,6 @@ TEST(DictTrieTest, Test1) { string word("来到"); cppjieba::RuneStrArray uni; ASSERT_TRUE(DecodeUTF8RunesInString(word, uni)); - //DictUnit nodeInfo; - //nodeInfo.word = uni; - //nodeInfo.tag = "v"; - //nodeInfo.weight = -8.87033; - //s1 << nodeInfo; - //s2 << (*trie.Find(uni.begin(), uni.end())); const DictUnit* du = trie.Find(uni.begin(), uni.end()); ASSERT_TRUE(du != NULL); ASSERT_EQ(2u, du->word.size()); @@ -47,14 +42,12 @@ TEST(DictTrieTest, Test1) { ASSERT_EQ("v", du->tag); ASSERT_NEAR(-8.870, du->weight, 0.001); - //EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2); word = "清华大学"; LocalVector > res; const char * words[] = {"清", "清华", "清华大学"}; for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) { ASSERT_TRUE(DecodeUTF8RunesInString(words[i], uni)); res.push_back(make_pair(uni.size() - 1, trie.Find(uni.begin(), uni.end()))); - //resMap[uni.size() - 1] = trie.Find(uni.begin(), uni.end()); } vector > vec; vector dags; @@ -65,11 +58,10 @@ TEST(DictTrieTest, Test1) { s1 << res; s2 << dags[0].nexts; ASSERT_EQ(s1, s2); - } TEST(DictTrieTest, UserDict) { - DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8"); + DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8"); string word = "云计算"; cppjieba::RuneStrArray unicode; ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode)); @@ -93,7 +85,7 @@ TEST(DictTrieTest, UserDict) { } TEST(DictTrieTest, UserDictWithMaxWeight) { - DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax); + DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8", DictTrie::WordWeightMax); string word = "云计算"; cppjieba::RuneStrArray unicode; ASSERT_TRUE(DecodeUTF8RunesInString(word, unicode)); @@ -103,7 +95,7 @@ TEST(DictTrieTest, UserDictWithMaxWeight) { } TEST(DictTrieTest, Dag) { - DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8"); + DictTrie trie(DICT_FILE, TEST_DATA_DIR "/userdict.utf8"); { string word = "清华大学";