diff --git a/ChangeLog.md b/ChangeLog.md index 8e313d2..f1a6fb0 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -1,3 +1,7 @@ +## v2.3.5 + +1. 修改一些测试用例的文件,减少测试时编译的时间。 + ## v2.3.4 1. 修改了设计上的问题,删除了`TrieManager`这个类,以避免造成一些可能的隐患。 diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index ac3b930..79457cd 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -6,7 +6,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0) ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) -ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp) +ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TTrie.cpp TSegments.cpp ) TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread) diff --git a/test/unittest/TFullSegment.cpp b/test/unittest/TFullSegment.cpp deleted file mode 100644 index 2c99e21..0000000 --- a/test/unittest/TFullSegment.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "src/FullSegment.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -TEST(FullSegment, Test1) -{ - FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8"); - const char* str = "我来自北京邮电大学。。。 学号 123456"; - vector words; - - ASSERT_EQ(segment.cut(str, words), true); - - string s; - s << words; - ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"。\", \"。\", \"。\", \" \", \"学\", \"号\", \" 123456\"]"); -} - diff --git a/test/unittest/THMMSegment.cpp b/test/unittest/THMMSegment.cpp deleted file mode 100644 index fc7c04c..0000000 --- a/test/unittest/THMMSegment.cpp +++ /dev/null @@ -1,19 +0,0 @@ -#include "src/HMMSegment.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -TEST(HMMSegmentTest, Test1) -{ - HMMSegment segment("../dict/hmm_model.utf8");; - const char* str = "我来自北京邮电大学。。。 学号 123456"; - const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", " ", "学号", " 123456"}; - //string s; - //vector buf(res, res + sizeof(res)/sizeof(res[0])); - vector words; - ASSERT_TRUE(segment); - ASSERT_TRUE(segment.cut(str, words)); - //print(words); - EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); -} - diff --git a/test/unittest/TMPSegment.cpp b/test/unittest/TMPSegment.cpp deleted file mode 100644 index 3ca7268..0000000 --- a/test/unittest/TMPSegment.cpp +++ /dev/null @@ -1,46 +0,0 @@ -#include "src/MPSegment.hpp" -#include "src/Limonp/io_functs.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; -using namespace Limonp; - -TEST(MPSegmentTest, Test1) -{ - MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");; - const char* str = "我来自北京邮电大学。。。 学号 123456"; - const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学","号", " 123456"}; - vector words; - ASSERT_TRUE(segment); - ASSERT_TRUE(segment.cut(str, words)); - //print(words); - EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); -} - -TEST(MPSegmentTest, Test2) -{ - MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8"); - string line; - ifstream ifs("../test/testdata/review.100"); - vector words; - - string eRes; - loadFile2Str("../test/testdata/review.100.res", eRes); - string res; - - while(getline(ifs, line)) - { - res += line; - res += '\n'; - - words.clear(); - segment.cut(line, words); - string s; - s << words; - res += s; - res += '\n'; - } - WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w"); - //ASSERT_EQ(res, eRes); - -} diff --git a/test/unittest/TMixSegment.cpp b/test/unittest/TMixSegment.cpp deleted file mode 100644 index fe904a4..0000000 --- a/test/unittest/TMixSegment.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "src/MixSegment.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -TEST(MixSegmentTest, Test1) -{ - MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");; - const char* str = "我来自北京邮电大学。。。 学号 123456"; - const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学号", " 123456"}; - vector words; - ASSERT_TRUE(segment); - ASSERT_TRUE(segment.cut(str, words)); - EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); -} - diff --git a/test/unittest/TQuerySegment.cpp b/test/unittest/TQuerySegment.cpp deleted file mode 100644 index 7b76561..0000000 --- a/test/unittest/TQuerySegment.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "src/QuerySegment.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -TEST(QuerySegment, Test1) -{ - QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3); - const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"; - vector words; - - ASSERT_TRUE(segment.cut(str, words)); - - string s1, s2; - s1 << words; - s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \",\", \"后\", \"在\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]"; - ASSERT_EQ(s1, s2); - -} - diff --git a/test/unittest/TSegmentBase.cpp b/test/unittest/TSegmentBase.cpp deleted file mode 100644 index 6ee708e..0000000 --- a/test/unittest/TSegmentBase.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "src/SegmentBase.hpp" -#include "gtest/gtest.h" - -using namespace CppJieba; - -TEST(SegmentBaseTest, Test1) -{ - const char* str = "heheh你好...hh"; - string s; - vector buf; - buf.push_back("heheh"); - buf.push_back("你好"); - buf.push_back("...hh"); - vector res; - size_t size = strlen(str); - size_t offset = 0; - while(offset < size) - { - size_t len = 0; - const char* t = str + offset; - SegmentBase::filterAscii(t, size - offset, len); - s.assign(t, len); - res.push_back(s); - //cout< buf; + buf.push_back("heheh"); + buf.push_back("你好"); + buf.push_back("...hh"); + vector res; + size_t size = strlen(str); + size_t offset = 0; + while(offset < size) + { + size_t len = 0; + const char* t = str + offset; + SegmentBase::filterAscii(t, size - offset, len); + s.assign(t, len); + res.push_back(s); + //cout< words; + ASSERT_TRUE(segment); + ASSERT_TRUE(segment.cut(str, words)); + EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); +} + +TEST(MPSegmentTest, Test1) +{ + MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");; + const char* str = "我来自北京邮电大学。。。 学号 123456"; + const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学","号", " 123456"}; + vector words; + ASSERT_TRUE(segment); + ASSERT_TRUE(segment.cut(str, words)); + //print(words); + EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); +} + +TEST(MPSegmentTest, Test2) +{ + MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8"); + string line; + ifstream ifs("../test/testdata/review.100"); + vector words; + + string eRes; + loadFile2Str("../test/testdata/review.100.res", eRes); + string res; + + while(getline(ifs, line)) + { + res += line; + res += '\n'; + + words.clear(); + segment.cut(line, words); + string s; + s << words; + res += s; + res += '\n'; + } + WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w"); + //ASSERT_EQ(res, eRes); + +} +TEST(HMMSegmentTest, Test1) +{ + HMMSegment segment("../dict/hmm_model.utf8");; + const char* str = "我来自北京邮电大学。。。 学号 123456"; + const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", " ", "学号", " 123456"}; + //string s; + //vector buf(res, res + sizeof(res)/sizeof(res[0])); + vector words; + ASSERT_TRUE(segment); + ASSERT_TRUE(segment.cut(str, words)); + //print(words); + EXPECT_EQ(words, vector(res, res + sizeof(res)/sizeof(res[0]))); +} + +TEST(FullSegment, Test1) +{ + FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8"); + const char* str = "我来自北京邮电大学。。。 学号 123456"; + vector words; + + ASSERT_EQ(segment.cut(str, words), true); + + string s; + s << words; + ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"。\", \"。\", \"。\", \" \", \"学\", \"号\", \" 123456\"]"); +} + +TEST(QuerySegment, Test1) +{ + QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3); + const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造"; + vector words; + + ASSERT_TRUE(segment.cut(str, words)); + + string s1, s2; + s1 << words; + s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \",\", \"后\", \"在\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]"; + ASSERT_EQ(s1, s2); + +} +