mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Merge several test files into one test file to reduce compile time
This commit is contained in:
parent
52b6c61326
commit
d2d6868b75
@ -1,3 +1,7 @@
|
||||
## v2.3.5
|
||||
|
||||
1. 修改一些测试用例的文件,减少测试时编译的时间。
|
||||
|
||||
## v2.3.4
|
||||
|
||||
1. 修改了设计上的问题,删除了`TrieManager`这个类,以避免造成一些可能的隐患。
|
||||
|
@ -6,7 +6,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
|
||||
# Compile everything in this directory with LOGGER_LEVEL defined as LL_WARN.
ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN)
|
||||
# Header search path: the bundled gtest tree plus the project root, which is
# what lets the tests include "gtest/gtest.h" and "src/...".
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
|
||||
# Build the bundled gtest sources (gtest-all.cc) as a static library.
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
|
||||
# NOTE(review): this is a diff hunk, so both the previous and the new definition
# of test.run are shown. The TSegments.cpp variant is the one that matches the
# file added by this commit; only one of the two can exist in the real
# CMakeLists.txt.
ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp)
|
||||
ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TTrie.cpp TSegments.cpp )
|
||||
# gtest is linked against pthread; the test binary links gtest and pthread.
TARGET_LINK_LIBRARIES(gtest pthread)
|
||||
TARGET_LINK_LIBRARIES(test.run gtest pthread)
|
||||
|
||||
|
@ -1,18 +0,0 @@
|
||||
#include "src/FullSegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Checks FullSegment on one mixed Chinese/ASCII sentence: the cut must succeed
// and the serialized word list must equal the fixed string below.
// The dictionary path is relative, so it is resolved against the directory the
// test binary is started from.
TEST(FullSegment, Test1)
|
||||
{
|
||||
FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
vector<string> words;
|
||||
|
||||
// ASSERT_* aborts this test on failure, so nothing below runs if cut() fails.
ASSERT_EQ(segment.cut(str, words), true);
|
||||
|
||||
string s;
|
||||
// Serialize `words` into `s`; the expected literal below shows the format
// (bracketed, comma-separated, double-quoted entries).
s << words;
|
||||
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"。\", \"。\", \"。\", \" \", \"学\", \"号\", \" 123456\"]");
|
||||
}
|
||||
|
@ -1,19 +0,0 @@
|
||||
#include "src/HMMSegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Checks HMMSegment, constructed from the HMM model file alone, on one mixed
// Chinese/ASCII sentence and compares the resulting words with `res`.
TEST(HMMSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
HMMSegment segment("../dict/hmm_model.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", " ", "学号", " 123456"};
|
||||
//string s;
|
||||
//vector<string> buf(res, res + sizeof(res)/sizeof(res[0]));
|
||||
vector<string> words;
|
||||
// The segmenter is tested in boolean context before use; ASSERT_* aborts the
// test on failure. (Presumably this reports whether the model loaded — confirm
// in HMMSegment.hpp.)
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
// sizeof(res)/sizeof(res[0]) is the element count of the expected array.
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
@ -1,46 +0,0 @@
|
||||
#include "src/MPSegment.hpp"
|
||||
#include "src/Limonp/io_functs.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
using namespace Limonp;
|
||||
|
||||
// Checks MPSegment with the small dictionary on one mixed Chinese/ASCII
// sentence and compares the resulting words with `res`.
TEST(MPSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学","号", " 123456"};
|
||||
vector<string> words;
|
||||
// Both preconditions use ASSERT_*, so the comparison below is skipped if the
// segmenter tests false or cut() fails.
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
// sizeof(res)/sizeof(res[0]) is the element count of the expected array.
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
||||
// Segments every line of review.100 with MPSegment and writes the transcript
// (each input line followed by its serialized word list) to review.100.res.
// NOTE(review): the only comparison (ASSERT_EQ at the end) is commented out, so
// this test contains no active assertion and rewrites the .res file on every run.
TEST(MPSegmentTest, Test2)
|
||||
{
|
||||
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
|
||||
string line;
|
||||
// NOTE(review): the stream is not checked; if the file cannot be opened the
// loop below is skipped and an empty string is written to the .res file.
ifstream ifs("../test/testdata/review.100");
|
||||
vector<string> words;
|
||||
|
||||
// Previous contents of the result file; only referenced by the commented-out
// assertion at the end.
string eRes;
|
||||
loadFile2Str("../test/testdata/review.100.res", eRes);
|
||||
string res;
|
||||
|
||||
while(getline(ifs, line))
|
||||
{
|
||||
res += line;
|
||||
res += '\n';
|
||||
|
||||
words.clear();
|
||||
// The return value of cut() is ignored here.
segment.cut(line, words);
|
||||
string s;
|
||||
s << words;
|
||||
res += s;
|
||||
res += '\n';
|
||||
}
|
||||
WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w");
|
||||
//ASSERT_EQ(res, eRes);
|
||||
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
#include "src/MixSegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Checks MixSegment, constructed from the small dictionary and the HMM model
// file, on one mixed Chinese/ASCII sentence and compares the words with `res`.
TEST(MixSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学号", " 123456"};
|
||||
vector<string> words;
|
||||
// ASSERT_* aborts the test on failure, so the comparison only runs after the
// segmenter tests true and cut() succeeds.
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
@ -1,20 +0,0 @@
|
||||
#include "src/QuerySegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Checks QuerySegment on one Chinese sentence: the cut must succeed and the
// serialized word list must equal the fixed string `s2`.
TEST(QuerySegment, Test1)
|
||||
{
|
||||
// NOTE(review): the meaning of the third constructor argument (3) is not
// visible here — confirm against QuerySegment.hpp.
QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3);
|
||||
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
|
||||
string s1, s2;
|
||||
// Serialize the result into s1; s2 holds the expected serialization.
s1 << words;
|
||||
s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \",\", \"后\", \"在\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]";
|
||||
ASSERT_EQ(s1, s2);
|
||||
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
#include "src/SegmentBase.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Repeatedly calls SegmentBase::filterAscii on the unread tail of `str`,
// collects the piece of length `len` it reports each time, and expects the
// pieces to be "heheh", "你好", "...hh".
TEST(SegmentBaseTest, Test1)
|
||||
{
|
||||
const char* str = "heheh你好...hh";
|
||||
string s;
|
||||
// Expected pieces, in order.
vector<string> buf;
|
||||
buf.push_back("heheh");
|
||||
buf.push_back("你好");
|
||||
buf.push_back("...hh");
|
||||
vector<string> res;
|
||||
size_t size = strlen(str);
|
||||
size_t offset = 0;
|
||||
while(offset < size)
|
||||
{
|
||||
size_t len = 0;
|
||||
const char* t = str + offset;
|
||||
// `len` is filled in by filterAscii; its return value is not used.
SegmentBase::filterAscii(t, size - offset, len);
|
||||
s.assign(t, len);
|
||||
res.push_back(s);
|
||||
//cout<<s<<","<<ret<<","<<len<<endl;
|
||||
//cout<<str<<endl;
|
||||
// NOTE(review): if `len` were left at 0, `offset` would never advance and
// this loop would not terminate.
offset += len;
|
||||
}
|
||||
EXPECT_EQ(res, buf);
|
||||
}
|
||||
|
||||
//int main(int argc, char** argv)
|
||||
//{
|
||||
// //ChineseFilter chFilter;
|
||||
// return 0;
|
||||
//}
|
||||
|
135
test/unittest/TSegments.cpp
Normal file
135
test/unittest/TSegments.cpp
Normal file
@ -0,0 +1,135 @@
|
||||
#include "src/SegmentBase.hpp"
|
||||
#include "src/MixSegment.hpp"
|
||||
#include "src/MPSegment.hpp"
|
||||
#include "src/HMMSegment.hpp"
|
||||
#include "src/Limonp/io_functs.hpp"
|
||||
#include "src/FullSegment.hpp"
|
||||
#include "src/QuerySegment.hpp"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace CppJieba;
|
||||
|
||||
// Repeatedly calls SegmentBase::filterAscii on the unread tail of `str`,
// collects the piece of length `len` it reports each time, and expects the
// pieces to be "heheh", "你好", "...hh".
TEST(SegmentBaseTest, Test1)
|
||||
{
|
||||
const char* str = "heheh你好...hh";
|
||||
string s;
|
||||
// Expected pieces, in order.
vector<string> buf;
|
||||
buf.push_back("heheh");
|
||||
buf.push_back("你好");
|
||||
buf.push_back("...hh");
|
||||
vector<string> res;
|
||||
size_t size = strlen(str);
|
||||
size_t offset = 0;
|
||||
while(offset < size)
|
||||
{
|
||||
size_t len = 0;
|
||||
const char* t = str + offset;
|
||||
// `len` is filled in by filterAscii; its return value is not used.
SegmentBase::filterAscii(t, size - offset, len);
|
||||
// Guard against a zero-length piece: without progress `offset` would never
// reach `size` and the test would hang instead of failing.
ASSERT_GT(len, static_cast<size_t>(0));
|
||||
s.assign(t, len);
|
||||
res.push_back(s);
|
||||
offset += len;
|
||||
}
|
||||
EXPECT_EQ(res, buf);
|
||||
}
|
||||
|
||||
//int main(int argc, char** argv)
|
||||
//{
|
||||
// //ChineseFilter chFilter;
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
|
||||
// Checks MixSegment, constructed from the small dictionary and the HMM model
// file, on one mixed Chinese/ASCII sentence and compares the words with `res`.
// Both paths are relative, so they are resolved against the directory the test
// binary is started from.
TEST(MixSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
MixSegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学号", " 123456"};
|
||||
vector<string> words;
|
||||
// ASSERT_* aborts the test on failure, so the comparison only runs after the
// segmenter tests true and cut() succeeds.
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
// sizeof(res)/sizeof(res[0]) is the element count of the expected array.
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
||||
// Checks MPSegment with the small dictionary on one mixed Chinese/ASCII
// sentence and compares the resulting words with `res`.
TEST(MPSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学","号", " 123456"};
|
||||
vector<string> words;
|
||||
// Both preconditions use ASSERT_*, so the comparison below is skipped if the
// segmenter tests false or cut() fails.
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
// sizeof(res)/sizeof(res[0]) is the element count of the expected array.
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
||||
// Segments every line of review.100 with MPSegment and writes the transcript
// (each input line followed by its serialized word list) to review.100.res.
// The comparison against the previous .res contents is still disabled, so this
// test acts as a generator for that file rather than a regression check.
TEST(MPSegmentTest, Test2)
|
||||
{
|
||||
MPSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
|
||||
// Same precondition as MPSegmentTest.Test1: stop if the segmenter tests false.
ASSERT_TRUE(segment);
|
||||
string line;
|
||||
ifstream ifs("../test/testdata/review.100");
|
||||
// Abort before touching the result file: with an unreadable input the loop
// below would be skipped and review.100.res overwritten with an empty string.
ASSERT_TRUE(ifs.is_open());
|
||||
vector<string> words;
|
||||
|
||||
// Previous contents of the result file; only referenced by the commented-out
// assertion at the end.
string eRes;
|
||||
loadFile2Str("../test/testdata/review.100.res", eRes);
|
||||
string res;
|
||||
|
||||
while(getline(ifs, line))
|
||||
{
|
||||
res += line;
|
||||
res += '\n';
|
||||
|
||||
words.clear();
|
||||
// The return value of cut() is deliberately left unchecked, as before.
segment.cut(line, words);
|
||||
string s;
|
||||
s << words;
|
||||
res += s;
|
||||
res += '\n';
|
||||
}
|
||||
WriteStr2File("../test/testdata/review.100.res", res.c_str(), "w");
|
||||
//ASSERT_EQ(res, eRes);
|
||||
|
||||
}
|
||||
// Checks HMMSegment, constructed from the HMM model file alone, on one mixed
// Chinese/ASCII sentence and compares the resulting words with `res`.
TEST(HMMSegmentTest, Test1)
|
||||
{
|
||||
// The second ';' is a harmless empty statement.
HMMSegment segment("../dict/hmm_model.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
// Expected segmentation, in order.
const char* res[] = {"我来", "自北京", "邮电大学", "。", "。", "。", " ", "学号", " 123456"};
|
||||
//string s;
|
||||
//vector<string> buf(res, res + sizeof(res)/sizeof(res[0]));
|
||||
vector<string> words;
|
||||
// The segmenter is tested in boolean context before use; ASSERT_* aborts the
// test on failure. (Presumably this reports whether the model loaded — confirm
// in HMMSegment.hpp.)
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
// sizeof(res)/sizeof(res[0]) is the element count of the expected array.
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
||||
// Checks FullSegment on one mixed Chinese/ASCII sentence: the cut must succeed
// and the serialized word list must equal the fixed string below.
// The dictionary path is relative, so it is resolved against the directory the
// test binary is started from.
TEST(FullSegment, Test1)
|
||||
{
|
||||
FullSegment segment("../dict/extra_dict/jieba.dict.small.utf8");
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
vector<string> words;
|
||||
|
||||
// ASSERT_* aborts this test on failure, so nothing below runs if cut() fails.
ASSERT_EQ(segment.cut(str, words), true);
|
||||
|
||||
string s;
|
||||
// Serialize `words` into `s`; the expected literal below shows the format
// (bracketed, comma-separated, double-quoted entries).
s << words;
|
||||
ASSERT_EQ(s, "[\"我\", \"来自\", \"北京\", \"北京邮电大学\", \"邮电\", \"电大\", \"大学\", \"。\", \"。\", \"。\", \" \", \"学\", \"号\", \" 123456\"]");
|
||||
}
|
||||
|
||||
// Checks QuerySegment on one Chinese sentence: the cut must succeed and the
// serialized word list must equal the fixed string `s2`.
TEST(QuerySegment, Test1)
|
||||
{
|
||||
// NOTE(review): the meaning of the third constructor argument (3) is not
// visible here — confirm against QuerySegment.hpp.
QuerySegment segment("../dict/extra_dict/jieba.dict.small.utf8", "../dict/hmm_model.utf8", 3);
|
||||
const char* str = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
|
||||
vector<string> words;
|
||||
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
|
||||
string s1, s2;
|
||||
// Serialize the result into s1; s2 holds the expected serialization.
s1 << words;
|
||||
s2 = "[\"小明\", \"硕士\", \"毕业\", \"于\", \"中国\", \"中国科学院\", \"科学\", \"科学院\", \"学院\", \"计算所\", \",\", \"后\", \"在\", \"日本\", \"京都\", \"京都大学\", \"大学\", \"深造\"]";
|
||||
ASSERT_EQ(s1, s2);
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user