add ttrie.cpp

This commit is contained in:
wyy 2013-12-19 08:22:09 -08:00
parent 202e4670f1
commit 3395b57227
3 changed files with 44 additions and 5 deletions

View File

@ -17,6 +17,7 @@
#include "TransCode.hpp"
namespace CppJieba
{
using namespace Limonp;
@ -50,13 +51,17 @@ namespace CppJieba
TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE)
{
}
string toString()const
bool operator == (const TrieNodeInfo & rhs) const
{
string tmp;
TransCode::encode(word, tmp);
return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq);
return word == rhs.word && freq == rhs.freq && tag == rhs.tag && abs(logFreq - rhs.logFreq) < 0.001;
}
};
// Streams a TrieNodeInfo as its word, freq, tag and logFreq fields,
// in that order, separated by ':' characters.
inline ostream& operator << (ostream& os, const TrieNodeInfo & nodeInfo)
{
    os << nodeInfo.word;
    os << ":" << nodeInfo.freq;
    os << ":" << nodeInfo.tag;
    os << ":" << nodeInfo.logFreq;
    return os;
}
typedef unordered_map<uint, const TrieNodeInfo*> DagType;
class Trie

View File

@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
# Header search paths: the gtest root, its include/ directory, and the project source root.
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
# Build gtest as a static library from gtest-all.cc.
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
# NOTE(review): the two ADD_EXECUTABLE lines below appear to be the before/after
# sides of this diff hunk; they differ only in that the second one adds TTrie.cpp.
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp)
ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp)
# gtest links against pthread; the test binary links against both gtest and pthread.
TARGET_LINK_LIBRARIES(gtest pthread)
TARGET_LINK_LIBRARIES(test.run gtest pthread)

34
test/unittest/TTrie.cpp Normal file
View File

@ -0,0 +1,34 @@
#include "src/Trie.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
static const char* const DICT_FILE = "../dicts/jieba.dict.utf8";
// Loads the dictionary at DICT_FILE into a Trie, checks the reported minimum
// log-frequency, and verifies that looking up a single word returns the
// expected TrieNodeInfo.
TEST(TrieTest, Test1)
{
    Trie trie;
    ASSERT_TRUE(trie.init());
    ASSERT_TRUE(trie.loadDict(DICT_FILE));
    // Two-sided tolerance check: a one-sided ASSERT_LT on the difference would
    // also accept values far below the expected minimum.
    ASSERT_NEAR(-17.2184, trie.getMinLogFreq(), 0.001);

    string word("来到");
    Unicode uni;
    ASSERT_TRUE(TransCode::decode(word, uni));

    TrieNodeInfo nodeInfo;
    nodeInfo.word = uni;
    nodeInfo.freq = 8779;
    nodeInfo.tag = "v";
    nodeInfo.logFreq = -8.83144;

    // Abort the test on a failed lookup instead of dereferencing the result
    // blindly (assumes find() signals a miss with NULL -- TODO confirm).
    const TrieNodeInfo* const found = trie.find(uni.begin(), uni.end());
    ASSERT_TRUE(found != NULL);
    EXPECT_EQ(nodeInfo, *found);

    word = "清华大学";
    vector<pair<uint, const TrieNodeInfo*> > vec;
    ASSERT_TRUE(TransCode::decode(word, uni));
    // TODO: enable the multi-match lookup below and compare vec against the
    // expected (offset, node) pairs once they are defined.
    //print(uni);
    //ASSERT_TRUE(trie.find(uni.begin(), uni.end(), vec));
    print(vec);
}