From 3395b572279f3b5c4071e654ad34055d342ad96f Mon Sep 17 00:00:00 2001 From: wyy Date: Thu, 19 Dec 2013 08:22:09 -0800 Subject: [PATCH] add ttrie.cpp --- src/Trie.hpp | 13 +++++++++---- test/unittest/CMakeLists.txt | 2 +- test/unittest/TTrie.cpp | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 test/unittest/TTrie.cpp diff --git a/src/Trie.hpp b/src/Trie.hpp index 9e3d58b..3b0fb45 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -17,6 +17,7 @@ #include "TransCode.hpp" + namespace CppJieba { using namespace Limonp; @@ -50,13 +51,17 @@ namespace CppJieba TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE) { } - string toString()const + bool operator == (const TrieNodeInfo & rhs) const { - string tmp; - TransCode::encode(word, tmp); - return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq); + return word == rhs.word && freq == rhs.freq && tag == rhs.tag && abs(logFreq - rhs.logFreq) < 0.001; } }; + + inline ostream& operator << (ostream& os, const TrieNodeInfo & nodeInfo) + { + return os << nodeInfo.word << ":" << nodeInfo.freq << ":" << nodeInfo.tag << ":" << nodeInfo.logFreq ; + } + typedef unordered_map DagType; class Trie diff --git a/test/unittest/CMakeLists.txt b/test/unittest/CMakeLists.txt index ad5c23d..34c3781 100644 --- a/test/unittest/CMakeLists.txt +++ b/test/unittest/CMakeLists.txt @@ -5,7 +5,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0) INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR}) ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc) -ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp) +ADD_EXECUTABLE(test.run gtest_main.cc TChineseFilter.cpp TMixSegment.cpp TMPSegment.cpp THMMSegment.cpp TTrie.cpp) TARGET_LINK_LIBRARIES(gtest pthread) TARGET_LINK_LIBRARIES(test.run gtest pthread) diff --git a/test/unittest/TTrie.cpp b/test/unittest/TTrie.cpp new file mode 
100644
index 0000000..b7e4f71
--- /dev/null
+++ b/test/unittest/TTrie.cpp
@@ -0,0 +1,34 @@
+#include "src/Trie.hpp"
+#include "gtest/gtest.h"
+
+using namespace CppJieba;
+
+// Relative path to the dictionary loaded by the test below.
+static const char* const DICT_FILE = "../dicts/jieba.dict.utf8";
+
+// Loads the dictionary, then checks the minimum log-frequency and one known entry.
+TEST(TrieTest, Test1)
+{
+    Trie trie;
+    ASSERT_TRUE(trie.init());
+    ASSERT_TRUE(trie.loadDict(DICT_FILE));
+    // Two-sided tolerance: a one-sided ASSERT_LT would also accept any far smaller value.
+    ASSERT_NEAR(trie.getMinLogFreq(), -17.2184, 0.001);
+
+    string word("来到");
+    Unicode uni;
+    ASSERT_TRUE(TransCode::decode(word, uni));
+    TrieNodeInfo nodeInfo;
+    nodeInfo.word = uni;
+    nodeInfo.freq = 8779;
+    nodeInfo.tag = "v";
+    nodeInfo.logFreq = -8.83144;
+    // Guard the dereference below: fail here if find() hands back a null result.
+    ASSERT_TRUE(trie.find(uni.begin(), uni.end()) != NULL);
+    EXPECT_EQ(nodeInfo, *trie.find(uni.begin(), uni.end()));
+
+    word = "清华大学";
+    ASSERT_TRUE(TransCode::decode(word, uni));
+    // TODO: assert on trie.find(uni.begin(), uni.end(), vec) once the expected matches are known.
+}
+