rm TrieManager.hpp

This commit is contained in:
wyy 2014-03-15 22:02:48 +08:00
parent e3b58d6ddc
commit ddaa5589f1
11 changed files with 42 additions and 258 deletions

View File

@ -9,14 +9,13 @@
#include "ISegment.hpp"
#include "SegmentBase.hpp"
#include "TransCode.hpp"
#include "TrieManager.hpp"
namespace CppJieba
{
class FullSegment: public SegmentBase
{
private:
Trie* _trie;
Trie _trie;
public:
FullSegment(){_setInitFlag(false);};
@ -30,12 +29,8 @@ namespace CppJieba
LogError("already inited before now.");
return false;
}
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
if (NULL == _trie)
{
LogError("get NULL pointor from getTrie(\"%s\")", dictPath.c_str());
return false;
}
_trie.init(dictPath.c_str());
assert(_trie);
return _setInitFlag(true);
}
@ -66,7 +61,7 @@ namespace CppJieba
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
{
//find word start from uItr
if (_trie->find(uItr, end, tRes))
if (_trie.find(uItr, end, tRes))
{
for (vector<pair<uint, const TrieNodeInfo*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
{

View File

@ -10,7 +10,7 @@
#include <cassert>
#include "Limonp/logger.hpp"
#include "Trie.hpp"
#include "TrieManager.hpp"
#include "Trie.hpp"
#include "ISegment.hpp"
#include "SegmentBase.hpp"
@ -25,15 +25,14 @@ namespace CppJieba
double weight;
SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0)
{
}
{}
};
typedef vector<SegmentChar> SegmentContext;
class MPSegment: public SegmentBase
{
protected:
Trie* _trie;
Trie _trie;
public:
MPSegment(){_setInitFlag(false);};
@ -50,12 +49,8 @@ namespace CppJieba
LogError("already inited before now.");
return false;
}
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
if (_trie == NULL)
{
LogError("get a NULL pointor form getTrie(\"%s\").", dictPath.c_str());
return false;
}
_trie.init(dictPath);
assert(_trie);
LogInfo("MPSegment init(%s) ok", dictPath.c_str());
return _setInitFlag(true);
}
@ -129,7 +124,7 @@ namespace CppJieba
{
SegmentChar schar(*it);
uint i = it - begin;
_trie->find(it, end, i, schar.dag);
_trie.find(it, end, i, schar.dag);
//DagType::iterator dagIter;
if(schar.dag.end() == schar.dag.find(i))
{
@ -167,7 +162,7 @@ namespace CppJieba
}
else
{
val += _trie->getMinLogFreq();
val += _trie.getMinLogFreq();
}
if(val > segContext[i].weight)
{
@ -195,7 +190,7 @@ namespace CppJieba
TrieNodeInfo nodeInfo;
nodeInfo.word.push_back(segContext[i].uniCh);
nodeInfo.freq = 0;
nodeInfo.logFreq = _trie->getMinLogFreq();
nodeInfo.logFreq = _trie.getMinLogFreq();
res.push_back(nodeInfo);
i++;
}

View File

@ -18,16 +18,13 @@ namespace CppJieba
explicit MixSegment(const string& mpSegDict, const string& hmmSegDict)
{
_setInitFlag(init(mpSegDict, hmmSegDict));
assert(_getInitFlag());
}
virtual ~MixSegment(){}
public:
bool init(const string& mpSegDict, const string& hmmSegDict)
{
if(_getInitFlag())
{
LogError("inited.");
return false;
}
assert(!_getInitFlag());
if(!_mpSeg.init(mpSegDict))
{
LogError("_mpSeg init");

View File

@ -4,7 +4,6 @@
#include "MixSegment.hpp"
#include "Limonp/str_functs.hpp"
#include "Trie.hpp"
#include "TrieManager.hpp"
namespace CppJieba
{
@ -14,7 +13,7 @@ namespace CppJieba
{
private:
MixSegment _segment;
Trie* _trie;
Trie _trie;
public:
PosTagger(){_setInitFlag(false);};
@ -26,17 +25,10 @@ namespace CppJieba
public:
bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb)
{
if (_getInitFlag())
{
LogError("already inited before.");
return false;
}
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
if (NULL == _trie)
{
LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str());
return false;
}
assert(!_getInitFlag());
_trie.init(dictPath);
assert(_trie);
return _setInitFlag(_segment.init(dictPath, hmmFilePath));
};
@ -59,7 +51,7 @@ namespace CppJieba
LogError("decode failed.");
return false;
}
tmp = _trie->find(unico.begin(), unico.end());
tmp = _trie.find(unico.begin(), unico.end());
res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
}
tmp = NULL;

View File

@ -11,7 +11,7 @@
#include "FullSegment.hpp"
#include "MixSegment.hpp"
#include "TransCode.hpp"
#include "TrieManager.hpp"
#include "Trie.hpp"
namespace CppJieba
{

View File

@ -14,6 +14,7 @@
#include <limits>
#include "Limonp/str_functs.hpp"
#include "Limonp/logger.hpp"
#include "Limonp/InitOnOff.hpp"
#include "TransCode.hpp"
@ -64,14 +65,13 @@ namespace CppJieba
typedef map<uint, const TrieNodeInfo*> DagType;
class Trie
class Trie: public InitOnOff
{
private:
TrieNode* _root;
vector<TrieNodeInfo> _nodeInfoVec;
bool _initFlag;
int64_t _freqSum;
double _minLogFreq;
@ -81,57 +81,28 @@ namespace CppJieba
_root = NULL;
_freqSum = 0;
_minLogFreq = MAX_DOUBLE;
_initFlag = false;
_setInitFlag(false);
}
Trie(const string& filePath): Trie()
{
_setInitFlag(init(filePath));
}
~Trie()
{
dispose();
}
bool init()
{
if(_getInitFlag())
{
LogError("already initted!");
return false;
}
try
{
_root = new TrieNode;
}
catch(const bad_alloc& e)
{
return false;
}
if(NULL == _root)
{
return false;
}
_setInitFlag(true);
return true;
}
bool dispose()
{
if(!_getInitFlag())
{
return false;
return;
}
bool ret = _deleteNode(_root);
if(!ret)
{
LogFatal("_deleteNode failed!");
return false;
}
_root = NULL;
_nodeInfoVec.clear();
_setInitFlag(false);
return ret;
_deleteNode(_root);
}
bool loadDict(const char * const filePath)
public:
bool init(const string& filePath)
{
assert(_getInitFlag());
if(!_trieInsert(filePath))
assert(!_getInitFlag());
_root = new TrieNode;
assert(_root);
if(!_trieInsert(filePath.c_str()))
{
LogError("_trieInsert failed.");
return false;
@ -141,13 +112,9 @@ namespace CppJieba
LogError("_countWeight failed.");
return false;
}
return true;
return _setInitFlag(true);
}
private:
void _setInitFlag(bool on){_initFlag = on;};
bool _getInitFlag()const{return _initFlag;};
public:
const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const
{
@ -271,12 +238,6 @@ namespace CppJieba
private:
bool _insert(const TrieNodeInfo& nodeInfo)
{
if(!_getInitFlag())
{
LogFatal("not initted!");
return false;
}
const Unicode& uintVec = nodeInfo.word;
TrieNode* p = _root;
@ -358,10 +319,9 @@ namespace CppJieba
nodeInfo.tag = vecBuf[2];
}
//_insert node
if(!_insert(nodeInfo))
{
LogError("_insert node failed!");
assert(false);
}
}
return true;
@ -405,7 +365,7 @@ namespace CppJieba
return true;
}
bool _deleteNode(TrieNode* node)
void _deleteNode(TrieNode* node)
{
for(TrieNodeMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++)
{
@ -414,7 +374,6 @@ namespace CppJieba
}
delete node;
return true;
}
};

View File

@ -1,73 +0,0 @@
#ifndef CPPJIEBA_TRIEMANAGER_H
#define CPPJIEBA_TRIEMANAGER_H
#include "Trie.hpp"
#include "Limonp/md5.hpp"
#include "Limonp/logger.hpp"
namespace CppJieba
{
using namespace Limonp;
/**
 * Process-wide cache of loaded Trie dictionaries.
 *
 * Tries are keyed by the digest that md5File() computes for the dictionary
 * file, so two paths whose files produce the same digest share one Trie.
 * The single instance is obtained through getInstance().
 *
 * NOTE(review): Tries that were cached successfully are never deleted by
 * this class; they stay allocated for as long as the singleton lives.
 */
class TrieManager
{
private:
    // Digest of a dictionary file -> Trie built from that file.
    unordered_map<std::string, Trie*> _tries;
    // Construction and copying are private: the only instance is the one
    // handed out by getInstance().
    TrieManager(){};
    TrieManager(TrieManager& tm){};
public:
    /**
     * Returns the Trie for the dictionary at dictpath, building and caching
     * it on first use.
     *
     * @param dictpath path of the dictionary file.
     * @return the cached or newly built Trie, or NULL when the file cannot
     *         be hashed, the Trie cannot be allocated or initialised, or
     *         the dictionary fails to load. A Trie that fails to build is
     *         destroyed before returning and is not cached.
     */
    Trie* getTrie(const char* dictpath)
    {
        string md5;
        if (!md5File(dictpath, md5))
        {
            LogError("error when getting md5 for file '%s'", dictpath);
            return NULL;
        }

        // Single lookup: reuse the iterator instead of searching a second
        // time through operator[].
        unordered_map<std::string, Trie*>::iterator cached = _tries.find(md5);
        if (cached != _tries.end())
        {
            return cached->second;
        }

        //LogDebug("create a new trie for md5: '%s'", md5.c_str());
        Trie* trie = NULL;
        try
        {
            trie = new Trie();
        }
        catch (const bad_alloc& e)
        {
            LogError("error when new a trie for file '%s'", dictpath);
            return NULL;
        }
        // No NULL check is needed here: a throwing `new` either returns a
        // valid object or raises bad_alloc, which is handled above.

        if (!trie->init())
        {
            LogError("trie init error for file '%s'", dictpath);
            // The Trie was never cached, so it must be released here or it
            // would leak.
            delete trie;
            return NULL;
        }

        if (!trie->loadDict(dictpath))
        {
            LogError("trie->loadDict(%s) failed...", dictpath);
            delete trie;
            return NULL;
        }

        // Store under the same key object that was used for the lookup.
        _tries[md5] = trie;
        LogDebug("trie->loadDict(%s)", dictpath);
        return trie;
    }

    /** Returns the single TrieManager instance, created on first call. */
    static TrieManager& getInstance()
    {
        static TrieManager _this;
        return _this;
    }
};
}
#endif

View File

@ -6,7 +6,7 @@ SET(GTEST_ROOT_DIR gtest-1.6.0)
ADD_DEFINITIONS(-DLOGGER_LEVEL=LL_WARN)
INCLUDE_DIRECTORIES(${GTEST_ROOT_DIR} ${GTEST_ROOT_DIR}/include ${PROJECT_SOURCE_DIR})
ADD_LIBRARY(gtest STATIC ${GTEST_ROOT_DIR}/src/gtest-all.cc)
ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TMd5.cpp TQuerySegment.cpp TTrieManager TFullSegment.cpp TMd5.cpp TQuerySegment.cpp TTrieManager.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp)
ADD_EXECUTABLE(test.run gtest_main.cpp TKeywordExtractor.cpp TMPSegment.cpp TTrie.cpp TFullSegment.cpp TQuerySegment.cpp THMMSegment.cpp TMixSegment.cpp TSegmentBase.cpp)
TARGET_LINK_LIBRARIES(gtest pthread)
TARGET_LINK_LIBRARIES(test.run gtest pthread)

View File

@ -1,28 +0,0 @@
#include "src/TrieManager.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// Dictionary files whose digests are checked by Md5Test.
// NOTE(review): the paths are relative — presumably to the directory the
// test binary is run from; confirm against the build setup.
const char* const DICT_FILE[] = {
"../test/testdata/jieba.dict.0.utf8",
"../test/testdata/jieba.dict.0.1.utf8",
"../test/testdata/jieba.dict.1.utf8",
"../test/testdata/jieba.dict.2.utf8"};
// Expected md5File() result for each DICT_FILE entry, index for index.
// The first two entries are the same digest, i.e. jieba.dict.0.utf8 and
// jieba.dict.0.1.utf8 are expected to hash identically.
const char* const DICT_FILE_MD5[] = {
"5aef74a56b363d994095c407c4809d84",
"5aef74a56b363d994095c407c4809d84",
"55f1116c05c8051ab53171f0b7455197",
"b123553a2418c4bda51abc64d705d5d4"};
// Checks that md5File() reports success and yields the expected digest for
// every entry of DICT_FILE.
TEST(Md5Test, Test1)
{
    const size_t fileCount = sizeof(DICT_FILE)/sizeof(DICT_FILE[0]);
    // Both tables must describe the same number of files.
    ASSERT_EQ(fileCount, sizeof(DICT_FILE_MD5)/sizeof(DICT_FILE_MD5[0]));
    for (size_t i = 0; i < fileCount; i++)
    {
        // A fresh string per file plus an explicit success check: a failed
        // md5File() call can then never be masked by a digest left over
        // from the previous iteration (entries 0 and 1 expect the same
        // value).
        string digest;
        ASSERT_TRUE(md5File(DICT_FILE[i], digest));
        ASSERT_EQ(digest, string(DICT_FILE_MD5[i]));
    }
}

View File

@ -8,8 +8,7 @@ static const char* const DICT_FILE = "../dict/jieba.dict.utf8";
TEST(TrieTest, Test1)
{
Trie trie;
ASSERT_TRUE(trie.init());
ASSERT_TRUE(trie.loadDict(DICT_FILE));
ASSERT_TRUE(trie.init(DICT_FILE));
ASSERT_LT(trie.getMinLogFreq() + 17.2184, 0.001);
string word("来到");
Unicode uni;

View File

@ -1,52 +0,0 @@
#include "src/TrieManager.hpp"
#include "gtest/gtest.h"
using namespace CppJieba;
// Pairs the digest of a dictionary file with the Trie pointer obtained for
// that file; MD5_PTR is an alias for the struct.
typedef struct md5_ptr
{
    string md5; // digest string filled in by md5File()
    Trie* ptr;  // pointer returned by TrieManager::getTrie()
} MD5_PTR;
// Dictionary paths requested from the TrieManager by TrieManagerTest.
// Each path is listed several times so the test issues repeated requests
// for the same file.
static const char* const DICT_FILE[] = {
"../test/testdata/jieba.dict.0.utf8",
"../test/testdata/jieba.dict.0.utf8",
"../test/testdata/jieba.dict.0.utf8",
"../test/testdata/jieba.dict.0.1.utf8",
"../test/testdata/jieba.dict.0.1.utf8",
"../test/testdata/jieba.dict.0.1.utf8",
"../test/testdata/jieba.dict.1.utf8",
"../test/testdata/jieba.dict.1.utf8",
"../test/testdata/jieba.dict.1.utf8",
"../test/testdata/jieba.dict.2.utf8",
"../test/testdata/jieba.dict.2.utf8",
"../test/testdata/jieba.dict.2.utf8",
"../test/testdata/jieba.dict.2.utf8"};
// Requests a Trie for every DICT_FILE entry, then checks pairwise that
// entries with equal digests received the same Trie pointer and entries
// with different digests received different pointers.
TEST(TrieManagerTest, Test1)
{
    const size_t fileCount = sizeof(DICT_FILE)/sizeof(DICT_FILE[0]);
    vector<MD5_PTR> entries(fileCount);
    TrieManager& manager = TrieManager::getInstance();

    // Collect the Trie pointer and the digest for each path.
    for (size_t k = 0; k < entries.size(); ++k)
    {
        entries[k].ptr = manager.getTrie(DICT_FILE[k]);
        ASSERT_TRUE(md5File(DICT_FILE[k], entries[k].md5));
    }

    // Compare every unordered pair of entries exactly once.
    for (size_t a = 0; a < entries.size(); ++a)
    {
        for (size_t b = a + 1; b < entries.size(); ++b)
        {
            if (entries[a].md5 != entries[b].md5)
            {
                ASSERT_NE(entries[a].ptr, entries[b].ptr);
            }
            else
            {
                ASSERT_EQ(entries[a].ptr, entries[b].ptr);
            }
        }
    }
}