merge dev

This commit is contained in:
wyy 2014-04-11 13:07:51 +08:00
commit 0ad0d74d19
8 changed files with 332 additions and 251 deletions

179
src/DictTrie.hpp Normal file
View File

@ -0,0 +1,179 @@
#ifndef CPPJIEBA_DICT_TRIE_HPP
#define CPPJIEBA_DICT_TRIE_HPP
#include <stdint.h>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <string>
#include <vector>
#include "Limonp/str_functs.hpp"
#include "Limonp/logger.hpp"
#include "Limonp/InitOnOff.hpp"
#include "TransCode.hpp"
#include "Trie.hpp"
namespace CppJieba
{
using namespace Limonp;
// Sentinel bounds for floating-point searches. MAX_DOUBLE is the starting
// value when looking for the minimum log frequency; MIN_DOUBLE is not used in
// this header.
const double MIN_DOUBLE = -3.14e+100;
const double MAX_DOUBLE = 3.14e+100;
// Number of space-separated columns expected on every dictionary line
// (word, frequency, tag).
const size_t DICT_COLUMN_NUM = 3;
// One dictionary entry: the decoded word plus the data loaded from, or
// derived from, its dictionary line.
struct DictUnit
{
    Unicode word;   // decoded word
    size_t freq;    // raw frequency read from the dictionary
    string tag;     // tag column of the dictionary line
    double logFreq; // logFreq = log(freq/sum(freq));

    // Give the scalar members defined values so a freshly created unit can be
    // copied or printed before the frequencies have been filled in.
    DictUnit(): freq(0), logFreq(0.0)
    {
    }
};
// Writes a DictUnit as "<word> <freq> <tag> <logFreq>", with logFreq printed
// to three decimal places.
inline ostream & operator << (ostream& os, const DictUnit& unit)
{
    string s;
    s << unit.word;
    // freq is a size_t; "%u" expects unsigned int, which is a vararg type
    // mismatch on LP64 targets. Pass it as unsigned long with a matching
    // conversion so the printed number stays the same and the call is well
    // defined.
    return os << string_format("%s %lu %s %.3lf",
                s.c_str(),
                static_cast<unsigned long>(unit.freq),
                unit.tag.c_str(),
                unit.logFreq);
}
// Result container filled by DictTrie::find(begin, end, dag, offset): an
// ordered map from a size_t position key to the matching dictionary entry.
typedef map<size_t, const DictUnit*> DagType;
class DictTrie: public InitOnOff
{
public:
typedef Trie<Unicode::value_type, DictUnit> TrieType;
private:
vector<DictUnit> _nodeInfos;
TrieType * _trie;
size_t _freqSum;
double _minLogFreq;
public:
DictTrie()
{
_trie = NULL;
_freqSum = 0;
_minLogFreq = MAX_DOUBLE;
_setInitFlag(false);
}
DictTrie(const string& filePath)
{
new (this) DictTrie();
_setInitFlag(init(filePath));
}
~DictTrie()
{
if(_trie)
{
delete _trie;
}
}
private:
public:
bool init(const string& filePath)
{
assert(!_getInitFlag());
_loadDict(filePath, _nodeInfos);
_freqSum = _calculateFreqSum(_nodeInfos);
assert(_freqSum);
_minLogFreq = _calculateLogFreqAndGetMinValue(_nodeInfos, _freqSum);
_trie = _creatTrie(_nodeInfos);
return _setInitFlag(_trie);
}
public:
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const
{
return _trie->find(begin, end);
}
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const
{
return _trie->find(begin, end, dag, offset);
}
public:
double getMinLogFreq() const {return _minLogFreq;};
private:
TrieType * _creatTrie(const vector<DictUnit>& dictUnits)
{
if(dictUnits.empty())
{
return NULL;
}
vector<Unicode> words;
vector<const DictUnit*> valuePointers;
for(size_t i = 0 ; i < dictUnits.size(); i ++)
{
words.push_back(dictUnits[i].word);
valuePointers.push_back(&dictUnits[i]);
}
TrieType * trie = new TrieType(words, valuePointers);
return trie;
}
void _loadDict(const string& filePath, vector<DictUnit>& nodeInfos) const
{
ifstream ifs(filePath.c_str());
if(!ifs)
{
LogFatal("open %s failed.", filePath.c_str());
exit(1);
}
string line;
vector<string> buf;
nodeInfos.clear();
DictUnit nodeInfo;
for(size_t lineno = 0 ; getline(ifs, line); lineno++)
{
split(line, buf, " ");
assert(buf.size() == DICT_COLUMN_NUM);
if(!TransCode::decode(buf[0], nodeInfo.word))
{
LogError("line[%u:%s] illegal.", lineno, line.c_str());
continue;
}
nodeInfo.freq = atoi(buf[1].c_str());
nodeInfo.tag = buf[2];
nodeInfos.push_back(nodeInfo);
}
}
size_t _calculateFreqSum(const vector<DictUnit>& nodeInfos) const
{
size_t freqSum = 0;
for(size_t i = 0; i < nodeInfos.size(); i++)
{
freqSum += nodeInfos[i].freq;
}
return freqSum;
}
double _calculateLogFreqAndGetMinValue(vector<DictUnit>& nodeInfos, size_t freqSum) const
{
assert(freqSum);
double minLogFreq = MAX_DOUBLE;
for(size_t i = 0; i < nodeInfos.size(); i++)
{
DictUnit& nodeInfo = nodeInfos[i];
assert(nodeInfo.freq);
nodeInfo.logFreq = log(double(nodeInfo.freq)/double(freqSum));
if(minLogFreq > nodeInfo.logFreq)
{
minLogFreq = nodeInfo.logFreq;
}
}
return minLogFreq;
}
};
}
#endif

View File

@ -5,7 +5,7 @@
#include <set> #include <set>
#include <cassert> #include <cassert>
#include "Limonp/logger.hpp" #include "Limonp/logger.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
#include "ISegment.hpp" #include "ISegment.hpp"
#include "SegmentBase.hpp" #include "SegmentBase.hpp"
#include "TransCode.hpp" #include "TransCode.hpp"
@ -15,7 +15,7 @@ namespace CppJieba
class FullSegment: public SegmentBase class FullSegment: public SegmentBase
{ {
private: private:
Trie _trie; DictTrie _dictTrie;
public: public:
FullSegment(){_setInitFlag(false);}; FullSegment(){_setInitFlag(false);};
@ -29,8 +29,8 @@ namespace CppJieba
LogError("already inited before now."); LogError("already inited before now.");
return false; return false;
} }
_trie.init(dictPath.c_str()); _dictTrie.init(dictPath.c_str());
assert(_trie); assert(_dictTrie);
return _setInitFlag(true); return _setInitFlag(true);
} }
@ -61,10 +61,10 @@ namespace CppJieba
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
{ {
//find word start from uItr //find word start from uItr
if (_trie.find(uItr, end, tRes, 0)) if (_dictTrie.find(uItr, end, tRes, 0))
{ {
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
//for (vector<pair<size_t, const TrieNodeInfo*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) //for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
{ {
wordLen = itr->second->word.size(); wordLen = itr->second->word.size();
if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx))

View File

@ -10,7 +10,7 @@
#include "TransCode.hpp" #include "TransCode.hpp"
#include "ISegment.hpp" #include "ISegment.hpp"
#include "SegmentBase.hpp" #include "SegmentBase.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
namespace CppJieba namespace CppJieba
{ {

View File

@ -9,8 +9,8 @@
#include <set> #include <set>
#include <cassert> #include <cassert>
#include "Limonp/logger.hpp" #include "Limonp/logger.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
#include "ISegment.hpp" #include "ISegment.hpp"
#include "SegmentBase.hpp" #include "SegmentBase.hpp"
@ -21,7 +21,7 @@ namespace CppJieba
{ {
uint16_t uniCh; uint16_t uniCh;
DagType dag; DagType dag;
const TrieNodeInfo * pInfo; const DictUnit * pInfo;
double weight; double weight;
SegmentChar():uniCh(0), pInfo(NULL), weight(0.0) SegmentChar():uniCh(0), pInfo(NULL), weight(0.0)
@ -32,7 +32,7 @@ namespace CppJieba
class MPSegment: public SegmentBase class MPSegment: public SegmentBase
{ {
protected: protected:
Trie _trie; DictTrie _dictTrie;
public: public:
MPSegment(){_setInitFlag(false);}; MPSegment(){_setInitFlag(false);};
@ -49,8 +49,8 @@ namespace CppJieba
LogError("already inited before now."); LogError("already inited before now.");
return false; return false;
} }
_trie.init(dictPath); _dictTrie.init(dictPath);
assert(_trie); assert(_dictTrie);
LogInfo("MPSegment init(%s) ok", dictPath.c_str()); LogInfo("MPSegment init(%s) ok", dictPath.c_str());
return _setInitFlag(true); return _setInitFlag(true);
} }
@ -124,7 +124,7 @@ namespace CppJieba
schar.uniCh = *it; schar.uniCh = *it;
offset = it - begin; offset = it - begin;
schar.dag.clear(); schar.dag.clear();
_trie.find(it, end, schar.dag, offset); _dictTrie.find(it, end, schar.dag, offset);
if(!isIn(schar.dag, offset)) if(!isIn(schar.dag, offset))
{ {
schar.dag[offset] = NULL; schar.dag[offset] = NULL;
@ -142,7 +142,7 @@ namespace CppJieba
} }
size_t nextPos; size_t nextPos;
const TrieNodeInfo* p; const DictUnit* p;
double val; double val;
for(int i = segContext.size() - 1; i >= 0; i--) for(int i = segContext.size() - 1; i >= 0; i--)
@ -165,7 +165,7 @@ namespace CppJieba
} }
else else
{ {
val += _trie.getMinLogFreq(); val += _dictTrie.getMinLogFreq();
} }
if(val > segContext[i].weight) if(val > segContext[i].weight)
{ {
@ -182,7 +182,7 @@ namespace CppJieba
size_t i = 0; size_t i = 0;
while(i < segContext.size()) while(i < segContext.size())
{ {
const TrieNodeInfo* p = segContext[i].pInfo; const DictUnit* p = segContext[i].pInfo;
if(p) if(p)
{ {
res.push_back(p->word); res.push_back(p->word);

View File

@ -3,7 +3,7 @@
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include "Limonp/str_functs.hpp" #include "Limonp/str_functs.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
namespace CppJieba namespace CppJieba
{ {
@ -13,7 +13,7 @@ namespace CppJieba
{ {
private: private:
MixSegment _segment; MixSegment _segment;
Trie _trie; DictTrie _dictTrie;
public: public:
PosTagger(){_setInitFlag(false);}; PosTagger(){_setInitFlag(false);};
@ -27,8 +27,8 @@ namespace CppJieba
{ {
assert(!_getInitFlag()); assert(!_getInitFlag());
_trie.init(dictPath); _dictTrie.init(dictPath);
assert(_trie); assert(_dictTrie);
return _setInitFlag(_segment.init(dictPath, hmmFilePath)); return _setInitFlag(_segment.init(dictPath, hmmFilePath));
}; };
@ -42,7 +42,7 @@ namespace CppJieba
return false; return false;
} }
const TrieNodeInfo *tmp = NULL; const DictUnit *tmp = NULL;
Unicode unico; Unicode unico;
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr) for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
{ {
@ -51,7 +51,7 @@ namespace CppJieba
LogError("decode failed."); LogError("decode failed.");
return false; return false;
} }
tmp = _trie.find(unico.begin(), unico.end()); tmp = _dictTrie.find(unico.begin(), unico.end());
res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag)); res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
} }
tmp = NULL; tmp = NULL;

View File

@ -5,13 +5,13 @@
#include <set> #include <set>
#include <cassert> #include <cassert>
#include "Limonp/logger.hpp" #include "Limonp/logger.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
#include "ISegment.hpp" #include "ISegment.hpp"
#include "SegmentBase.hpp" #include "SegmentBase.hpp"
#include "FullSegment.hpp" #include "FullSegment.hpp"
#include "MixSegment.hpp" #include "MixSegment.hpp"
#include "TransCode.hpp" #include "TransCode.hpp"
#include "Trie.hpp" #include "DictTrie.hpp"
namespace CppJieba namespace CppJieba
{ {

View File

@ -1,240 +1,142 @@
/************************************ #ifndef CPPJIEBA_TRIE_HPP
* file enc : ASCII #define CPPJIEBA_TRIE_HPP
* author : wuyanyi09@gmail.com
************************************/
#ifndef CPPJIEBA_TRIE_H
#define CPPJIEBA_TRIE_H
#include <iostream>
#include <fstream>
#include <map>
#include <cstring>
#include <stdint.h>
#include <cmath>
#include <limits>
#include "Limonp/str_functs.hpp"
#include "Limonp/logger.hpp"
#include "Limonp/InitOnOff.hpp"
#include "TransCode.hpp"
#include "Limonp/std_outbound.hpp"
#include <vector>
namespace CppJieba namespace CppJieba
{ {
using namespace Limonp; using namespace std;
const double MIN_DOUBLE = -3.14e+100; template <class KeyType, class ValueType>
const double MAX_DOUBLE = 3.14e+100; class TrieNode
const size_t DICT_COLUMN_NUM = 3;
typedef map<uint16_t, struct TrieNode*> TrieNodeMap;
struct TrieNodeInfo;
struct TrieNode
{ {
TrieNodeMap hmap;
const TrieNodeInfo * ptTrieNodeInfo;
TrieNode(): ptTrieNodeInfo(NULL)
{}
};
struct TrieNodeInfo
{
Unicode word;
size_t freq;
string tag;
double logFreq; //logFreq = log(freq/sum(freq));
};
inline ostream& operator << (ostream& os, const TrieNodeInfo & nodeInfo)
{
return os << nodeInfo.word << ":" << nodeInfo.freq << ":" << nodeInfo.tag << ":" << nodeInfo.logFreq ;
}
typedef map<size_t, const TrieNodeInfo*> DagType;
class Trie: public InitOnOff
{
private:
TrieNode* _root;
vector<TrieNodeInfo> _nodeInfos;
int64_t _freqSum;
double _minLogFreq;
public: public:
Trie() typedef map<KeyType, TrieNode<KeyType, ValueType>* > KeyMapType;
public:
KeyMapType * ptKeyMap;
const ValueType * ptValue;
};
template <class KeyType, class ValueType>
class Trie
{ {
_root = new TrieNode; public:
_freqSum = 0; typedef TrieNode<KeyType, ValueType> TrieNodeType;
_minLogFreq = MAX_DOUBLE; private:
_setInitFlag(false); TrieNodeType* _root;
} public:
Trie(const string& filePath) Trie(const vector<vector<KeyType> >& keys, const vector<const ValueType* >& valuePointers)
{ {
new (this) Trie(); _root = new TrieNodeType;
_setInitFlag(init(filePath)); _root->ptKeyMap = NULL;
_root->ptValue = NULL;
_createTrie(keys, valuePointers);
} }
~Trie() ~Trie()
{
if(_root)
{ {
_deleteNode(_root); _deleteNode(_root);
} }
private:
public:
bool init(const string& filePath)
{
assert(!_getInitFlag());
_loadDict(filePath, _nodeInfos);
_createTrie(_nodeInfos, _root);
_freqSum = _calculateFreqSum(_nodeInfos);
assert(_freqSum);
_minLogFreq = _calculateLogFreqAndGetMinValue(_nodeInfos, _freqSum);
return _setInitFlag(true);
} }
public: public:
const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const const ValueType* find(typename vector<KeyType>::const_iterator begin, typename vector<KeyType>::const_iterator end) const
{ {
TrieNodeMap::const_iterator citer; typename TrieNodeType::KeyMapType::const_iterator citer;
const TrieNode* p = _root; const TrieNodeType* ptNode = _root;
for(Unicode::const_iterator it = begin; it != end; it++) for(typename vector<KeyType>::const_iterator it = begin; it != end; it++)
{ {
citer = p->hmap.find(*it); assert(ptNode);
if(p->hmap.end() == citer) if(NULL == ptNode->ptKeyMap || ptNode->ptKeyMap->end() == (citer = ptNode->ptKeyMap->find(*it)))
{ {
return NULL; return NULL;
} }
p = citer->second; ptNode = citer->second;
} }
return p->ptTrieNodeInfo; return ptNode->ptValue;
} }
bool find(typename vector<KeyType>::const_iterator begin, typename vector<KeyType> ::const_iterator end, map<typename vector<KeyType>::size_type, const ValueType* >& ordererMap, size_t offset = 0) const
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType & res, size_t offset = 0) const
{ {
const TrieNode* p = _root; const TrieNodeType * ptNode = _root;
TrieNodeMap::const_iterator citer; typename TrieNodeType::KeyMapType::const_iterator citer;
for (Unicode::const_iterator itr = begin; itr != end; itr++) ordererMap.clear();
for(typename vector<KeyType>::const_iterator itr = begin; itr != end ; itr++)
{ {
citer = p->hmap.find(*itr); assert(ptNode);
if(p->hmap.end() == citer) if(NULL == ptNode->ptKeyMap || ptNode->ptKeyMap->end() == (citer = ptNode->ptKeyMap->find(*itr)))
{ {
break; break;
} }
p = citer->second; ptNode = citer->second;
if(p->ptTrieNodeInfo) if(ptNode->ptValue)
{ {
res[itr - begin + offset] = p->ptTrieNodeInfo; ordererMap[itr - begin + offset] = ptNode->ptValue;
} }
} }
return !res.empty(); return ordererMap.size();
} }
public:
double getMinLogFreq() const {return _minLogFreq;};
private: private:
void _insertNode(const TrieNodeInfo& nodeInfo, TrieNode* ptNode) const void _createTrie(const vector<vector<KeyType> >& keys, const vector<const ValueType*>& valuePointers)
{ {
const Unicode& unico = nodeInfo.word; if(valuePointers.empty() || keys.empty())
TrieNodeMap::const_iterator citer;
for(size_t i = 0; i < unico.size(); i++)
{ {
uint16_t cu = unico[i]; return;
assert(ptNode); }
citer = ptNode->hmap.find(cu); assert(keys.size() == valuePointers.size());
if(ptNode->hmap.end() == citer)
for(size_t i = 0; i < keys.size(); i++)
{ {
TrieNode * next = new TrieNode; _insertNode(keys[i], valuePointers[i]);
ptNode->hmap[cu] = next; }
ptNode = next; }
private:
void _insertNode(const vector<KeyType>& key, const ValueType* ptValue)
{
TrieNodeType* ptNode = _root;
typename TrieNodeType::KeyMapType::const_iterator kmIter;
for(typename vector<KeyType>::const_iterator citer = key.begin(); citer != key.end(); citer++)
{
if(NULL == ptNode->ptKeyMap)
{
ptNode->ptKeyMap = new typename TrieNodeType::KeyMapType;
}
kmIter = ptNode->ptKeyMap->find(*citer);
if(ptNode->ptKeyMap->end() == kmIter)
{
TrieNodeType * nextNode = new TrieNodeType;
nextNode->ptKeyMap = NULL;
nextNode->ptValue = NULL;
(*ptNode->ptKeyMap)[*citer] = nextNode;
ptNode = nextNode;
} }
else else
{ {
ptNode = citer->second; ptNode = kmIter->second;
}
}
ptNode->ptTrieNodeInfo = &nodeInfo;
}
private:
void _loadDict(const string& filePath, vector<TrieNodeInfo>& nodeInfos) const
{
ifstream ifs(filePath.c_str());
if(!ifs)
{
LogFatal("open %s failed.", filePath.c_str());
exit(1);
}
string line;
vector<string> buf;
nodeInfos.clear();
TrieNodeInfo nodeInfo;
for(size_t lineno = 0 ; getline(ifs, line); lineno++)
{
split(line, buf, " ");
assert(buf.size() == DICT_COLUMN_NUM);
if(!TransCode::decode(buf[0], nodeInfo.word))
{
LogError("line[%u:%s] illegal.", lineno, line.c_str());
continue;
}
nodeInfo.freq = atoi(buf[1].c_str());
nodeInfo.tag = buf[2];
nodeInfos.push_back(nodeInfo);
} }
} }
bool _createTrie(const vector<TrieNodeInfo>& nodeInfos, TrieNode * ptNode) ptNode->ptValue = ptValue;
{
for(size_t i = 0; i < _nodeInfos.size(); i++)
{
_insertNode(_nodeInfos[i], ptNode);
} }
return true; void _deleteNode(TrieNodeType* node)
}
size_t _calculateFreqSum(const vector<TrieNodeInfo>& nodeInfos) const
{
size_t freqSum = 0;
for(size_t i = 0; i < nodeInfos.size(); i++)
{
freqSum += nodeInfos[i].freq;
}
return freqSum;
}
double _calculateLogFreqAndGetMinValue(vector<TrieNodeInfo>& nodeInfos, size_t freqSum) const
{
assert(freqSum);
double minLogFreq = MAX_DOUBLE;
for(size_t i = 0; i < nodeInfos.size(); i++)
{
TrieNodeInfo& nodeInfo = nodeInfos[i];
assert(nodeInfo.freq);
nodeInfo.logFreq = log(double(nodeInfo.freq)/double(freqSum));
if(minLogFreq > nodeInfo.logFreq)
{
minLogFreq = nodeInfo.logFreq;
}
}
return minLogFreq;
}
void _deleteNode(TrieNode* node)
{ {
if(!node) if(!node)
{ {
return; return;
} }
for(TrieNodeMap::iterator it = node->hmap.begin(); it != node->hmap.end(); it++) if(node->ptKeyMap)
{ {
TrieNode* next = it->second; typename TrieNodeType::KeyMapType::iterator it;
_deleteNode(next); for(it = node->ptKeyMap->begin(); it != node->ptKeyMap->end(); it++)
{
_deleteNode(it->second);
}
delete node->ptKeyMap;
} }
delete node; delete node;
} }
}; };
} }

View File

@ -1,30 +1,30 @@
#include "src/Trie.hpp" #include "src/DictTrie.hpp"
#include "gtest/gtest.h" #include "gtest/gtest.h"
using namespace CppJieba; using namespace CppJieba;
static const char* const DICT_FILE = "../dict/extra_dict/jieba.dict.small.utf8"; static const char* const DICT_FILE = "../dict/extra_dict/jieba.dict.small.utf8";
TEST(TrieTest, NewAndDelete) TEST(DictTrieTest, NewAndDelete)
{ {
Trie * trie; DictTrie * trie;
trie = new Trie(DICT_FILE); trie = new DictTrie(DICT_FILE);
delete trie; delete trie;
trie = new Trie(); trie = new DictTrie();
delete trie; delete trie;
} }
TEST(TrieTest, Test1) TEST(DictTrieTest, Test1)
{ {
string s1, s2; string s1, s2;
Trie trie; DictTrie trie;
ASSERT_TRUE(trie.init(DICT_FILE)); ASSERT_TRUE(trie.init(DICT_FILE));
ASSERT_LT(trie.getMinLogFreq() + 15.6479, 0.001); ASSERT_LT(trie.getMinLogFreq() + 15.6479, 0.001);
string word("来到"); string word("来到");
Unicode uni; Unicode uni;
ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(TransCode::decode(word, uni));
TrieNodeInfo nodeInfo; DictUnit nodeInfo;
nodeInfo.word = uni; nodeInfo.word = uni;
nodeInfo.freq = 8779; nodeInfo.freq = 8779;
nodeInfo.tag = "v"; nodeInfo.tag = "v";
@ -32,11 +32,11 @@ TEST(TrieTest, Test1)
s1 << nodeInfo; s1 << nodeInfo;
s2 << (*trie.find(uni.begin(), uni.end())); s2 << (*trie.find(uni.begin(), uni.end()));
EXPECT_EQ("[\"26469\", \"21040\"]:8779:v:-8.87033", s2); EXPECT_EQ("[\"26469\", \"21040\"] 8779 v -8.870", s2);
word = "清华大学"; word = "清华大学";
vector<pair<size_t, const TrieNodeInfo*> > res; vector<pair<size_t, const DictUnit*> > res;
map<size_t, const TrieNodeInfo* > resMap; map<size_t, const DictUnit* > resMap;
map<size_t, const TrieNodeInfo* > mp; map<size_t, const DictUnit* > mp;
const char * words[] = {"", "清华", "清华大学"}; const char * words[] = {"", "清华", "清华大学"};
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++)
{ {
@ -44,10 +44,10 @@ TEST(TrieTest, Test1)
res.push_back(make_pair(uni.size() - 1, trie.find(uni.begin(), uni.end()))); res.push_back(make_pair(uni.size() - 1, trie.find(uni.begin(), uni.end())));
resMap[uni.size() - 1] = trie.find(uni.begin(), uni.end()); resMap[uni.size() - 1] = trie.find(uni.begin(), uni.end());
} }
//TrieNodeInfo //DictUnit
//res.push_back(make_pair(0, )) //res.push_back(make_pair(0, ))
vector<pair<size_t, const TrieNodeInfo*> > vec; vector<pair<size_t, const DictUnit*> > vec;
ASSERT_TRUE(TransCode::decode(word, uni)); ASSERT_TRUE(TransCode::decode(word, uni));
//print(uni); //print(uni);
ASSERT_TRUE(trie.find(uni.begin(), uni.end(), mp, 0)); ASSERT_TRUE(trie.find(uni.begin(), uni.end(), mp, 0));