mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
去除一些 template 代码
This commit is contained in:
parent
c119dc0a93
commit
7868f7cdff
@ -25,11 +25,9 @@ namespace CppJieba
|
|||||||
|
|
||||||
class DictTrie
|
class DictTrie
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
typedef Trie<Unicode::value_type, DictUnit, Unicode, vector<Unicode>, vector<const DictUnit*> > TrieType;
|
|
||||||
private:
|
private:
|
||||||
vector<DictUnit> _nodeInfos;
|
vector<DictUnit> _nodeInfos;
|
||||||
TrieType * _trie;
|
Trie * _trie;
|
||||||
|
|
||||||
double _minWeight;
|
double _minWeight;
|
||||||
private:
|
private:
|
||||||
@ -100,7 +98,7 @@ namespace CppJieba
|
|||||||
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TrieType * _createTrie(const vector<DictUnit>& dictUnits)
|
Trie * _createTrie(const vector<DictUnit>& dictUnits)
|
||||||
{
|
{
|
||||||
assert(dictUnits.size());
|
assert(dictUnits.size());
|
||||||
vector<Unicode> words;
|
vector<Unicode> words;
|
||||||
@ -111,7 +109,7 @@ namespace CppJieba
|
|||||||
valuePointers.push_back(&dictUnits[i]);
|
valuePointers.push_back(&dictUnits[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
TrieType * trie = new TrieType(words, valuePointers);
|
Trie * trie = new Trie(words, valuePointers);
|
||||||
return trie;
|
return trie;
|
||||||
}
|
}
|
||||||
void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag)
|
void _loadUserDict(const string& filePath, double defaultWeight, const string& defaultTag)
|
||||||
|
90
src/Trie.hpp
90
src/Trie.hpp
@ -16,6 +16,7 @@ namespace CppJieba
|
|||||||
string tag;
|
string tag;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// for debugging
|
||||||
inline ostream & operator << (ostream& os, const DictUnit& unit)
|
inline ostream & operator << (ostream& os, const DictUnit& unit)
|
||||||
{
|
{
|
||||||
string s;
|
string s;
|
||||||
@ -38,17 +39,20 @@ namespace CppJieba
|
|||||||
{}
|
{}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class KeyType, class ValueType>
|
typedef Unicode::value_type TrieKey;
|
||||||
|
|
||||||
class TrieNode
|
class TrieNode
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
typedef unordered_map<KeyType, TrieNode<KeyType, ValueType>* > NextMap;
|
typedef unordered_map<TrieKey, TrieNode*> NextMap;
|
||||||
public:
|
public:
|
||||||
TrieNode * fail;
|
TrieNode * fail;
|
||||||
NextMap * next;
|
NextMap * next;
|
||||||
const ValueType * ptValue;
|
const DictUnit * ptValue;
|
||||||
public:
|
public:
|
||||||
const TrieNode * findNext(KeyType key) const
|
TrieNode(): fail(NULL), next(NULL), ptValue(NULL)
|
||||||
|
{}
|
||||||
|
const TrieNode * findNext(TrieKey key) const
|
||||||
{
|
{
|
||||||
if(next == NULL)
|
if(next == NULL)
|
||||||
{
|
{
|
||||||
@ -61,22 +65,16 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
return iter->second;
|
return iter->second;
|
||||||
}
|
}
|
||||||
public:
|
|
||||||
TrieNode(): fail(NULL), next(NULL), ptValue(NULL) {
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class KeyType, class ValueType, class KeyContainerType = vector<KeyType>, class KeysContainerType = vector<KeyContainerType>, class ValueContainerType = vector<const ValueType* > >
|
|
||||||
class Trie
|
class Trie
|
||||||
{
|
{
|
||||||
public:
|
|
||||||
typedef TrieNode<KeyType, ValueType> TrieNodeType;
|
|
||||||
private:
|
private:
|
||||||
TrieNodeType* _root;
|
TrieNode* _root;
|
||||||
public:
|
public:
|
||||||
Trie(const KeysContainerType& keys, const ValueContainerType& valuePointers)
|
Trie(const vector<Unicode>& keys, const vector<const DictUnit*> & valuePointers)
|
||||||
{
|
{
|
||||||
_root = new TrieNodeType;
|
_root = new TrieNode;
|
||||||
_createTrie(keys, valuePointers);
|
_createTrie(keys, valuePointers);
|
||||||
_build();// build automation
|
_build();// build automation
|
||||||
}
|
}
|
||||||
@ -88,11 +86,11 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
public:
|
public:
|
||||||
const ValueType* find(typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator end) const
|
const DictUnit* find(typename Unicode::const_iterator begin, typename Unicode::const_iterator end) const
|
||||||
{
|
{
|
||||||
typename TrieNodeType::NextMap::const_iterator citer;
|
typename TrieNode::NextMap::const_iterator citer;
|
||||||
const TrieNodeType* ptNode = _root;
|
const TrieNode* ptNode = _root;
|
||||||
for(typename KeyContainerType::const_iterator it = begin; it != end; it++)
|
for(typename Unicode::const_iterator it = begin; it != end; it++)
|
||||||
{// build automation
|
{// build automation
|
||||||
assert(ptNode);
|
assert(ptNode);
|
||||||
if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*it)))
|
if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*it)))
|
||||||
@ -105,21 +103,21 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
// aho-corasick-automation
|
// aho-corasick-automation
|
||||||
void find(
|
void find(
|
||||||
typename KeyContainerType::const_iterator begin,
|
typename Unicode::const_iterator begin,
|
||||||
typename KeyContainerType::const_iterator end,
|
typename Unicode::const_iterator end,
|
||||||
vector<struct SegmentChar>& res
|
vector<struct SegmentChar>& res
|
||||||
) const
|
) const
|
||||||
{
|
{
|
||||||
res.resize(end - begin);
|
res.resize(end - begin);
|
||||||
const TrieNodeType * now = _root;
|
const TrieNode * now = _root;
|
||||||
//typename TrieNodeType::NextMap::const_iterator iter;
|
//typename TrieNode::NextMap::const_iterator iter;
|
||||||
const TrieNodeType* node;
|
const TrieNode* node;
|
||||||
for (size_t i = 0; i < end - begin; i++)
|
for (size_t i = 0; i < end - begin; i++)
|
||||||
{
|
{
|
||||||
Unicode::value_type ch = *(begin + i);
|
Unicode::value_type ch = *(begin + i);
|
||||||
res[i].uniCh = ch;
|
res[i].uniCh = ch;
|
||||||
assert(res[i].dag.empty());
|
assert(res[i].dag.empty());
|
||||||
res[i].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, NULL));
|
res[i].dag.push_back(pair<typename vector<Unicode >::size_type, const DictUnit* >(i, NULL));
|
||||||
bool flag = false;
|
bool flag = false;
|
||||||
|
|
||||||
// rollback
|
// rollback
|
||||||
@ -148,13 +146,13 @@ namespace CppJieba
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
now = node;
|
now = node;
|
||||||
const TrieNodeType * temp = now;
|
const TrieNode * temp = now;
|
||||||
while(temp != _root)
|
while(temp != _root)
|
||||||
{
|
{
|
||||||
if (temp->ptValue)
|
if (temp->ptValue)
|
||||||
{
|
{
|
||||||
size_t pos = i - temp->ptValue->word.size() + 1;
|
size_t pos = i - temp->ptValue->word.size() + 1;
|
||||||
res[pos].dag.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(i, temp->ptValue));
|
res[pos].dag.push_back(pair<typename vector<Unicode >::size_type, const DictUnit* >(i, temp->ptValue));
|
||||||
if(pos == i)
|
if(pos == i)
|
||||||
{
|
{
|
||||||
res[pos].dag[0].second = temp->ptValue;
|
res[pos].dag[0].second = temp->ptValue;
|
||||||
@ -167,14 +165,14 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool find(
|
bool find(
|
||||||
typename KeyContainerType::const_iterator begin,
|
typename Unicode::const_iterator begin,
|
||||||
typename KeyContainerType::const_iterator end,
|
typename Unicode::const_iterator end,
|
||||||
DagType & res,
|
DagType & res,
|
||||||
size_t offset = 0) const
|
size_t offset = 0) const
|
||||||
{
|
{
|
||||||
const TrieNodeType * ptNode = _root;
|
const TrieNode * ptNode = _root;
|
||||||
typename TrieNodeType::NextMap::const_iterator citer;
|
typename TrieNode::NextMap::const_iterator citer;
|
||||||
for(typename KeyContainerType::const_iterator itr = begin; itr != end ; itr++)
|
for(typename Unicode::const_iterator itr = begin; itr != end ; itr++)
|
||||||
{
|
{
|
||||||
assert(ptNode);
|
assert(ptNode);
|
||||||
if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*itr)))
|
if(NULL == ptNode->next || ptNode->next->end() == (citer = ptNode->next->find(*itr)))
|
||||||
@ -190,7 +188,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
res.push_back(pair<typename KeysContainerType::size_type, const ValueType* >(itr - begin + offset, ptNode->ptValue));
|
res.push_back(pair<typename vector<Unicode >::size_type, const DictUnit* >(itr - begin + offset, ptNode->ptValue));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -199,23 +197,23 @@ namespace CppJieba
|
|||||||
private:
|
private:
|
||||||
void _build()
|
void _build()
|
||||||
{
|
{
|
||||||
queue<TrieNodeType*> que;
|
queue<TrieNode*> que;
|
||||||
assert(_root->ptValue == NULL);
|
assert(_root->ptValue == NULL);
|
||||||
assert(_root->next);
|
assert(_root->next);
|
||||||
_root->fail = NULL;
|
_root->fail = NULL;
|
||||||
for(typename TrieNodeType::NextMap::iterator iter = _root->next->begin(); iter != _root->next->end(); iter++) {
|
for(typename TrieNode::NextMap::iterator iter = _root->next->begin(); iter != _root->next->end(); iter++) {
|
||||||
iter->second->fail = _root;
|
iter->second->fail = _root;
|
||||||
que.push(iter->second);
|
que.push(iter->second);
|
||||||
}
|
}
|
||||||
TrieNodeType* back = NULL;
|
TrieNode* back = NULL;
|
||||||
typename TrieNodeType::NextMap::iterator backiter;
|
typename TrieNode::NextMap::iterator backiter;
|
||||||
while(!que.empty()) {
|
while(!que.empty()) {
|
||||||
TrieNodeType * now = que.front();
|
TrieNode * now = que.front();
|
||||||
que.pop();
|
que.pop();
|
||||||
if(now->next == NULL) {
|
if(now->next == NULL) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for(typename TrieNodeType::NextMap::iterator iter = now->next->begin(); iter != now->next->end(); iter++) {
|
for(typename TrieNode::NextMap::iterator iter = now->next->begin(); iter != now->next->end(); iter++) {
|
||||||
back = now->fail;
|
back = now->fail;
|
||||||
while(back != NULL) {
|
while(back != NULL) {
|
||||||
if(back->next && (backiter = back->next->find(iter->first)) != back->next->end())
|
if(back->next && (backiter = back->next->find(iter->first)) != back->next->end())
|
||||||
@ -233,7 +231,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void _createTrie(const KeysContainerType& keys, const ValueContainerType& valuePointers)
|
void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*> & valuePointers)
|
||||||
{
|
{
|
||||||
if(valuePointers.empty() || keys.empty())
|
if(valuePointers.empty() || keys.empty())
|
||||||
{
|
{
|
||||||
@ -247,22 +245,22 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
private:
|
private:
|
||||||
void _insertNode(const KeyContainerType& key, const ValueType* ptValue)
|
void _insertNode(const Unicode& key, const DictUnit* ptValue)
|
||||||
{
|
{
|
||||||
TrieNodeType* ptNode = _root;
|
TrieNode* ptNode = _root;
|
||||||
|
|
||||||
typename TrieNodeType::NextMap::const_iterator kmIter;
|
typename TrieNode::NextMap::const_iterator kmIter;
|
||||||
|
|
||||||
for(typename KeyContainerType::const_iterator citer = key.begin(); citer != key.end(); citer++)
|
for(typename Unicode::const_iterator citer = key.begin(); citer != key.end(); citer++)
|
||||||
{
|
{
|
||||||
if(NULL == ptNode->next)
|
if(NULL == ptNode->next)
|
||||||
{
|
{
|
||||||
ptNode->next = new typename TrieNodeType::NextMap;
|
ptNode->next = new typename TrieNode::NextMap;
|
||||||
}
|
}
|
||||||
kmIter = ptNode->next->find(*citer);
|
kmIter = ptNode->next->find(*citer);
|
||||||
if(ptNode->next->end() == kmIter)
|
if(ptNode->next->end() == kmIter)
|
||||||
{
|
{
|
||||||
TrieNodeType * nextNode = new TrieNodeType;
|
TrieNode * nextNode = new TrieNode;
|
||||||
nextNode->next = NULL;
|
nextNode->next = NULL;
|
||||||
nextNode->ptValue = NULL;
|
nextNode->ptValue = NULL;
|
||||||
|
|
||||||
@ -276,7 +274,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
ptNode->ptValue = ptValue;
|
ptNode->ptValue = ptValue;
|
||||||
}
|
}
|
||||||
void _deleteNode(TrieNodeType* node)
|
void _deleteNode(TrieNode* node)
|
||||||
{
|
{
|
||||||
if(!node)
|
if(!node)
|
||||||
{
|
{
|
||||||
@ -284,7 +282,7 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
if(node->next)
|
if(node->next)
|
||||||
{
|
{
|
||||||
typename TrieNodeType::NextMap::iterator it;
|
typename TrieNode::NextMap::iterator it;
|
||||||
for(it = node->next->begin(); it != node->next->end(); it++)
|
for(it = node->next->begin(); it != node->next->end(); it++)
|
||||||
{
|
{
|
||||||
_deleteNode(it->second);
|
_deleteNode(it->second);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user