mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add localvector
This commit is contained in:
parent
014bea02ba
commit
76dd93051e
@ -44,7 +44,7 @@ namespace CppJieba
|
||||
class DictTrie: public InitOnOff
|
||||
{
|
||||
public:
|
||||
typedef Trie<Unicode::value_type, DictUnit> TrieType;
|
||||
typedef Trie<Unicode::value_type, DictUnit, Unicode, vector<Unicode>, vector<const DictUnit*> > TrieType;
|
||||
private:
|
||||
vector<DictUnit> _nodeInfos;
|
||||
TrieType * _trie;
|
||||
|
173
src/Limonp/LocalVector.hpp
Normal file
173
src/Limonp/LocalVector.hpp
Normal file
@ -0,0 +1,173 @@
|
||||
#ifndef LIMONP_LOCAL_VECTOR_HPP
|
||||
#define LIMONP_LOCAL_VECTOR_HPP
|
||||
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace Limonp
{
    using namespace std;

    // Number of elements stored inline inside every LocalVector before it
    // falls back to heap storage.
    const size_t LOCAL_VECTOR_BUFFER_SIZE = 32;

    // A minimal vector-like container that keeps up to
    // LOCAL_VECTOR_BUFFER_SIZE elements in an inline buffer and only
    // allocates with malloc() once that buffer is full.
    //
    // Elements are relocated and copied with memcpy(), and heap storage is
    // obtained with malloc()/free() without running constructors or
    // destructors, so T must be a type for which that is safe (plain
    // integers, pointers, POD structs).
    template <class T>
    class LocalVector
    {
        public:
            typedef const T* const_iterator ;
            typedef T value_type;
            typedef size_t size_type;
        private:
            T _buffer[LOCAL_VECTOR_BUFFER_SIZE]; // inline storage
            T * _ptr;                            // _buffer, or a malloc()ed block
            size_t _size;                        // number of elements in use
            size_t _capacity;                    // number of slots behind _ptr
        public:
            // Creates an empty vector that uses the inline buffer.
            LocalVector()
            {
                _init();
            }
            // Copy constructor: starts empty, then reuses operator=.
            LocalVector(const LocalVector<T>& vec)
            {
                _init();
                *this = vec;
            }
            // Builds a vector from the range [begin, end).
            LocalVector(const_iterator begin, const_iterator end) // TODO: make it faster
            {
                _init();
                while(begin != end)
                {
                    push_back(*begin++);
                }
            }
            // Builds a vector holding `size` copies of `t`.
            LocalVector(size_t size, const T& t) // TODO: make it faster
            {
                _init();
                while(size--)
                {
                    push_back(t);
                }
            }
            // Releases the heap block, if one was allocated.
            ~LocalVector()
            {
                if(_ptr != _buffer)
                {
                    free(_ptr);
                }
            }
        public:
            // Replaces the contents with a copy of `vec`.
            // The capacity of `vec` is copied as well.
            LocalVector<T>& operator = (const LocalVector<T>& vec)
            {
                // Self-assignment must be a no-op: clear() below would
                // otherwise discard (and possibly free) the very data we
                // are about to copy from.
                if(this == &vec)
                {
                    return *this;
                }
                clear();
                _size = vec.size();
                _capacity = vec.capacity();
                if(vec._buffer == vec._ptr)
                {
                    // Source lives in its inline buffer, so ours is big enough too.
                    memcpy(_buffer, vec._buffer, sizeof(T) * _size);
                    _ptr = _buffer;
                }
                else
                {
                    _ptr = (T*) malloc(vec.capacity() * sizeof(T));
                    assert(_ptr);
                    memcpy(_ptr, vec._ptr, vec.size() * sizeof(T));
                }
                return *this;
            }
        private:
            // Resets to the empty state backed by the inline buffer.
            // Does not free anything; callers release heap storage first.
            void _init()
            {
                _ptr = _buffer;
                _size = 0;
                _capacity = LOCAL_VECTOR_BUFFER_SIZE;
            }
        public:
            // Unchecked element access.
            T& operator [] (size_t i)
            {
                return _ptr[i];
            }
            // Unchecked element access (read-only).
            const T& operator [] (size_t i) const
            {
                return _ptr[i];
            }
            // Appends a copy of `t`, doubling the capacity when full.
            void push_back(const T& t)
            {
                if(!full())
                {
                    _ptr[_size++] = t;
                    return ;
                }
                assert(_capacity);
                // `t` may refer to an element of this vector; reserve() can
                // free the storage it lives in, so take a copy first.
                const T value = t;
                reserve(_capacity * 2);
                _ptr[_size ++ ] = value;
            }
            // Ensures room for at least `size` elements. Never shrinks.
            // Growing always moves the elements into a fresh malloc()ed block.
            void reserve(size_t size)
            {
                if(size <= _capacity)
                {
                    return;
                }
                T * next = (T*)malloc(sizeof(T) * size);
                assert(next);
                T * old = _ptr;
                _ptr = next;
                // Only the first _size slots hold live elements.
                memcpy(_ptr, old, sizeof(T) * _size);
                _capacity = size;
                if(old != _buffer)
                {
                    free(old);
                }
            }
            // True when no further element fits without growing.
            bool full() const
            {
                return size() == capacity();
            }
            bool empty() const
            {
                return 0 == size();
            }
            size_t size() const
            {
                return _size;
            }
            size_t capacity() const
            {
                return _capacity;
            }
            const_iterator begin() const
            {
                return _ptr;
            }
            const_iterator end() const
            {
                return _ptr + _size;
            }
            // Removes all elements, frees any heap block and returns to the
            // inline buffer.
            void clear()
            {
                if(_ptr != _buffer)
                {
                    free(_ptr);
                }
                _init();
            }
    };

    // Prints the vector as ["a", "b", ...]; an empty vector prints as [].
    template <class T>
    ostream & operator << (ostream& os, const LocalVector<T>& vec)
    {
        if(vec.empty())
        {
            return os << "[]";
        }
        os<<"[\""<<vec[0];
        for(size_t i = 1; i < vec.size(); i++)
        {
            os<<"\", \""<<vec[i];
        }
        os<<"\"]";
        return os;
    }

}
|
||||
|
||||
#endif
|
@ -208,7 +208,8 @@ namespace Limonp
|
||||
return (((uint16_t(high) & 0x00ff ) << 8) | (uint16_t(low) & 0x00ff));
|
||||
}
|
||||
|
||||
inline bool utf8ToUnicode(const char * const str, size_t len, vector<uint16_t>& vec)
|
||||
template <class Uint16Container>
|
||||
bool utf8ToUnicode(const char * const str, size_t len, Uint16Container& vec)
|
||||
{
|
||||
if(!str)
|
||||
{
|
||||
@ -247,12 +248,14 @@ namespace Limonp
|
||||
}
|
||||
return true;
|
||||
}
|
||||
inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec)
|
||||
template <class Uint16Container>
|
||||
bool utf8ToUnicode(const string& str, Uint16Container& vec)
|
||||
{
|
||||
return utf8ToUnicode(str.c_str(), str.size(), vec);
|
||||
}
|
||||
|
||||
inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||
template <class Uint16ContainerConIter>
|
||||
bool unicodeToUtf8(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res)
|
||||
{
|
||||
if(begin >= end)
|
||||
{
|
||||
@ -284,7 +287,8 @@ namespace Limonp
|
||||
}
|
||||
|
||||
|
||||
inline bool gbkTrans(const char* const str, size_t len, vector<uint16_t>& vec)
|
||||
template <class Uint16Container>
|
||||
bool gbkTrans(const char* const str, size_t len, Uint16Container& vec)
|
||||
{
|
||||
vec.clear();
|
||||
if(!str)
|
||||
@ -316,12 +320,14 @@ namespace Limonp
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
|
||||
template <class Uint16Container>
|
||||
bool gbkTrans(const string& str, Uint16Container& vec)
|
||||
{
|
||||
return gbkTrans(str.c_str(), str.size(), vec);
|
||||
}
|
||||
|
||||
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
|
||||
template <class Uint16ContainerConIter>
|
||||
bool gbkTrans(Uint16ContainerConIter begin, Uint16ContainerConIter end, string& res)
|
||||
{
|
||||
if(begin >= end)
|
||||
{
|
||||
|
@ -7,13 +7,14 @@
|
||||
|
||||
|
||||
#include "Limonp/StringUtil.hpp"
|
||||
#include "Limonp/LocalVector.hpp"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
|
||||
using namespace Limonp;
|
||||
typedef uint16_t UnicodeValueType;
|
||||
typedef std::vector<UnicodeValueType> Unicode;
|
||||
typedef Limonp::LocalVector<UnicodeValueType> Unicode;
|
||||
namespace TransCode
|
||||
{
|
||||
inline bool decode(const string& str, Unicode& res)
|
||||
|
18
src/Trie.hpp
18
src/Trie.hpp
@ -17,7 +17,7 @@ namespace CppJieba
|
||||
const ValueType * ptValue;
|
||||
};
|
||||
|
||||
template <class KeyType, class ValueType>
|
||||
template <class KeyType, class ValueType, class KeyContainerType = vector<KeyType>, class KeysContainerType = vector<KeyContainerType>, class ValueContainerType = vector<const ValueType* > >
|
||||
class Trie
|
||||
{
|
||||
public:
|
||||
@ -25,7 +25,7 @@ namespace CppJieba
|
||||
private:
|
||||
TrieNodeType* _root;
|
||||
public:
|
||||
Trie(const vector<vector<KeyType> >& keys, const vector<const ValueType* >& valuePointers)
|
||||
Trie(const KeysContainerType& keys, const ValueContainerType& valuePointers)
|
||||
{
|
||||
_root = new TrieNodeType;
|
||||
_root->ptKeyMap = NULL;
|
||||
@ -41,11 +41,11 @@ namespace CppJieba
|
||||
}
|
||||
}
|
||||
public:
|
||||
const ValueType* find(typename vector<KeyType>::const_iterator begin, typename vector<KeyType>::const_iterator end) const
|
||||
const ValueType* find(typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator end) const
|
||||
{
|
||||
typename TrieNodeType::KeyMapType::const_iterator citer;
|
||||
const TrieNodeType* ptNode = _root;
|
||||
for(typename vector<KeyType>::const_iterator it = begin; it != end; it++)
|
||||
for(typename KeyContainerType::const_iterator it = begin; it != end; it++)
|
||||
{
|
||||
assert(ptNode);
|
||||
if(NULL == ptNode->ptKeyMap || ptNode->ptKeyMap->end() == (citer = ptNode->ptKeyMap->find(*it)))
|
||||
@ -56,12 +56,12 @@ namespace CppJieba
|
||||
}
|
||||
return ptNode->ptValue;
|
||||
}
|
||||
bool find(typename vector<KeyType>::const_iterator begin, typename vector<KeyType> ::const_iterator end, map<typename vector<KeyType>::size_type, const ValueType* >& ordererMap, size_t offset = 0) const
|
||||
bool find(typename KeyContainerType::const_iterator begin, typename KeyContainerType::const_iterator end, map<typename KeyContainerType::size_type, const ValueType* >& ordererMap, size_t offset = 0) const
|
||||
{
|
||||
const TrieNodeType * ptNode = _root;
|
||||
typename TrieNodeType::KeyMapType::const_iterator citer;
|
||||
ordererMap.clear();
|
||||
for(typename vector<KeyType>::const_iterator itr = begin; itr != end ; itr++)
|
||||
for(typename KeyContainerType::const_iterator itr = begin; itr != end ; itr++)
|
||||
{
|
||||
assert(ptNode);
|
||||
if(NULL == ptNode->ptKeyMap || ptNode->ptKeyMap->end() == (citer = ptNode->ptKeyMap->find(*itr)))
|
||||
@ -77,7 +77,7 @@ namespace CppJieba
|
||||
return ordererMap.size();
|
||||
}
|
||||
private:
|
||||
void _createTrie(const vector<vector<KeyType> >& keys, const vector<const ValueType*>& valuePointers)
|
||||
void _createTrie(const KeysContainerType& keys, const ValueContainerType& valuePointers)
|
||||
{
|
||||
if(valuePointers.empty() || keys.empty())
|
||||
{
|
||||
@ -91,13 +91,13 @@ namespace CppJieba
|
||||
}
|
||||
}
|
||||
private:
|
||||
void _insertNode(const vector<KeyType>& key, const ValueType* ptValue)
|
||||
void _insertNode(const KeyContainerType& key, const ValueType* ptValue)
|
||||
{
|
||||
TrieNodeType* ptNode = _root;
|
||||
|
||||
typename TrieNodeType::KeyMapType::const_iterator kmIter;
|
||||
|
||||
for(typename vector<KeyType>::const_iterator citer = key.begin(); citer != key.end(); citer++)
|
||||
for(typename KeyContainerType::const_iterator citer = key.begin(); citer != key.end(); citer++)
|
||||
{
|
||||
if(NULL == ptNode->ptKeyMap)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user