底层常用结构修整

This commit is contained in:
yanyiwu 2015-07-20 15:41:13 +08:00
parent 83222918cc
commit 620d276887
6 changed files with 35 additions and 45 deletions

View File

@ -66,12 +66,12 @@ class DictTrie {
// Exact lookup for the iterator pair (begin, end): forwards the call to
// trie_->find(begin, end) and returns that call's result unchanged.
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
return trie_->find(begin, end);
}
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const {
return trie_->find(begin, end, dag, offset);
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, LocalVector<pair<size_t, const DictUnit*> >& nexts, size_t offset = 0) const {
return trie_->find(begin, end, nexts, offset);
}
void find(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<SegmentChar>& res) const {
vector<Dag>& res) const {
trie_->find(begin, end, res);
}
bool isUserDictSingleChineseWord(const Unicode::value_type& word) const {
@ -79,7 +79,7 @@ class DictTrie {
}
double getMinWeight() const {
return minWeight_;
};
}
private:
void createTrie_(const vector<DictUnit>& dictUnits) {

View File

@ -31,7 +31,7 @@ class FullSegment: public SegmentBase {
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end,
vector<Unicode>& res) const {
//result of searching in trie tree
DagType tRes;
LocalVector<pair<size_t, const DictUnit*> > tRes;
//max index of res's words
int maxIdx = 0;
@ -45,9 +45,7 @@ class FullSegment: public SegmentBase {
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) {
//find word start from uItr
if (dictTrie_->find(uItr, end, tRes, 0)) {
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
{
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) {
wordLen = itr->second->word.size();
if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) {
res.push_back(itr->second->word);

View File

@ -48,7 +48,7 @@ class MPSegment: public SegmentBase {
}
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
vector<SegmentChar> segmentChars;
vector<Dag> segmentChars;
dictTrie_->find(begin, end, segmentChars);
@ -63,16 +63,16 @@ class MPSegment: public SegmentBase {
}
private:
void calcDP_(vector<SegmentChar>& segmentChars) const {
void calcDP_(vector<Dag>& segmentChars) const {
size_t nextPos;
const DictUnit* p;
double val;
for(vector<SegmentChar>::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) {
for(vector<Dag>::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) {
rit->pInfo = NULL;
rit->weight = MIN_DOUBLE;
assert(!rit->dag.empty());
for(DagType::const_iterator it = rit->dag.begin(); it != rit->dag.end(); it++) {
assert(!rit->nexts.empty());
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
nextPos = it->first;
p = it->second;
val = 0.0;
@ -92,7 +92,7 @@ class MPSegment: public SegmentBase {
}
}
}
void cut_(const vector<SegmentChar>& segmentChars,
void cut_(const vector<Dag>& segmentChars,
vector<Unicode>& res) const {
size_t i = 0;
while(i < segmentChars.size()) {

View File

@ -56,7 +56,6 @@ class QuerySegment: public SegmentBase {
return true;
}
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const {
vector<Unicode> uRes;
if (!cut(begin, end, uRes)) {

View File

@ -21,16 +21,14 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) {
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
}
typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
struct SegmentChar {
struct Dag {
uint16_t uniCh;
DagType dag;
LocalVector<pair<size_t, const DictUnit*> > nexts;
const DictUnit * pInfo;
double weight;
size_t nextPos;
SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {}
~SegmentChar() {}
Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) {
}
};
typedef Unicode::value_type TrieKey;
@ -47,10 +45,23 @@ class TrieNode {
class Trie {
public:
static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
public:
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
_createTrie(keys, valuePointers);
}
// Destructor: visits every one of the BASE_SIZE slots in _base and releases
// the child map hanging off each slot.
~Trie() {
for (size_t i = 0; i < BASE_SIZE; i++) {
// A slot with no child map has nothing to release.
if (_base[i].next == NULL) {
continue;
}
// Pass each child node to _deleteNode, then null the map entry so the map
// never holds a stale pointer.
// NOTE(review): _deleteNode is defined outside this view; presumably it
// frees the node and its descendants -- confirm.
for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
_deleteNode(it->second);
it->second = NULL;
}
// Free the child map itself and clear the slot's pointer to it.
delete _base[i].next;
_base[i].next = NULL;
}
}
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
if (begin == end) {
return NULL;
@ -71,11 +82,9 @@ class Trie {
return ptNode->ptValue;
}
void find(
Unicode::const_iterator begin,
void find(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<struct SegmentChar>& res
) const {
vector<struct Dag>& res) const {
res.resize(end - begin);
const TrieNode *ptNode = NULL;
@ -106,7 +115,7 @@ class Trie {
bool find(
Unicode::const_iterator begin,
Unicode::const_iterator end,
DagType & res,
LocalVector<pair<size_t, const DictUnit*> > & res,
size_t offset = 0) const {
if (begin == end) {
return !res.empty();
@ -135,20 +144,6 @@ class Trie {
}
return !res.empty();
}
// Destructor: visits every one of the BASE_SIZE slots in _base and releases
// the child map hanging off each slot.
~Trie() {
for (size_t i = 0; i < BASE_SIZE; i++) {
// A slot with no child map has nothing to release.
if (_base[i].next == NULL) {
continue;
}
// Pass each child node to _deleteNode, then null the map entry so the map
// never holds a stale pointer.
// NOTE(review): _deleteNode is defined outside this view; presumably it
// frees the node and its descendants -- confirm.
for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
_deleteNode(it->second);
it->second = NULL;
}
// Free the child map itself and clear the slot's pointer to it.
delete _base[i].next;
_base[i].next = NULL;
}
}
void insertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {
return;

View File

@ -32,9 +32,7 @@ TEST(DictTrieTest, Test1) {
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
word = "清华大学";
LocalVector<pair<size_t, const DictUnit*> > res;
//vector<pair<size_t, const DictUnit* > resMap;
LocalVector<pair<size_t, const DictUnit*> > res2;
LocalVector<pair<size_t, const DictUnit*> > res, res2;
const char * words[] = {"", "清华", "清华大学"};
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
ASSERT_TRUE(TransCode::decode(words[i], uni));
@ -70,6 +68,6 @@ TEST(DictTrieTest, automation) {
string word = "abcderf";
Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode));
vector<struct SegmentChar> res;
vector<struct Dag> res;
trie.find(unicode.begin(), unicode.end(), res);
}