mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
底层常用结构修整
This commit is contained in:
parent
83222918cc
commit
620d276887
@ -66,12 +66,12 @@ class DictTrie {
|
||||
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
return trie_->find(begin, end);
|
||||
}
|
||||
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, DagType& dag, size_t offset = 0) const {
|
||||
return trie_->find(begin, end, dag, offset);
|
||||
bool find(Unicode::const_iterator begin, Unicode::const_iterator end, LocalVector<pair<size_t, const DictUnit*> >& nexts, size_t offset = 0) const {
|
||||
return trie_->find(begin, end, nexts, offset);
|
||||
}
|
||||
void find(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<SegmentChar>& res) const {
|
||||
vector<Dag>& res) const {
|
||||
trie_->find(begin, end, res);
|
||||
}
|
||||
bool isUserDictSingleChineseWord(const Unicode::value_type& word) const {
|
||||
@ -79,7 +79,7 @@ class DictTrie {
|
||||
}
|
||||
double getMinWeight() const {
|
||||
return minWeight_;
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
void createTrie_(const vector<DictUnit>& dictUnits) {
|
||||
|
@ -31,7 +31,7 @@ class FullSegment: public SegmentBase {
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end,
|
||||
vector<Unicode>& res) const {
|
||||
//result of searching in trie tree
|
||||
DagType tRes;
|
||||
LocalVector<pair<size_t, const DictUnit*> > tRes;
|
||||
|
||||
//max index of res's words
|
||||
int maxIdx = 0;
|
||||
@ -45,9 +45,7 @@ class FullSegment: public SegmentBase {
|
||||
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++) {
|
||||
//find word start from uItr
|
||||
if (dictTrie_->find(uItr, end, tRes, 0)) {
|
||||
for(DagType::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||
//for (vector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||
{
|
||||
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++) {
|
||||
wordLen = itr->second->word.size();
|
||||
if (wordLen >= 2 || (tRes.size() == 1 && maxIdx <= uIdx)) {
|
||||
res.push_back(itr->second->word);
|
||||
|
@ -48,7 +48,7 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
|
||||
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<SegmentChar> segmentChars;
|
||||
vector<Dag> segmentChars;
|
||||
|
||||
dictTrie_->find(begin, end, segmentChars);
|
||||
|
||||
@ -63,16 +63,16 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
|
||||
private:
|
||||
void calcDP_(vector<SegmentChar>& segmentChars) const {
|
||||
void calcDP_(vector<Dag>& segmentChars) const {
|
||||
size_t nextPos;
|
||||
const DictUnit* p;
|
||||
double val;
|
||||
|
||||
for(vector<SegmentChar>::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) {
|
||||
for(vector<Dag>::reverse_iterator rit = segmentChars.rbegin(); rit != segmentChars.rend(); rit++) {
|
||||
rit->pInfo = NULL;
|
||||
rit->weight = MIN_DOUBLE;
|
||||
assert(!rit->dag.empty());
|
||||
for(DagType::const_iterator it = rit->dag.begin(); it != rit->dag.end(); it++) {
|
||||
assert(!rit->nexts.empty());
|
||||
for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
|
||||
nextPos = it->first;
|
||||
p = it->second;
|
||||
val = 0.0;
|
||||
@ -92,7 +92,7 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
void cut_(const vector<SegmentChar>& segmentChars,
|
||||
void cut_(const vector<Dag>& segmentChars,
|
||||
vector<Unicode>& res) const {
|
||||
size_t i = 0;
|
||||
while(i < segmentChars.size()) {
|
||||
|
@ -56,7 +56,6 @@ class QuerySegment: public SegmentBase {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const {
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes)) {
|
||||
|
47
src/Trie.hpp
47
src/Trie.hpp
@ -21,16 +21,14 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) {
|
||||
return os << string_format("%s %s %.3lf", s.c_str(), unit.tag.c_str(), unit.weight);
|
||||
}
|
||||
|
||||
typedef LocalVector<std::pair<size_t, const DictUnit*> > DagType;
|
||||
|
||||
struct SegmentChar {
|
||||
struct Dag {
|
||||
uint16_t uniCh;
|
||||
DagType dag;
|
||||
LocalVector<pair<size_t, const DictUnit*> > nexts;
|
||||
const DictUnit * pInfo;
|
||||
double weight;
|
||||
size_t nextPos;
|
||||
SegmentChar() : uniCh(), pInfo(NULL), weight(0.0), nextPos(0) {}
|
||||
~SegmentChar() {}
|
||||
Dag():uniCh(0), pInfo(NULL), weight(0.0), nextPos(0) {
|
||||
}
|
||||
};
|
||||
|
||||
typedef Unicode::value_type TrieKey;
|
||||
@ -47,10 +45,23 @@ class TrieNode {
|
||||
class Trie {
|
||||
public:
|
||||
static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
|
||||
public:
|
||||
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
||||
_createTrie(keys, valuePointers);
|
||||
}
|
||||
~Trie() {
|
||||
for (size_t i = 0; i < BASE_SIZE; i++) {
|
||||
if (_base[i].next == NULL) {
|
||||
continue;
|
||||
}
|
||||
for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
|
||||
_deleteNode(it->second);
|
||||
it->second = NULL;
|
||||
}
|
||||
delete _base[i].next;
|
||||
_base[i].next = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
const DictUnit* find(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
if (begin == end) {
|
||||
return NULL;
|
||||
@ -71,11 +82,9 @@ class Trie {
|
||||
return ptNode->ptValue;
|
||||
}
|
||||
|
||||
void find(
|
||||
Unicode::const_iterator begin,
|
||||
void find(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<struct SegmentChar>& res
|
||||
) const {
|
||||
vector<struct Dag>& res) const {
|
||||
res.resize(end - begin);
|
||||
|
||||
const TrieNode *ptNode = NULL;
|
||||
@ -106,7 +115,7 @@ class Trie {
|
||||
bool find(
|
||||
Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
DagType & res,
|
||||
LocalVector<pair<size_t, const DictUnit*> > & res,
|
||||
size_t offset = 0) const {
|
||||
if (begin == end) {
|
||||
return !res.empty();
|
||||
@ -135,20 +144,6 @@ class Trie {
|
||||
}
|
||||
return !res.empty();
|
||||
}
|
||||
~Trie() {
|
||||
for (size_t i = 0; i < BASE_SIZE; i++) {
|
||||
if (_base[i].next == NULL) {
|
||||
continue;
|
||||
}
|
||||
for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
|
||||
_deleteNode(it->second);
|
||||
it->second = NULL;
|
||||
}
|
||||
delete _base[i].next;
|
||||
_base[i].next = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void insertNode(const Unicode& key, const DictUnit* ptValue) {
|
||||
if (key.begin() == key.end()) {
|
||||
return;
|
||||
|
@ -32,9 +32,7 @@ TEST(DictTrieTest, Test1) {
|
||||
|
||||
EXPECT_EQ("[\"26469\", \"21040\"] v -8.870", s2);
|
||||
word = "清华大学";
|
||||
LocalVector<pair<size_t, const DictUnit*> > res;
|
||||
//vector<pair<size_t, const DictUnit* > resMap;
|
||||
LocalVector<pair<size_t, const DictUnit*> > res2;
|
||||
LocalVector<pair<size_t, const DictUnit*> > res, res2;
|
||||
const char * words[] = {"清", "清华", "清华大学"};
|
||||
for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
|
||||
ASSERT_TRUE(TransCode::decode(words[i], uni));
|
||||
@ -70,6 +68,6 @@ TEST(DictTrieTest, automation) {
|
||||
string word = "abcderf";
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
vector<struct SegmentChar> res;
|
||||
vector<struct Dag> res;
|
||||
trie.find(unicode.begin(), unicode.end(), res);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user