去掉一些没必要的返回值判断,精简代码

This commit is contained in:
yanyiwu 2015-07-24 14:39:03 +08:00
parent 0f79fa6c24
commit 8a3ced2b27
9 changed files with 47 additions and 66 deletions

View File

@ -73,10 +73,10 @@ class DictTrie {
} }
void findByLimit(Unicode::const_iterator begin, void findByLimit(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,
vector<struct Dag>&res,
size_t min_word_len, size_t min_word_len,
size_t max_word_len) const { size_t max_word_len,
trie_->findByLimit(begin, end, res, min_word_len, max_word_len); vector<struct Dag>&res) const {
trie_->findByLimit(begin, end, min_word_len, max_word_len, res);
} }
bool isUserDictSingleChineseWord(const Rune& word) const { bool isUserDictSingleChineseWord(const Rune& word) const {
return isIn(userDictSingleChineseWord_, word); return isIn(userDictSingleChineseWord_, word);

View File

@ -28,7 +28,8 @@ class FullSegment: public SegmentBase {
} }
} }
using SegmentBase::cut; using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, virtual void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<Unicode>& res) const { vector<Unicode>& res) const {
//result of searching in trie tree //result of searching in trie tree
LocalVector<pair<size_t, const DictUnit*> > tRes; LocalVector<pair<size_t, const DictUnit*> > tRes;
@ -58,7 +59,6 @@ class FullSegment: public SegmentBase {
} }
uIdx++; uIdx++;
} }
return true;
} }
private: private:
const DictTrie* dictTrie_; const DictTrie* dictTrie_;

View File

@ -24,13 +24,13 @@ class HMMSegment: public SegmentBase {
} }
using SegmentBase::cut; using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const { void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
Unicode::const_iterator left = begin; Unicode::const_iterator left = begin;
Unicode::const_iterator right = begin; Unicode::const_iterator right = begin;
while(right != end) { while(right != end) {
if(*right < 0x80) { if(*right < 0x80) {
if(left != right && !cut_(left, right, res)) { if(left != right) {
return false; cut_(left, right, res);
} }
left = right; left = right;
do { do {
@ -50,10 +50,9 @@ class HMMSegment: public SegmentBase {
right++; right++;
} }
} }
if(left != right && !cut_(left, right, res)) { if(left != right) {
return false; cut_(left, right, res);
} }
return true;
} }
private: private:
// sequential letters rule // sequential letters rule
@ -92,12 +91,9 @@ class HMMSegment: public SegmentBase {
} }
return begin; return begin;
} }
bool cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { void cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
vector<size_t> status; vector<size_t> status;
if(!viterbi_(begin, end, status)) { viterbi_(begin, end, status);
LogError("viterbi_ failed.");
return false;
}
Unicode::const_iterator left = begin; Unicode::const_iterator left = begin;
Unicode::const_iterator right; Unicode::const_iterator right;
@ -108,15 +104,11 @@ class HMMSegment: public SegmentBase {
left = right; left = right;
} }
} }
return true;
} }
bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end, void viterbi_(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<size_t>& status) const { vector<size_t>& status) const {
if(begin == end) {
return false;
}
size_t Y = HMMModel::STATUS_SUM; size_t Y = HMMModel::STATUS_SUM;
size_t X = end - begin; size_t X = end - begin;
@ -166,8 +158,6 @@ class HMMSegment: public SegmentBase {
status[x] = stat; status[x] = stat;
stat = path[x + stat*X]; stat = path[x + stat*X];
} }
return true;
} }
private: private:

View File

@ -33,7 +33,7 @@ class MPSegment: public SegmentBase {
} }
using SegmentBase::cut; using SegmentBase::cut;
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const { void cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
vector<Dag> dags; vector<Dag> dags;
dictTrie_->find(begin, end, dags); dictTrie_->find(begin, end, dags);
@ -41,8 +41,19 @@ class MPSegment: public SegmentBase {
calcDP_(dags); calcDP_(dags);
cut_(dags, res); cut_(dags, res);
}
return true; void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
size_t min_word_len,
size_t max_word_len,
vector<Unicode>&res) const {
vector<Dag> dags;
dictTrie_->findByLimit(begin, end,
min_word_len,
max_word_len,
dags);
calcDP_(dags);
cut_(dags, res);
} }
const DictTrie* getDictTrie() const { const DictTrie* getDictTrie() const {
return dictTrie_; return dictTrie_;

View File

@ -21,13 +21,10 @@ class MixSegment: public SegmentBase {
virtual ~MixSegment() { virtual ~MixSegment() {
} }
using SegmentBase::cut; using SegmentBase::cut;
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
vector<Unicode> words; vector<Unicode> words;
words.reserve(end - begin); words.reserve(end - begin);
if(!mpSeg_.cut(begin, end, words)) { mpSeg_.cut(begin, end, words);
LogError("mpSeg cutDAG failed.");
return false;
}
vector<Unicode> hmmRes; vector<Unicode> hmmRes;
hmmRes.reserve(end - begin); hmmRes.reserve(end - begin);
@ -48,10 +45,7 @@ class MixSegment: public SegmentBase {
} }
// cut the sequence with hmm // cut the sequence with hmm
if (!hmmSeg_.cut(piece.begin(), piece.end(), hmmRes)) { hmmSeg_.cut(piece.begin(), piece.end(), hmmRes);
LogError("hmmSeg_ cut failed.");
return false;
}
//put hmm result to result //put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) { for (size_t k = 0; k < hmmRes.size(); k++) {
@ -65,7 +59,6 @@ class MixSegment: public SegmentBase {
//let i jump over this piece //let i jump over this piece
i = j - 1; i = j - 1;
} }
return true;
} }
const DictTrie* getDictTrie() const { const DictTrie* getDictTrie() const {

View File

@ -28,32 +28,26 @@ class QuerySegment: public SegmentBase {
virtual ~QuerySegment() { virtual ~QuerySegment() {
} }
using SegmentBase::cut; using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const { void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
//use mix cut first //use mix cut first
vector<Unicode> mixRes; vector<Unicode> mixRes;
if (!mixSeg_.cut(begin, end, mixRes)) { mixSeg_.cut(begin, end, mixRes);
LogError("mixSeg_ cut failed.");
return false;
}
vector<Unicode> fullRes; vector<Unicode> fullRes;
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) { for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
// if it's too long, cut with fullSeg_, put fullRes in res // if it's too long, cut with fullSeg_, put fullRes in res
if (mixResItr->size() > maxWordLen_) { if (mixResItr->size() > maxWordLen_) {
if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) { fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes);
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) { for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
res.push_back(*fullResItr); res.push_back(*fullResItr);
}
//clear tmp res
fullRes.clear();
} }
//clear tmp res
fullRes.clear();
} else { // just use the mix result } else { // just use the mix result
res.push_back(*mixResItr); res.push_back(*mixResItr);
} }
} }
return true;
} }
private: private:
MixSegment mixSeg_; MixSegment mixSeg_;

View File

@ -27,7 +27,7 @@ class SegmentBase: public ISegment, public NonCopyable {
virtual ~SegmentBase() { virtual ~SegmentBase() {
}; };
public: public:
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0; virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
virtual bool cut(const string& str, vector<string>& res) const { virtual bool cut(const string& str, vector<string>& res) const {
res.clear(); res.clear();
@ -55,25 +55,18 @@ class SegmentBase: public ISegment, public NonCopyable {
return true; return true;
} }
virtual bool cut(Unicode::const_iterator begin, void cut(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,
vector<string>& res) const { vector<string>& res) const {
if(begin == end) {
return false;
}
vector<Unicode> uRes; vector<Unicode> uRes;
uRes.reserve(end - begin); uRes.reserve(end - begin);
if (!cut(begin, end, uRes)) { cut(begin, end, uRes);
return false;
}
size_t offset = res.size(); size_t offset = res.size();
res.resize(res.size() + uRes.size()); res.resize(res.size() + uRes.size());
for(size_t i = 0; i < uRes.size(); i ++, offset++) { for(size_t i = 0; i < uRes.size(); i ++, offset++) {
TransCode::encode(uRes[i], res[offset]); TransCode::encode(uRes[i], res[offset]);
} }
return true;
} }
private: private:
void loadSpecialSymbols_() { void loadSpecialSymbols_() {

View File

@ -88,9 +88,9 @@ class Trie {
void findByLimit(Unicode::const_iterator begin, void findByLimit(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,
vector<struct Dag>&res,
size_t min_word_len, size_t min_word_len,
size_t max_word_len) const { size_t max_word_len,
vector<struct Dag>&res) const {
res.resize(end - begin); res.resize(end - begin);
// min_word_len start from 1; // min_word_len start from 1;
@ -130,7 +130,7 @@ class Trie {
void find(Unicode::const_iterator begin, void find(Unicode::const_iterator begin,
Unicode::const_iterator end, Unicode::const_iterator end,
vector<struct Dag>& res) const { vector<struct Dag>& res) const {
findByLimit(begin, end, res, MIN_WORD_LENGTH, MAX_WORD_LENGTH); findByLimit(begin, end, MIN_WORD_LENGTH, MAX_WORD_LENGTH, res);
} }
void insertNode(const Unicode& key, const DictUnit* ptValue) { void insertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) { if (key.begin() == key.end()) {

View File

@ -128,7 +128,7 @@ TEST(DictTrieTest, Dag) {
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.findByLimit(unicode.begin(), unicode.end(), res, 2, 3); trie.findByLimit(unicode.begin(), unicode.end(), 2, 3, res);
size_t nexts_sizes[] = {1, 0, 1, 0}; size_t nexts_sizes[] = {1, 0, 1, 0};
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0])); ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
@ -143,7 +143,7 @@ TEST(DictTrieTest, Dag) {
Unicode unicode; Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode)); ASSERT_TRUE(TransCode::decode(word, unicode));
vector<struct Dag> res; vector<struct Dag> res;
trie.findByLimit(unicode.begin(), unicode.end(), res, 0, 4); trie.findByLimit(unicode.begin(), unicode.end(), 0, 4, res);
size_t nexts_sizes[] = {3, 1, 2, 1}; size_t nexts_sizes[] = {3, 1, 2, 1};
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0])); ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));