去掉一些没必要的返回值判断,精简代码

This commit is contained in:
yanyiwu 2015-07-24 14:39:03 +08:00
parent 0f79fa6c24
commit 8a3ced2b27
9 changed files with 47 additions and 66 deletions

View File

@ -73,10 +73,10 @@ class DictTrie {
}
void findByLimit(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<struct Dag>&res,
size_t min_word_len,
size_t max_word_len) const {
trie_->findByLimit(begin, end, res, min_word_len, max_word_len);
size_t max_word_len,
vector<struct Dag>&res) const {
trie_->findByLimit(begin, end, min_word_len, max_word_len, res);
}
bool isUserDictSingleChineseWord(const Rune& word) const {
return isIn(userDictSingleChineseWord_, word);

View File

@ -28,7 +28,8 @@ class FullSegment: public SegmentBase {
}
}
using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end,
virtual void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<Unicode>& res) const {
//resut of searching in trie tree
LocalVector<pair<size_t, const DictUnit*> > tRes;
@ -58,7 +59,6 @@ class FullSegment: public SegmentBase {
}
uIdx++;
}
return true;
}
private:
const DictTrie* dictTrie_;

View File

@ -24,13 +24,13 @@ class HMMSegment: public SegmentBase {
}
using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
Unicode::const_iterator left = begin;
Unicode::const_iterator right = begin;
while(right != end) {
if(*right < 0x80) {
if(left != right && !cut_(left, right, res)) {
return false;
if(left != right) {
cut_(left, right, res);
}
left = right;
do {
@ -50,10 +50,9 @@ class HMMSegment: public SegmentBase {
right++;
}
}
if(left != right && !cut_(left, right, res)) {
return false;
if(left != right) {
cut_(left, right, res);
}
return true;
}
private:
// sequential letters rule
@ -92,12 +91,9 @@ class HMMSegment: public SegmentBase {
}
return begin;
}
bool cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
void cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
vector<size_t> status;
if(!viterbi_(begin, end, status)) {
LogError("viterbi_ failed.");
return false;
}
viterbi_(begin, end, status);
Unicode::const_iterator left = begin;
Unicode::const_iterator right;
@ -108,15 +104,11 @@ class HMMSegment: public SegmentBase {
left = right;
}
}
return true;
}
bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end,
void viterbi_(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<size_t>& status) const {
if(begin == end) {
return false;
}
size_t Y = HMMModel::STATUS_SUM;
size_t X = end - begin;
@ -166,8 +158,6 @@ class HMMSegment: public SegmentBase {
status[x] = stat;
stat = path[x + stat*X];
}
return true;
}
private:

View File

@ -33,7 +33,7 @@ class MPSegment: public SegmentBase {
}
using SegmentBase::cut;
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
void cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
vector<Dag> dags;
dictTrie_->find(begin, end, dags);
@ -41,8 +41,19 @@ class MPSegment: public SegmentBase {
calcDP_(dags);
cut_(dags, res);
return true;
}
// Overload of cut() that takes explicit word-length limits.
// Segments the rune range [begin, end) and hands the output vector `res`
// to cut_().
//   begin, end    - iterators delimiting the input Unicode sequence
//   min_word_len  - forwarded unchanged to DictTrie::findByLimit
//   max_word_len  - forwarded unchanged to DictTrie::findByLimit
//   res           - output vector passed on to cut_()
// NOTE(review): whether results are appended to or replace existing
// contents of `res` depends on cut_(), which is not visible here - confirm.
void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
size_t min_word_len,
size_t max_word_len,
vector<Unicode>&res) const {
vector<Dag> dags;
// Fill `dags` from the dictionary trie, constrained by the length limits.
dictTrie_->findByLimit(begin, end,
min_word_len,
max_word_len,
dags);
// Same two-step pipeline as the unlimited cut(): calcDP_ over the DAGs,
// then cut_ to emit into `res`.
calcDP_(dags);
cut_(dags, res);
}
const DictTrie* getDictTrie() const {
return dictTrie_;

View File

@ -21,13 +21,10 @@ class MixSegment: public SegmentBase {
virtual ~MixSegment() {
}
using SegmentBase::cut;
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
vector<Unicode> words;
words.reserve(end - begin);
if(!mpSeg_.cut(begin, end, words)) {
LogError("mpSeg cutDAG failed.");
return false;
}
mpSeg_.cut(begin, end, words);
vector<Unicode> hmmRes;
hmmRes.reserve(end - begin);
@ -48,10 +45,7 @@ class MixSegment: public SegmentBase {
}
// cut the sequence with hmm
if (!hmmSeg_.cut(piece.begin(), piece.end(), hmmRes)) {
LogError("hmmSeg_ cut failed.");
return false;
}
hmmSeg_.cut(piece.begin(), piece.end(), hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
@ -65,7 +59,6 @@ class MixSegment: public SegmentBase {
//let i jump over this piece
i = j - 1;
}
return true;
}
const DictTrie* getDictTrie() const {

View File

@ -28,32 +28,26 @@ class QuerySegment: public SegmentBase {
virtual ~QuerySegment() {
}
using SegmentBase::cut;
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
//use mix cut first
vector<Unicode> mixRes;
if (!mixSeg_.cut(begin, end, mixRes)) {
LogError("mixSeg_ cut failed.");
return false;
}
mixSeg_.cut(begin, end, mixRes);
vector<Unicode> fullRes;
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
// if it's too long, cut with fullSeg_, put fullRes in res
if (mixResItr->size() > maxWordLen_) {
if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) {
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
res.push_back(*fullResItr);
}
//clear tmp res
fullRes.clear();
fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes);
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
res.push_back(*fullResItr);
}
//clear tmp res
fullRes.clear();
} else { // just use the mix result
res.push_back(*mixResItr);
}
}
return true;
}
private:
MixSegment mixSeg_;

View File

@ -27,7 +27,7 @@ class SegmentBase: public ISegment, public NonCopyable {
virtual ~SegmentBase() {
};
public:
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
virtual bool cut(const string& str, vector<string>& res) const {
res.clear();
@ -55,25 +55,18 @@ class SegmentBase: public ISegment, public NonCopyable {
return true;
}
virtual bool cut(Unicode::const_iterator begin,
void cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<string>& res) const {
if(begin == end) {
return false;
}
vector<Unicode> uRes;
uRes.reserve(end - begin);
if (!cut(begin, end, uRes)) {
return false;
}
cut(begin, end, uRes);
size_t offset = res.size();
res.resize(res.size() + uRes.size());
for(size_t i = 0; i < uRes.size(); i ++, offset++) {
TransCode::encode(uRes[i], res[offset]);
}
return true;
}
private:
void loadSpecialSymbols_() {

View File

@ -88,9 +88,9 @@ class Trie {
void findByLimit(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<struct Dag>&res,
size_t min_word_len,
size_t max_word_len) const {
size_t max_word_len,
vector<struct Dag>&res) const {
res.resize(end - begin);
// min_word_len start from 1;
@ -130,7 +130,7 @@ class Trie {
void find(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<struct Dag>& res) const {
findByLimit(begin, end, res, MIN_WORD_LENGTH, MAX_WORD_LENGTH);
findByLimit(begin, end, MIN_WORD_LENGTH, MAX_WORD_LENGTH, res);
}
void insertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {

View File

@ -128,7 +128,7 @@ TEST(DictTrieTest, Dag) {
Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode));
vector<struct Dag> res;
trie.findByLimit(unicode.begin(), unicode.end(), res, 2, 3);
trie.findByLimit(unicode.begin(), unicode.end(), 2, 3, res);
size_t nexts_sizes[] = {1, 0, 1, 0};
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
@ -143,7 +143,7 @@ TEST(DictTrieTest, Dag) {
Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode));
vector<struct Dag> res;
trie.findByLimit(unicode.begin(), unicode.end(), res, 0, 4);
trie.findByLimit(unicode.begin(), unicode.end(), 0, 4, res);
size_t nexts_sizes[] = {3, 1, 2, 1};
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));