mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
去掉一些没必要的返回值判断,精简代码
This commit is contained in:
parent
0f79fa6c24
commit
8a3ced2b27
@ -73,10 +73,10 @@ class DictTrie {
|
||||
}
|
||||
void findByLimit(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<struct Dag>&res,
|
||||
size_t min_word_len,
|
||||
size_t max_word_len) const {
|
||||
trie_->findByLimit(begin, end, res, min_word_len, max_word_len);
|
||||
size_t max_word_len,
|
||||
vector<struct Dag>&res) const {
|
||||
trie_->findByLimit(begin, end, min_word_len, max_word_len, res);
|
||||
}
|
||||
bool isUserDictSingleChineseWord(const Rune& word) const {
|
||||
return isIn(userDictSingleChineseWord_, word);
|
||||
|
@ -28,7 +28,8 @@ class FullSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
using SegmentBase::cut;
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end,
|
||||
virtual void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<Unicode>& res) const {
|
||||
//resut of searching in trie tree
|
||||
LocalVector<pair<size_t, const DictUnit*> > tRes;
|
||||
@ -58,7 +59,6 @@ class FullSegment: public SegmentBase {
|
||||
}
|
||||
uIdx++;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
const DictTrie* dictTrie_;
|
||||
|
@ -24,13 +24,13 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
|
||||
using SegmentBase::cut;
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res)const {
|
||||
Unicode::const_iterator left = begin;
|
||||
Unicode::const_iterator right = begin;
|
||||
while(right != end) {
|
||||
if(*right < 0x80) {
|
||||
if(left != right && !cut_(left, right, res)) {
|
||||
return false;
|
||||
if(left != right) {
|
||||
cut_(left, right, res);
|
||||
}
|
||||
left = right;
|
||||
do {
|
||||
@ -50,10 +50,9 @@ class HMMSegment: public SegmentBase {
|
||||
right++;
|
||||
}
|
||||
}
|
||||
if(left != right && !cut_(left, right, res)) {
|
||||
return false;
|
||||
if(left != right) {
|
||||
cut_(left, right, res);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
// sequential letters rule
|
||||
@ -92,12 +91,9 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
return begin;
|
||||
}
|
||||
bool cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<size_t> status;
|
||||
if(!viterbi_(begin, end, status)) {
|
||||
LogError("viterbi_ failed.");
|
||||
return false;
|
||||
}
|
||||
viterbi_(begin, end, status);
|
||||
|
||||
Unicode::const_iterator left = begin;
|
||||
Unicode::const_iterator right;
|
||||
@ -108,15 +104,11 @@ class HMMSegment: public SegmentBase {
|
||||
left = right;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool viterbi_(Unicode::const_iterator begin, Unicode::const_iterator end,
|
||||
void viterbi_(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<size_t>& status) const {
|
||||
if(begin == end) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t Y = HMMModel::STATUS_SUM;
|
||||
size_t X = end - begin;
|
||||
|
||||
@ -166,8 +158,6 @@ class HMMSegment: public SegmentBase {
|
||||
status[x] = stat;
|
||||
stat = path[x + stat*X];
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -33,7 +33,7 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
|
||||
using SegmentBase::cut;
|
||||
bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<Dag> dags;
|
||||
|
||||
dictTrie_->find(begin, end, dags);
|
||||
@ -41,8 +41,19 @@ class MPSegment: public SegmentBase {
|
||||
calcDP_(dags);
|
||||
|
||||
cut_(dags, res);
|
||||
|
||||
return true;
|
||||
}
|
||||
void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
size_t min_word_len,
|
||||
size_t max_word_len,
|
||||
vector<Unicode>&res) const {
|
||||
vector<Dag> dags;
|
||||
dictTrie_->findByLimit(begin, end,
|
||||
min_word_len,
|
||||
max_word_len,
|
||||
dags);
|
||||
calcDP_(dags);
|
||||
cut_(dags, res);
|
||||
}
|
||||
const DictTrie* getDictTrie() const {
|
||||
return dictTrie_;
|
||||
|
@ -21,13 +21,10 @@ class MixSegment: public SegmentBase {
|
||||
virtual ~MixSegment() {
|
||||
}
|
||||
using SegmentBase::cut;
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<Unicode> words;
|
||||
words.reserve(end - begin);
|
||||
if(!mpSeg_.cut(begin, end, words)) {
|
||||
LogError("mpSeg cutDAG failed.");
|
||||
return false;
|
||||
}
|
||||
mpSeg_.cut(begin, end, words);
|
||||
|
||||
vector<Unicode> hmmRes;
|
||||
hmmRes.reserve(end - begin);
|
||||
@ -48,10 +45,7 @@ class MixSegment: public SegmentBase {
|
||||
}
|
||||
|
||||
// cut the sequence with hmm
|
||||
if (!hmmSeg_.cut(piece.begin(), piece.end(), hmmRes)) {
|
||||
LogError("hmmSeg_ cut failed.");
|
||||
return false;
|
||||
}
|
||||
hmmSeg_.cut(piece.begin(), piece.end(), hmmRes);
|
||||
|
||||
//put hmm result to result
|
||||
for (size_t k = 0; k < hmmRes.size(); k++) {
|
||||
@ -65,7 +59,6 @@ class MixSegment: public SegmentBase {
|
||||
//let i jump over this piece
|
||||
i = j - 1;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const DictTrie* getDictTrie() const {
|
||||
|
@ -28,32 +28,26 @@ class QuerySegment: public SegmentBase {
|
||||
virtual ~QuerySegment() {
|
||||
}
|
||||
using SegmentBase::cut;
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
//use mix cut first
|
||||
vector<Unicode> mixRes;
|
||||
if (!mixSeg_.cut(begin, end, mixRes)) {
|
||||
LogError("mixSeg_ cut failed.");
|
||||
return false;
|
||||
}
|
||||
mixSeg_.cut(begin, end, mixRes);
|
||||
|
||||
vector<Unicode> fullRes;
|
||||
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
||||
// if it's too long, cut with fullSeg_, put fullRes in res
|
||||
if (mixResItr->size() > maxWordLen_) {
|
||||
if (fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes)) {
|
||||
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
|
||||
res.push_back(*fullResItr);
|
||||
}
|
||||
|
||||
//clear tmp res
|
||||
fullRes.clear();
|
||||
fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes);
|
||||
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
|
||||
res.push_back(*fullResItr);
|
||||
}
|
||||
|
||||
//clear tmp res
|
||||
fullRes.clear();
|
||||
} else { // just use the mix result
|
||||
res.push_back(*mixResItr);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
MixSegment mixSeg_;
|
||||
|
@ -27,7 +27,7 @@ class SegmentBase: public ISegment, public NonCopyable {
|
||||
virtual ~SegmentBase() {
|
||||
};
|
||||
public:
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
|
||||
virtual void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const = 0;
|
||||
virtual bool cut(const string& str, vector<string>& res) const {
|
||||
res.clear();
|
||||
|
||||
@ -55,25 +55,18 @@ class SegmentBase: public ISegment, public NonCopyable {
|
||||
|
||||
return true;
|
||||
}
|
||||
virtual bool cut(Unicode::const_iterator begin,
|
||||
void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<string>& res) const {
|
||||
if(begin == end) {
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<Unicode> uRes;
|
||||
uRes.reserve(end - begin);
|
||||
if (!cut(begin, end, uRes)) {
|
||||
return false;
|
||||
}
|
||||
cut(begin, end, uRes);
|
||||
|
||||
size_t offset = res.size();
|
||||
res.resize(res.size() + uRes.size());
|
||||
for(size_t i = 0; i < uRes.size(); i ++, offset++) {
|
||||
TransCode::encode(uRes[i], res[offset]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
void loadSpecialSymbols_() {
|
||||
|
@ -88,9 +88,9 @@ class Trie {
|
||||
|
||||
void findByLimit(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<struct Dag>&res,
|
||||
size_t min_word_len,
|
||||
size_t max_word_len) const {
|
||||
size_t max_word_len,
|
||||
vector<struct Dag>&res) const {
|
||||
res.resize(end - begin);
|
||||
|
||||
// min_word_len start from 1;
|
||||
@ -130,7 +130,7 @@ class Trie {
|
||||
void find(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<struct Dag>& res) const {
|
||||
findByLimit(begin, end, res, MIN_WORD_LENGTH, MAX_WORD_LENGTH);
|
||||
findByLimit(begin, end, MIN_WORD_LENGTH, MAX_WORD_LENGTH, res);
|
||||
}
|
||||
void insertNode(const Unicode& key, const DictUnit* ptValue) {
|
||||
if (key.begin() == key.end()) {
|
||||
|
@ -128,7 +128,7 @@ TEST(DictTrieTest, Dag) {
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.findByLimit(unicode.begin(), unicode.end(), res, 2, 3);
|
||||
trie.findByLimit(unicode.begin(), unicode.end(), 2, 3, res);
|
||||
|
||||
size_t nexts_sizes[] = {1, 0, 1, 0};
|
||||
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
|
||||
@ -143,7 +143,7 @@ TEST(DictTrieTest, Dag) {
|
||||
Unicode unicode;
|
||||
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||
vector<struct Dag> res;
|
||||
trie.findByLimit(unicode.begin(), unicode.end(), res, 0, 4);
|
||||
trie.findByLimit(unicode.begin(), unicode.end(), 0, 4, res);
|
||||
|
||||
size_t nexts_sizes[] = {3, 1, 2, 1};
|
||||
ASSERT_EQ(res.size(), sizeof(nexts_sizes)/sizeof(nexts_sizes[0]));
|
||||
|
Loading…
x
Reference in New Issue
Block a user