From 81c2d3caf1ca7c069dfdef8af2eb6b92cdd85611 Mon Sep 17 00:00:00 2001 From: wyy Date: Sat, 7 Dec 2013 07:45:06 -0800 Subject: [PATCH] modify calcDAG try to speed up --- src/MPSegment.hpp | 49 +++++++++++++++++++++++++++-------------------- src/Trie.hpp | 16 ++++++++-------- 2 files changed, 36 insertions(+), 29 deletions(-) diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index 42d8e5c..e195b9a 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -126,13 +126,9 @@ namespace CppJieba return false; } SegmentContext segContext; - for(Unicode::const_iterator it = begin; it != end; it++) - { - segContext.push_back(SegmentChar(*it)); - } //calc DAG - if(!_calcDAG(segContext)) + if(!_calcDAG(begin, end, segContext)) { LogError("_calcDAG failed."); return false; @@ -154,36 +150,47 @@ namespace CppJieba } private: - bool _calcDAG(SegmentContext& segContext)const + bool _calcDAG(Unicode::const_iterator begin, Unicode::const_iterator end, SegmentContext& segContext) const { - if(segContext.empty()) + if(begin >= end) { - LogError("segContext empty."); + LogError("begin >= end."); return false; } - - Unicode unicode; - vector > vp; - for(uint i = 0; i < segContext.size(); i++) + for(Unicode::const_iterator it = begin; it != end; it++) { - unicode.clear(); - for(uint j = i ; j < segContext.size(); j++) - { - unicode.push_back(segContext[j].uniCh); - } + segContext.push_back(SegmentChar(*it)); + } + vector > vp; + //for(Unicode::const_iterator it = begin; it != end; it++) + //{ + // segContext.push_back(SegmentChar(*it)); + //} + for(Unicode::const_iterator it = begin; it != end; it++) + //for(uint i = 0; i < segContext.size(); i++) + { + //unicode.clear(); + //for(uint j = i; j < segContext.size(); j++) + //{ + // unicode.push_back(segContext[j].uniCh); + //} + + segContext.push_back(SegmentChar(*it)); + SegmentChar& back = segContext.back(); + int i = it - begin; vp.clear(); - if(_trie.find(unicode, vp)) + if(_trie.find(it, end, vp)) { for(uint j = 0; j < vp.size(); j++) { uint nextp = vp[j].first + i; - segContext[i].dag[nextp] = vp[j].second; + back.dag[nextp] = vp[j].second; } } - if(segContext[i].dag.end() == segContext[i].dag.find(i)) + if(back.dag.end() == back.dag.find(i)) { - segContext[i].dag[i] = NULL; + back.dag[i] = NULL; } } return true; diff --git a/src/Trie.hpp b/src/Trie.hpp index 8282587..c39277a 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -244,14 +244,14 @@ namespace CppJieba return !res.empty(); } - bool find(const Unicode& unico, vector >& res)const - { - if (!unico.empty()) - { - return find(unico.begin(), unico.end(), res); - } - return false; - } + //bool find(const Unicode& unico, vector >& res)const + //{ + // if (!unico.empty()) + // { + // return find(unico.begin(), unico.end(), res); + // } + // return false; + //} public: double getMinLogFreq()const{return _minLogFreq;};