Add optional `hmm` argument (default: true) to Cut and CutForSearch

This commit is contained in:
yanyiwu 2015-09-13 17:28:49 +08:00
parent 14974d51b4
commit f98e94869c
6 changed files with 21 additions and 23 deletions

View File

@ -6,6 +6,7 @@
2. 新增层次分词器: LevelSegment 。
3. 增加MPSegment的细粒度分词功能。
4. 增加 class Jieba ,提供可读性更好的接口。
5. 放弃了统一接口ISegment，因为统一的接口限制了分词方式的灵活性，限制了一些功能的增加。
## v3.1.0

View File

@ -34,13 +34,13 @@ class Application {
CutMethod method = METHOD_MIX) const {
switch(method) {
case METHOD_MP:
jieba_.Cut(sentence, false, words);
jieba_.Cut(sentence, words);
break;
case METHOD_HMM:
jieba_.CutHMM(sentence, words);
break;
case METHOD_MIX:
jieba_.Cut(sentence, true, words);
jieba_.Cut(sentence, words);
break;
case METHOD_FULL:
jieba_.CutAll(sentence, words);

View File

@ -21,18 +21,14 @@ class Jieba {
~Jieba() {
}
void Cut(const string& sentence, bool hmm, vector<string>& words) const {
if (hmm) {
mix_seg_.cut(sentence, words);
} else {
mp_seg_.cut(sentence, words);
}
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.cut(sentence, words, hmm);
}
void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.cut(sentence, words);
}
void CutForSearch(const string& sentence, vector<string>& words) const {
query_seg_.cut(sentence, words);
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.cut(sentence, words, hmm);
}
void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.cut(sentence, words);

View File

@ -31,9 +31,9 @@ class KeywordExtractor {
~KeywordExtractor() {
}
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
bool extract(const string& sentence, vector<string>& keywords, size_t topN) const {
vector<pair<string, double> > topWords;
if(!extract(str, topWords, topN)) {
if(!extract(sentence, topWords, topN)) {
return false;
}
for(size_t i = 0; i < topWords.size(); i++) {
@ -42,9 +42,9 @@ class KeywordExtractor {
return true;
}
bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const {
bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<string> words;
segment_.cut(str, words);
segment_.cut(sentence, words);
map<string, double> wordmap;
for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {

View File

@ -21,23 +21,25 @@ class MixSegment: public SegmentBase {
~MixSegment() {
}
void cut(const string& sentence,
vector<string>& words) const {
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<Unicode> uwords;
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords);
cut(range.begin, range.end, uwords, hmm);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
vector<Unicode> words;
words.reserve(end - begin);
mpSeg_.cut(begin, end, words);
if (!hmm) {
return;
}
vector<Unicode> hmmRes;
hmmRes.reserve(end - begin);

View File

@ -26,22 +26,21 @@ class QuerySegment: public SegmentBase {
}
~QuerySegment() {
}
void cut(const string& sentence,
vector<string>& words) const {
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<Unicode> uwords;
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords);
cut(range.begin, range.end, uwords, hmm);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
//use mix cut first
vector<Unicode> mixRes;
mixSeg_.cut(begin, end, mixRes);
mixSeg_.cut(begin, end, mixRes, hmm);
vector<Unicode> fullRes;
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {