mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add optional argument: hmm
This commit is contained in:
parent
14974d51b4
commit
f98e94869c
@ -6,6 +6,7 @@
|
||||
2. 新增层次分词器: LevelSegment 。
|
||||
3. 增加MPSegment的细粒度分词功能。
|
||||
4. 增加 class Jieba ,提供可读性更好的接口。
|
||||
5. 放弃了统一接口ISegment,因为统一的接口限制了分词方式的灵活性,限制了一些功能的增加。
|
||||
|
||||
## v3.1.0
|
||||
|
||||
|
@ -34,13 +34,13 @@ class Application {
|
||||
CutMethod method = METHOD_MIX) const {
|
||||
switch(method) {
|
||||
case METHOD_MP:
|
||||
jieba_.Cut(sentence, false, words);
|
||||
jieba_.Cut(sentence, words);
|
||||
break;
|
||||
case METHOD_HMM:
|
||||
jieba_.CutHMM(sentence, words);
|
||||
break;
|
||||
case METHOD_MIX:
|
||||
jieba_.Cut(sentence, true, words);
|
||||
jieba_.Cut(sentence, words);
|
||||
break;
|
||||
case METHOD_FULL:
|
||||
jieba_.CutAll(sentence, words);
|
||||
|
@ -21,18 +21,14 @@ class Jieba {
|
||||
~Jieba() {
|
||||
}
|
||||
|
||||
void Cut(const string& sentence, bool hmm, vector<string>& words) const {
|
||||
if (hmm) {
|
||||
mix_seg_.cut(sentence, words);
|
||||
} else {
|
||||
mp_seg_.cut(sentence, words);
|
||||
}
|
||||
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
mix_seg_.cut(sentence, words, hmm);
|
||||
}
|
||||
void CutAll(const string& sentence, vector<string>& words) const {
|
||||
full_seg_.cut(sentence, words);
|
||||
}
|
||||
void CutForSearch(const string& sentence, vector<string>& words) const {
|
||||
query_seg_.cut(sentence, words);
|
||||
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
query_seg_.cut(sentence, words, hmm);
|
||||
}
|
||||
void CutHMM(const string& sentence, vector<string>& words) const {
|
||||
hmm_seg_.cut(sentence, words);
|
||||
|
@ -31,9 +31,9 @@ class KeywordExtractor {
|
||||
~KeywordExtractor() {
|
||||
}
|
||||
|
||||
bool extract(const string& str, vector<string>& keywords, size_t topN) const {
|
||||
bool extract(const string& sentence, vector<string>& keywords, size_t topN) const {
|
||||
vector<pair<string, double> > topWords;
|
||||
if(!extract(str, topWords, topN)) {
|
||||
if(!extract(sentence, topWords, topN)) {
|
||||
return false;
|
||||
}
|
||||
for(size_t i = 0; i < topWords.size(); i++) {
|
||||
@ -42,9 +42,9 @@ class KeywordExtractor {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool extract(const string& str, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
|
||||
vector<string> words;
|
||||
segment_.cut(str, words);
|
||||
segment_.cut(sentence, words);
|
||||
|
||||
map<string, double> wordmap;
|
||||
for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
|
||||
|
@ -21,23 +21,25 @@ class MixSegment: public SegmentBase {
|
||||
~MixSegment() {
|
||||
}
|
||||
|
||||
void cut(const string& sentence,
|
||||
vector<string>& words) const {
|
||||
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<Unicode> uwords;
|
||||
uwords.reserve(sentence.size());
|
||||
while (pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
cut(range.begin, range.end, uwords);
|
||||
cut(range.begin, range.end, uwords, hmm);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
}
|
||||
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
|
||||
vector<Unicode> words;
|
||||
words.reserve(end - begin);
|
||||
mpSeg_.cut(begin, end, words);
|
||||
if (!hmm) {
|
||||
return;
|
||||
}
|
||||
|
||||
vector<Unicode> hmmRes;
|
||||
hmmRes.reserve(end - begin);
|
||||
|
@ -26,22 +26,21 @@ class QuerySegment: public SegmentBase {
|
||||
}
|
||||
~QuerySegment() {
|
||||
}
|
||||
void cut(const string& sentence,
|
||||
vector<string>& words) const {
|
||||
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
|
||||
PreFilter pre_filter(symbols_, sentence);
|
||||
PreFilter::Range range;
|
||||
vector<Unicode> uwords;
|
||||
uwords.reserve(sentence.size());
|
||||
while (pre_filter.HasNext()) {
|
||||
range = pre_filter.Next();
|
||||
cut(range.begin, range.end, uwords);
|
||||
cut(range.begin, range.end, uwords, hmm);
|
||||
}
|
||||
TransCode::encode(uwords, words);
|
||||
}
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
|
||||
//use mix cut first
|
||||
vector<Unicode> mixRes;
|
||||
mixSeg_.cut(begin, end, mixRes);
|
||||
mixSeg_.cut(begin, end, mixRes, hmm);
|
||||
|
||||
vector<Unicode> fullRes;
|
||||
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user