mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Add LookupTag function for single token tag lookup.
This commit is contained in:
parent
667acdeb7b
commit
5775a40bee
@ -62,6 +62,9 @@ class Jieba {
|
|||||||
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
void Tag(const string& sentence, vector<pair<string, string> >& words) const {
|
||||||
mix_seg_.Tag(sentence, words);
|
mix_seg_.Tag(sentence, words);
|
||||||
}
|
}
|
||||||
|
string LookupTag(const string &str) const {
|
||||||
|
return mix_seg_.LookupTag(str);
|
||||||
|
}
|
||||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||||
return dict_trie_.InsertUserWord(word, tag);
|
return dict_trie_.InsertUserWord(word, tag);
|
||||||
}
|
}
|
||||||
|
@ -93,6 +93,10 @@ class MixSegment: public SegmentTagged {
|
|||||||
return tagger_.Tag(src, res, *this);
|
return tagger_.Tag(src, res, *this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
string LookupTag(const string &str) const {
|
||||||
|
return tagger_.LookupTag(str, *this);
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
MPSegment mpSeg_;
|
MPSegment mpSeg_;
|
||||||
HMMSegment hmmSeg_;
|
HMMSegment hmmSeg_;
|
||||||
|
@ -23,24 +23,29 @@ class PosTagger {
|
|||||||
vector<string> CutRes;
|
vector<string> CutRes;
|
||||||
segment.Cut(src, CutRes);
|
segment.Cut(src, CutRes);
|
||||||
|
|
||||||
|
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
||||||
|
res.push_back(make_pair(*itr, LookupTag(*itr, segment)));
|
||||||
|
}
|
||||||
|
return !res.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
string LookupTag(const string &str, const SegmentTagged& segment) const {
|
||||||
const DictUnit *tmp = NULL;
|
const DictUnit *tmp = NULL;
|
||||||
RuneStrArray runes;
|
RuneStrArray runes;
|
||||||
const DictTrie * dict = segment.GetDictTrie();
|
const DictTrie * dict = segment.GetDictTrie();
|
||||||
assert(dict != NULL);
|
assert(dict != NULL);
|
||||||
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
|
if (!DecodeRunesInString(str, runes)) {
|
||||||
if (!DecodeRunesInString(*itr, runes)) {
|
|
||||||
XLOG(ERROR) << "Decode failed.";
|
XLOG(ERROR) << "Decode failed.";
|
||||||
return false;
|
return POS_X;
|
||||||
}
|
}
|
||||||
tmp = dict->Find(runes.begin(), runes.end());
|
tmp = dict->Find(runes.begin(), runes.end());
|
||||||
if (tmp == NULL || tmp->tag.empty()) {
|
if (tmp == NULL || tmp->tag.empty()) {
|
||||||
res.push_back(make_pair(*itr, SpecialRule(runes)));
|
return SpecialRule(runes);
|
||||||
} else {
|
} else {
|
||||||
res.push_back(make_pair(*itr, tmp->tag));
|
return tmp->tag;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return !res.empty();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const char* SpecialRule(const RuneStrArray& unicode) const {
|
const char* SpecialRule(const RuneStrArray& unicode) const {
|
||||||
size_t m = 0;
|
size_t m = 0;
|
||||||
|
@ -51,6 +51,13 @@ int main(int argc, char** argv) {
|
|||||||
jieba.CutForSearch(s, jiebawords, true);
|
jieba.CutForSearch(s, jiebawords, true);
|
||||||
cout << jiebawords << endl;
|
cout << jiebawords << endl;
|
||||||
|
|
||||||
|
cout << "[demo] Lookup Tag for Single Token" << endl;
|
||||||
|
vector<pair<string, string> > LookupTagres = {{"拖拉机", ""}, {"CEO", ""}, {".",""}};
|
||||||
|
LookupTagres[0].second = jieba.LookupTag(LookupTagres[0].first);
|
||||||
|
LookupTagres[1].second = jieba.LookupTag(LookupTagres[1].first);
|
||||||
|
LookupTagres[2].second = jieba.LookupTag(LookupTagres[2].first);
|
||||||
|
cout << LookupTagres << endl;;
|
||||||
|
|
||||||
cout << "[demo] Tagging" << endl;
|
cout << "[demo] Tagging" << endl;
|
||||||
vector<pair<string, string> > tagres;
|
vector<pair<string, string> > tagres;
|
||||||
s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
s = "我是拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上CEO,走上人生巅峰。";
|
||||||
|
Loading…
x
Reference in New Issue
Block a user