mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
InsertUserWord with freq arg, expose InserUserDictNode with vector<string> arg
This commit is contained in:
parent
36be7fb900
commit
d56e5c0659
@ -50,6 +50,17 @@ class DictTrie {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
|
||||||
|
DictUnit node_info;
|
||||||
|
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
|
||||||
|
if (!MakeNodeInfo(node_info, word, weight , tag)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
active_node_infos_.push_back(node_info);
|
||||||
|
trie_->InsertNode(node_info.word, &active_node_infos_.back());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Looks up the rune range [begin, end) by delegating to trie_->Find and
// returns whatever that lookup yields.
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
  const DictUnit* found = trie_->Find(begin, end);
  return found;
}
|
||||||
@ -69,6 +80,30 @@ class DictTrie {
|
|||||||
return min_weight_;
|
return min_weight_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void InserUserDictNode(vector<string>& buf){
|
||||||
|
DictUnit node_info;
|
||||||
|
if(buf.size() == 1){
|
||||||
|
MakeNodeInfo(node_info,
|
||||||
|
buf[0],
|
||||||
|
user_word_default_weight_,
|
||||||
|
UNKNOWN_TAG);
|
||||||
|
} else if (buf.size() == 2) {
|
||||||
|
MakeNodeInfo(node_info,
|
||||||
|
buf[0],
|
||||||
|
user_word_default_weight_,
|
||||||
|
buf[1]);
|
||||||
|
} else if (buf.size() == 3) {
|
||||||
|
int freq = atoi(buf[1].c_str());
|
||||||
|
assert(freq_sum_ > 0.0);
|
||||||
|
double weight = log(1.0 * freq / freq_sum_);
|
||||||
|
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
|
||||||
|
}
|
||||||
|
static_node_infos_.push_back(node_info);
|
||||||
|
if (node_info.word.size() == 1) {
|
||||||
|
user_dict_single_chinese_word_.insert(node_info.word[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
|
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
|
||||||
LoadDict(dict_path);
|
LoadDict(dict_path);
|
||||||
@ -95,6 +130,8 @@ class DictTrie {
|
|||||||
trie_ = new Trie(words, valuePointers);
|
trie_ = new Trie(words, valuePointers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void LoadUserDict(const string& filePaths) {
|
void LoadUserDict(const string& filePaths) {
|
||||||
vector<string> files = limonp::Split(filePaths, "|;");
|
vector<string> files = limonp::Split(filePaths, "|;");
|
||||||
size_t lineno = 0;
|
size_t lineno = 0;
|
||||||
@ -102,7 +139,6 @@ class DictTrie {
|
|||||||
ifstream ifs(files[i].c_str());
|
ifstream ifs(files[i].c_str());
|
||||||
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
|
||||||
string line;
|
string line;
|
||||||
DictUnit node_info;
|
|
||||||
vector<string> buf;
|
vector<string> buf;
|
||||||
for (; getline(ifs, line); lineno++) {
|
for (; getline(ifs, line); lineno++) {
|
||||||
if (line.size() == 0) {
|
if (line.size() == 0) {
|
||||||
@ -110,27 +146,7 @@ class DictTrie {
|
|||||||
}
|
}
|
||||||
buf.clear();
|
buf.clear();
|
||||||
Split(line, buf, " ");
|
Split(line, buf, " ");
|
||||||
DictUnit node_info;
|
InserUserDictNode(buf);
|
||||||
if(buf.size() == 1){
|
|
||||||
MakeNodeInfo(node_info,
|
|
||||||
buf[0],
|
|
||||||
user_word_default_weight_,
|
|
||||||
UNKNOWN_TAG);
|
|
||||||
} else if (buf.size() == 2) {
|
|
||||||
MakeNodeInfo(node_info,
|
|
||||||
buf[0],
|
|
||||||
user_word_default_weight_,
|
|
||||||
buf[1]);
|
|
||||||
} else if (buf.size() == 3) {
|
|
||||||
int freq = atoi(buf[1].c_str());
|
|
||||||
assert(freq_sum_ > 0.0);
|
|
||||||
double weight = log(1.0 * freq / freq_sum_);
|
|
||||||
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
|
|
||||||
}
|
|
||||||
static_node_infos_.push_back(node_info);
|
|
||||||
if (node_info.word.size() == 1) {
|
|
||||||
user_dict_single_chinese_word_.insert(node_info.word[0]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user