Add InsertUserWord overload with a freq argument; expose InserUserDictNode taking a vector<string> argument

This commit is contained in:
zhoupeng 2018-06-08 00:44:33 +08:00
parent 36be7fb900
commit d56e5c0659

View File

@ -50,6 +50,17 @@ class DictTrie {
return true; return true;
} }
// Inserts a user word at runtime using an explicit frequency.
//
// word: the word to add.
// freq: occurrence count; when positive the weight is log(freq / freq_sum_),
//       otherwise user_word_default_weight_ is used.
// tag:  tag stored with the word (defaults to UNKNOWN_TAG).
//
// Returns false when MakeNodeInfo reports failure; otherwise the node is
// appended to active_node_infos_, inserted into the trie, and true is returned.
bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) {
  DictUnit node_info;
  double weight = user_word_default_weight_;
  // Only a strictly positive count has a finite logarithm: log(0) is -inf and
  // log of a negative value is NaN, either of which would poison the weight.
  if (freq > 0) {
    assert(freq_sum_ > 0.0);
    weight = log(1.0 * freq / freq_sum_);
  }
  if (!MakeNodeInfo(node_info, word, weight, tag)) {
    return false;
  }
  active_node_infos_.push_back(node_info);
  // The trie stores a pointer to the element that was just appended.
  trie_->InsertNode(node_info.word, &active_node_infos_.back());
  return true;
}
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end); return trie_->Find(begin, end);
} }
@ -69,6 +80,30 @@ class DictTrie {
return min_weight_; return min_weight_;
} }
// Builds a DictUnit from one already-split user-dictionary entry and appends
// it to static_node_infos_.
//
// Accepted layouts of buf:
//   [word]            -> user_word_default_weight_, UNKNOWN_TAG
//   [word, tag]       -> user_word_default_weight_, tag
//   [word, freq, tag] -> log(freq / freq_sum_), tag
//
// Entries with any other field count, or for which MakeNodeInfo reports
// failure, are skipped so that no empty or half-initialised node is stored.
// A word whose decoded form has exactly one element is also recorded in
// user_dict_single_chinese_word_.
void InserUserDictNode(vector<string>& buf){
  DictUnit node_info;
  bool ok = false;
  switch (buf.size()) {
    case 1:
      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, UNKNOWN_TAG);
      break;
    case 2:
      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, buf[1]);
      break;
    case 3: {
      // atoi yields 0 for text that is not a number; log(0) would be -inf and
      // log of a negative count NaN, so non-positive counts get the default
      // weight instead.
      const int freq = atoi(buf[1].c_str());
      double weight = user_word_default_weight_;
      if (freq > 0) {
        assert(freq_sum_ > 0.0);
        weight = log(1.0 * freq / freq_sum_);
      }
      ok = MakeNodeInfo(node_info, buf[0], weight, buf[2]);
      break;
    }
    default:
      // Empty or over-long entry: nothing sensible to build.
      break;
  }
  if (!ok) {
    return;
  }
  static_node_infos_.push_back(node_info);
  if (node_info.word.size() == 1) {
    user_dict_single_chinese_word_.insert(node_info.word[0]);
  }
}
private: private:
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
LoadDict(dict_path); LoadDict(dict_path);
@ -95,6 +130,8 @@ class DictTrie {
trie_ = new Trie(words, valuePointers); trie_ = new Trie(words, valuePointers);
} }
void LoadUserDict(const string& filePaths) { void LoadUserDict(const string& filePaths) {
vector<string> files = limonp::Split(filePaths, "|;"); vector<string> files = limonp::Split(filePaths, "|;");
size_t lineno = 0; size_t lineno = 0;
@ -102,7 +139,6 @@ class DictTrie {
ifstream ifs(files[i].c_str()); ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line; string line;
DictUnit node_info;
vector<string> buf; vector<string> buf;
for (; getline(ifs, line); lineno++) { for (; getline(ifs, line); lineno++) {
if (line.size() == 0) { if (line.size() == 0) {
@ -110,27 +146,7 @@ class DictTrie {
} }
buf.clear(); buf.clear();
Split(line, buf, " "); Split(line, buf, " ");
DictUnit node_info; InserUserDictNode(buf);
if(buf.size() == 1){
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
UNKNOWN_TAG);
} else if (buf.size() == 2) {
MakeNodeInfo(node_info,
buf[0],
user_word_default_weight_,
buf[1]);
} else if (buf.size() == 3) {
int freq = atoi(buf[1].c_str());
assert(freq_sum_ > 0.0);
double weight = log(1.0 * freq / freq_sum_);
MakeNodeInfo(node_info, buf[0], weight, buf[2]);
}
static_node_infos_.push_back(node_info);
if (node_info.word.size() == 1) {
user_dict_single_chinese_word_.insert(node_info.word[0]);
}
} }
} }
} }