diff --git a/include/cppjieba/DictTrie.hpp b/include/cppjieba/DictTrie.hpp
index 1e22ecc..aaefb04 100644
--- a/include/cppjieba/DictTrie.hpp
+++ b/include/cppjieba/DictTrie.hpp
@@ -50,6 +50,29 @@ class DictTrie {
     return true;
   }
 
+  // Adds one user word to the trie.
+  //
+  // word: the word to add; handed to MakeNodeInfo together with weight and tag.
+  // freq: word frequency; a positive value becomes log(freq / freq_sum_),
+  //       zero or a negative value falls back to user_word_default_weight_
+  //       because log() of zero or a negative number is -inf or NaN.
+  // tag:  tag stored with the word; defaults to UNKNOWN_TAG.
+  //
+  // Returns false when MakeNodeInfo rejects the word, true once it is inserted.
+  bool InsertUserWord(const string& word, int freq, const string& tag = UNKNOWN_TAG) {
+    DictUnit node_info;
+    const double weight = freq > 0 ? log(1.0 * freq / freq_sum_) : user_word_default_weight_;
+    if (!MakeNodeInfo(node_info, word, weight, tag)) {
+      return false;
+    }
+    // NOTE(review): the trie keeps the address of the stored element, so this
+    // assumes active_node_infos_ keeps references valid across push_back
+    // (e.g. a deque rather than a vector) -- confirm against its declaration.
+    active_node_infos_.push_back(node_info);
+    trie_->InsertNode(node_info.word, &active_node_infos_.back());
+    return true;
+  }
+
   const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
     return trie_->Find(begin, end);
   }
@@ -69,6 +92,41 @@ class DictTrie {
     return min_weight_;
   }
 
+  // Builds a dictionary node from one tokenized user-dictionary line and
+  // appends it to static_node_infos_. (The name is kept as-is, typo included,
+  // so existing callers continue to compile.)
+  //
+  // Accepted layouts of buf:
+  //   {word}            -> user_word_default_weight_, UNKNOWN_TAG
+  //   {word, tag}       -> user_word_default_weight_, tag
+  //   {word, freq, tag} -> log(freq / freq_sum_), tag
+  // Any other field count, or a word MakeNodeInfo rejects, is skipped so that
+  // no default-constructed DictUnit ends up in static_node_infos_.
+  void InserUserDictNode(vector<string>& buf) {
+    DictUnit node_info;
+    bool ok = false;
+    if (buf.size() == 1) {
+      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, UNKNOWN_TAG);
+    } else if (buf.size() == 2) {
+      ok = MakeNodeInfo(node_info, buf[0], user_word_default_weight_, buf[1]);
+    } else if (buf.size() == 3) {
+      const int freq = atoi(buf[1].c_str());
+      assert(freq_sum_ > 0.0);
+      // atoi yields 0 for non-numeric text; log() of zero or a negative value
+      // is -inf or NaN, so fall back to the default weight as InsertUserWord does.
+      const double weight = freq > 0 ? log(1.0 * freq / freq_sum_) : user_word_default_weight_;
+      ok = MakeNodeInfo(node_info, buf[0], weight, buf[2]);
+    }
+    if (!ok) {
+      return;
+    }
+    static_node_infos_.push_back(node_info);
+    // Single-rune user words are additionally recorded by their only rune.
+    if (node_info.word.size() == 1) {
+      user_dict_single_chinese_word_.insert(node_info.word[0]);
+    }
+  }
+
  private:
   void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
     LoadDict(dict_path);
@@ -102,7 +160,6 @@ class DictTrie {
       ifstream ifs(files[i].c_str());
       XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
       string line;
-      DictUnit node_info;
       vector<string> buf;
       for (; getline(ifs, line); lineno++) {
         if (line.size() == 0) {
@@ -110,27 +167,7 @@ class DictTrie {
         }
         buf.clear();
         Split(line, buf, " ");
-        DictUnit node_info;
-        if(buf.size() == 1){
-          MakeNodeInfo(node_info,
-                buf[0],
-                user_word_default_weight_,
-                UNKNOWN_TAG);
-        } else if (buf.size() == 2) {
-          MakeNodeInfo(node_info,
-                buf[0],
-                user_word_default_weight_,
-                buf[1]);
-        } else if (buf.size() == 3) {
-          int freq = atoi(buf[1].c_str());
-          assert(freq_sum_ > 0.0);
-          double weight = log(1.0 * freq / freq_sum_);
-          MakeNodeInfo(node_info, buf[0], weight, buf[2]);
-        }
-        static_node_infos_.push_back(node_info);
-        if (node_info.word.size() == 1) {
-          user_dict_single_chinese_word_.insert(node_info.word[0]);
-        }
+        InserUserDictNode(buf);
       }
     }
   }