Support an optional user word frequency weight (min, median, or max)

This commit is contained in:
yanyiwu 2015-10-09 11:20:06 +08:00
parent ecacf118e6
commit 6f51373280
2 changed files with 20 additions and 8 deletions

View File

@ -25,12 +25,12 @@ const char* const UNKNOWN_TAG = "";
class DictTrie { class DictTrie {
public: public:
enum UserWordWeightOption { enum UserWordWeightOption {
Min, WordWeightMin,
Median, WordWeightMedian,
Max, WordWeightMax,
}; // enum UserWordWeightOption }; // enum UserWordWeightOption
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = Median) { DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, user_word_weight_opt); Init(dict_path, user_dict_paths, user_word_weight_opt);
} }
@ -40,7 +40,7 @@ class DictTrie {
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
DictUnit node_info; DictUnit node_info;
if (!MakeNodeInfo(node_info, word, max_weight_, tag)) { if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false; return false;
} }
active_node_infos_.push_back(node_info); active_node_infos_.push_back(node_info);
@ -112,7 +112,7 @@ class DictTrie {
DictUnit node_info; DictUnit node_info;
MakeNodeInfo(node_info, MakeNodeInfo(node_info,
buf[0], buf[0],
max_weight_, user_word_default_weight_,
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG)); (buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
static_node_infos_.push_back(node_info); static_node_infos_.push_back(node_info);
if (node_info.word.size() == 1) { if (node_info.word.size() == 1) {
@ -172,10 +172,10 @@ class DictTrie {
max_weight_ = x[x.size() - 1].weight; max_weight_ = x[x.size() - 1].weight;
median_weight_ = x[x.size() / 2].weight; median_weight_ = x[x.size() / 2].weight;
switch (option) { switch (option) {
case Min: case WordWeightMin:
user_word_default_weight_ = min_weight_; user_word_default_weight_ = min_weight_;
break; break;
case Median: case WordWeightMedian:
user_word_default_weight_ = median_weight_; user_word_default_weight_ = median_weight_;
break; break;
default: default:

View File

@ -70,6 +70,18 @@ TEST(DictTrieTest, UserDict) {
ASSERT_TRUE(unit); ASSERT_TRUE(unit);
string res ; string res ;
res << *unit; res << *unit;
ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -14.100", res);
}
TEST(DictTrieTest, UserDictWithMaxWeight) {
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
string word = "云计算";
Unicode unicode;
ASSERT_TRUE(TransCode::decode(word, unicode));
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
ASSERT_TRUE(unit);
string res ;
res << *unit;
ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -2.975", res); ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -2.975", res);
} }