mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
support optional user word freq weight
This commit is contained in:
parent
ecacf118e6
commit
6f51373280
@ -25,12 +25,12 @@ const char* const UNKNOWN_TAG = "";
|
|||||||
class DictTrie {
|
class DictTrie {
|
||||||
public:
|
public:
|
||||||
enum UserWordWeightOption {
|
enum UserWordWeightOption {
|
||||||
Min,
|
WordWeightMin,
|
||||||
Median,
|
WordWeightMedian,
|
||||||
Max,
|
WordWeightMax,
|
||||||
}; // enum UserWordWeightOption
|
}; // enum UserWordWeightOption
|
||||||
|
|
||||||
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = Median) {
|
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
|
||||||
Init(dict_path, user_dict_paths, user_word_weight_opt);
|
Init(dict_path, user_dict_paths, user_word_weight_opt);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,7 +40,7 @@ class DictTrie {
|
|||||||
|
|
||||||
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||||
DictUnit node_info;
|
DictUnit node_info;
|
||||||
if (!MakeNodeInfo(node_info, word, max_weight_, tag)) {
|
if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
active_node_infos_.push_back(node_info);
|
active_node_infos_.push_back(node_info);
|
||||||
@ -112,7 +112,7 @@ class DictTrie {
|
|||||||
DictUnit node_info;
|
DictUnit node_info;
|
||||||
MakeNodeInfo(node_info,
|
MakeNodeInfo(node_info,
|
||||||
buf[0],
|
buf[0],
|
||||||
max_weight_,
|
user_word_default_weight_,
|
||||||
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
|
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
|
||||||
static_node_infos_.push_back(node_info);
|
static_node_infos_.push_back(node_info);
|
||||||
if (node_info.word.size() == 1) {
|
if (node_info.word.size() == 1) {
|
||||||
@ -172,10 +172,10 @@ class DictTrie {
|
|||||||
max_weight_ = x[x.size() - 1].weight;
|
max_weight_ = x[x.size() - 1].weight;
|
||||||
median_weight_ = x[x.size() / 2].weight;
|
median_weight_ = x[x.size() / 2].weight;
|
||||||
switch (option) {
|
switch (option) {
|
||||||
case Min:
|
case WordWeightMin:
|
||||||
user_word_default_weight_ = min_weight_;
|
user_word_default_weight_ = min_weight_;
|
||||||
break;
|
break;
|
||||||
case Median:
|
case WordWeightMedian:
|
||||||
user_word_default_weight_ = median_weight_;
|
user_word_default_weight_ = median_weight_;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -70,6 +70,18 @@ TEST(DictTrieTest, UserDict) {
|
|||||||
ASSERT_TRUE(unit);
|
ASSERT_TRUE(unit);
|
||||||
string res ;
|
string res ;
|
||||||
res << *unit;
|
res << *unit;
|
||||||
|
ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -14.100", res);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(DictTrieTest, UserDictWithMaxWeight) {
|
||||||
|
DictTrie trie(DICT_FILE, "../test/testdata/userdict.utf8", DictTrie::WordWeightMax);
|
||||||
|
string word = "云计算";
|
||||||
|
Unicode unicode;
|
||||||
|
ASSERT_TRUE(TransCode::decode(word, unicode));
|
||||||
|
const DictUnit * unit = trie.Find(unicode.begin(), unicode.end());
|
||||||
|
ASSERT_TRUE(unit);
|
||||||
|
string res ;
|
||||||
|
res << *unit;
|
||||||
ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -2.975", res);
|
ASSERT_EQ("[\"20113\", \"35745\", \"31639\"] -2.975", res);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user