Merge pull request #195 from ahmadov/ahmadov/fix-ns

fix missing includes and make namespaces explicit
This commit is contained in:
Yanyi Wu 2025-04-10 23:01:18 +08:00 committed by GitHub
commit 7730deee52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -1,15 +1,15 @@
#ifndef CPPJIEBA_DICT_TRIE_HPP #ifndef CPPJIEBA_DICT_TRIE_HPP
#define CPPJIEBA_DICT_TRIE_HPP #define CPPJIEBA_DICT_TRIE_HPP
#include <iostream> #include <algorithm>
#include <fstream> #include <fstream>
#include <map>
#include <string>
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>
#include <stdint.h>
#include <cmath> #include <cmath>
#include <limits> #include <deque>
#include <set>
#include <string>
#include <unordered_set>
#include "limonp/StringUtil.hpp" #include "limonp/StringUtil.hpp"
#include "limonp/Logging.hpp" #include "limonp/Logging.hpp"
#include "Unicode.hpp" #include "Unicode.hpp"
@ -17,8 +17,6 @@
namespace cppjieba { namespace cppjieba {
using namespace limonp;
const double MIN_DOUBLE = -3.14e+100; const double MIN_DOUBLE = -3.14e+100;
const double MAX_DOUBLE = 3.14e+100; const double MAX_DOUBLE = 3.14e+100;
const size_t DICT_COLUMN_NUM = 3; const size_t DICT_COLUMN_NUM = 3;
@ -32,7 +30,7 @@ class DictTrie {
WordWeightMax, WordWeightMax,
}; // enum UserWordWeightOption }; // enum UserWordWeightOption
DictTrie(const string& dict_path, const string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) { DictTrie(const std::string& dict_path, const std::string& user_dict_paths = "", UserWordWeightOption user_word_weight_opt = WordWeightMedian) {
Init(dict_path, user_dict_paths, user_word_weight_opt); Init(dict_path, user_dict_paths, user_word_weight_opt);
} }
@ -40,7 +38,7 @@ class DictTrie {
delete trie_; delete trie_;
} }
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) { bool InsertUserWord(const std::string& word, const std::string& tag = UNKNOWN_TAG) {
DictUnit node_info; DictUnit node_info;
if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) { if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false; return false;
@ -50,7 +48,7 @@ class DictTrie {
return true; return true;
} }
bool InsertUserWord(const string& word,int freq, const string& tag = UNKNOWN_TAG) { bool InsertUserWord(const std::string& word,int freq, const std::string& tag = UNKNOWN_TAG) {
DictUnit node_info; DictUnit node_info;
double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ; double weight = freq ? log(1.0 * freq / freq_sum_) : user_word_default_weight_ ;
if (!MakeNodeInfo(node_info, word, weight , tag)) { if (!MakeNodeInfo(node_info, word, weight , tag)) {
@ -61,7 +59,7 @@ class DictTrie {
return true; return true;
} }
bool DeleteUserWord(const string& word, const string& tag = UNKNOWN_TAG) { bool DeleteUserWord(const std::string& word, const std::string& tag = UNKNOWN_TAG) {
DictUnit node_info; DictUnit node_info;
if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) { if (!MakeNodeInfo(node_info, word, user_word_default_weight_, tag)) {
return false; return false;
@ -69,19 +67,19 @@ class DictTrie {
trie_->DeleteNode(node_info.word, &node_info); trie_->DeleteNode(node_info.word, &node_info);
return true; return true;
} }
const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const { const DictUnit* Find(RuneStrArray::const_iterator begin, RuneStrArray::const_iterator end) const {
return trie_->Find(begin, end); return trie_->Find(begin, end);
} }
void Find(RuneStrArray::const_iterator begin, void Find(RuneStrArray::const_iterator begin,
RuneStrArray::const_iterator end, RuneStrArray::const_iterator end,
vector<struct Dag>&res, std::vector<struct Dag>&res,
size_t max_word_len = MAX_WORD_LENGTH) const { size_t max_word_len = MAX_WORD_LENGTH) const {
trie_->Find(begin, end, res, max_word_len); trie_->Find(begin, end, res, max_word_len);
} }
bool Find(const string& word) bool Find(const std::string& word)
{ {
const DictUnit *tmp = NULL; const DictUnit *tmp = NULL;
RuneStrArray runes; RuneStrArray runes;
@ -108,18 +106,18 @@ class DictTrie {
return min_weight_; return min_weight_;
} }
void InserUserDictNode(const string& line) { void InserUserDictNode(const std::string& line) {
vector<string> buf; std::vector<std::string> buf;
DictUnit node_info; DictUnit node_info;
Split(line, buf, " "); limonp::Split(line, buf, " ");
if(buf.size() == 1){ if(buf.size() == 1){
MakeNodeInfo(node_info, MakeNodeInfo(node_info,
buf[0], buf[0],
user_word_default_weight_, user_word_default_weight_,
UNKNOWN_TAG); UNKNOWN_TAG);
} else if (buf.size() == 2) { } else if (buf.size() == 2) {
MakeNodeInfo(node_info, MakeNodeInfo(node_info,
buf[0], buf[0],
user_word_default_weight_, user_word_default_weight_,
buf[1]); buf[1]);
} else if (buf.size() == 3) { } else if (buf.size() == 3) {
@ -133,27 +131,27 @@ class DictTrie {
user_dict_single_chinese_word_.insert(node_info.word[0]); user_dict_single_chinese_word_.insert(node_info.word[0]);
} }
} }
void LoadUserDict(const vector<string>& buf) { void LoadUserDict(const std::vector<std::string>& buf) {
for (size_t i = 0; i < buf.size(); i++) { for (size_t i = 0; i < buf.size(); i++) {
InserUserDictNode(buf[i]); InserUserDictNode(buf[i]);
} }
} }
void LoadUserDict(const set<string>& buf) { void LoadUserDict(const std::set<std::string>& buf) {
std::set<string>::const_iterator iter; std::set<std::string>::const_iterator iter;
for (iter = buf.begin(); iter != buf.end(); iter++){ for (iter = buf.begin(); iter != buf.end(); iter++){
InserUserDictNode(*iter); InserUserDictNode(*iter);
} }
} }
void LoadUserDict(const string& filePaths) { void LoadUserDict(const std::string& filePaths) {
vector<string> files = limonp::Split(filePaths, "|;"); std::vector<std::string> files = limonp::Split(filePaths, "|;");
for (size_t i = 0; i < files.size(); i++) { for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str()); std::ifstream ifs(files[i].c_str());
XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; XCHECK(ifs.is_open()) << "open " << files[i] << " failed";
string line; std::string line;
while(getline(ifs, line)) { while(getline(ifs, line)) {
if (line.size() == 0) { if (line.size() == 0) {
continue; continue;
@ -165,7 +163,7 @@ class DictTrie {
private: private:
void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { void Init(const std::string& dict_path, const std::string& user_dict_paths, UserWordWeightOption user_word_weight_opt) {
LoadDict(dict_path); LoadDict(dict_path);
freq_sum_ = CalcFreqSum(static_node_infos_); freq_sum_ = CalcFreqSum(static_node_infos_);
CalculateWeight(static_node_infos_, freq_sum_); CalculateWeight(static_node_infos_, freq_sum_);
@ -177,11 +175,11 @@ class DictTrie {
Shrink(static_node_infos_); Shrink(static_node_infos_);
CreateTrie(static_node_infos_); CreateTrie(static_node_infos_);
} }
void CreateTrie(const vector<DictUnit>& dictUnits) { void CreateTrie(const std::vector<DictUnit>& dictUnits) {
assert(dictUnits.size()); assert(dictUnits.size());
vector<Unicode> words; std::vector<Unicode> words;
vector<const DictUnit*> valuePointers; std::vector<const DictUnit*> valuePointers;
for (size_t i = 0 ; i < dictUnits.size(); i ++) { for (size_t i = 0 ; i < dictUnits.size(); i ++) {
words.push_back(dictUnits[i].word); words.push_back(dictUnits[i].word);
valuePointers.push_back(&dictUnits[i]); valuePointers.push_back(&dictUnits[i]);
@ -190,13 +188,10 @@ class DictTrie {
trie_ = new Trie(words, valuePointers); trie_ = new Trie(words, valuePointers);
} }
bool MakeNodeInfo(DictUnit& node_info, bool MakeNodeInfo(DictUnit& node_info,
const string& word, const std::string& word,
double weight, double weight,
const string& tag) { const std::string& tag) {
if (!DecodeUTF8RunesInString(word, node_info.word)) { if (!DecodeUTF8RunesInString(word, node_info.word)) {
XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word; XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word;
return false; return false;
@ -206,19 +201,19 @@ class DictTrie {
return true; return true;
} }
void LoadDict(const string& filePath) { void LoadDict(const std::string& filePath) {
ifstream ifs(filePath.c_str()); std::ifstream ifs(filePath.c_str());
XCHECK(ifs.is_open()) << "open " << filePath << " failed."; XCHECK(ifs.is_open()) << "open " << filePath << " failed.";
string line; std::string line;
vector<string> buf; std::vector<std::string> buf;
DictUnit node_info; DictUnit node_info;
while (getline(ifs, line)) { while (getline(ifs, line)) {
Split(line, buf, " "); limonp::Split(line, buf, " ");
XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line; XCHECK(buf.size() == DICT_COLUMN_NUM) << "split result illegal, line:" << line;
MakeNodeInfo(node_info, MakeNodeInfo(node_info,
buf[0], buf[0],
atof(buf[1].c_str()), atof(buf[1].c_str()),
buf[2]); buf[2]);
static_node_infos_.push_back(node_info); static_node_infos_.push_back(node_info);
} }
@ -230,8 +225,8 @@ class DictTrie {
void SetStaticWordWeights(UserWordWeightOption option) { void SetStaticWordWeights(UserWordWeightOption option) {
XCHECK(!static_node_infos_.empty()); XCHECK(!static_node_infos_.empty());
vector<DictUnit> x = static_node_infos_; std::vector<DictUnit> x = static_node_infos_;
sort(x.begin(), x.end(), WeightCompare); std::sort(x.begin(), x.end(), WeightCompare);
min_weight_ = x[0].weight; min_weight_ = x[0].weight;
max_weight_ = x[x.size() - 1].weight; max_weight_ = x[x.size() - 1].weight;
median_weight_ = x[x.size() / 2].weight; median_weight_ = x[x.size() / 2].weight;
@ -248,7 +243,7 @@ class DictTrie {
} }
} }
double CalcFreqSum(const vector<DictUnit>& node_infos) const { double CalcFreqSum(const std::vector<DictUnit>& node_infos) const {
double sum = 0.0; double sum = 0.0;
for (size_t i = 0; i < node_infos.size(); i++) { for (size_t i = 0; i < node_infos.size(); i++) {
sum += node_infos[i].weight; sum += node_infos[i].weight;
@ -256,7 +251,7 @@ class DictTrie {
return sum; return sum;
} }
void CalculateWeight(vector<DictUnit>& node_infos, double sum) const { void CalculateWeight(std::vector<DictUnit>& node_infos, double sum) const {
assert(sum > 0.0); assert(sum > 0.0);
for (size_t i = 0; i < node_infos.size(); i++) { for (size_t i = 0; i < node_infos.size(); i++) {
DictUnit& node_info = node_infos[i]; DictUnit& node_info = node_infos[i];
@ -265,12 +260,12 @@ class DictTrie {
} }
} }
void Shrink(vector<DictUnit>& units) const { void Shrink(std::vector<DictUnit>& units) const {
vector<DictUnit>(units.begin(), units.end()).swap(units); std::vector<DictUnit>(units.begin(), units.end()).swap(units);
} }
vector<DictUnit> static_node_infos_; std::vector<DictUnit> static_node_infos_;
deque<DictUnit> active_node_infos_; // must not be vector std::deque<DictUnit> active_node_infos_; // must not be std::vector
Trie * trie_; Trie * trie_;
double freq_sum_; double freq_sum_;
@ -278,7 +273,7 @@ class DictTrie {
double max_weight_; double max_weight_;
double median_weight_; double median_weight_;
double user_word_default_weight_; double user_word_default_weight_;
unordered_set<Rune> user_dict_single_chinese_word_; std::unordered_set<Rune> user_dict_single_chinese_word_;
}; };
} }