Merge pull request #196 from ahmadov/ahmadov/fix-ns-2

avoid implicit namespaces: drop the header-level "using namespace std;" and "using namespace limonp;" directives and qualify names with std:: / limonp:: explicitly
Author: Yanyi Wu, 2025-04-11 08:59:41 +08:00 (committed by GitHub)
commit 338603b676

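The diff below removes the namespace-scope using-directives from the KeywordExtractor header and spells out every standard-library and limonp name. The underlying issue: using-directives in a header are transitive, so "using namespace std;" placed inside the library namespace leaks into every translation unit that includes the header and into any consumer that writes "using namespace cppjieba;", which can turn unrelated unqualified names ambiguous. The following minimal, self-contained sketch illustrates that leak; the names jiebalike, Quote, and the global count variable are hypothetical stand-ins, not cppjieba code.

// Sketch: a header-level using-directive leaking into a consumer.
#include <algorithm>   // brings the std::count function template into scope
#include <string>
#include <vector>

namespace jiebalike {            // stand-in for a library namespace such as cppjieba
using namespace std;             // the kind of directive this commit removes
inline string Quote(const string& s) { return "\"" + s + "\""; }
}  // namespace jiebalike

using namespace jiebalike;       // a typical consumer convenience; using-directives
                                 // are transitive, so this also drags in std

int count = 0;                   // a perfectly ordinary global in the consumer...

int main() {
  // count = 1;                  // ...but this unqualified use would now be ambiguous:
                                 // ::count vs. std::count (leaked through jiebalike)
  ::count = 1;                   // the caller is forced to qualify even its own name
  std::vector<int> v{1, 2, 3};
  (void)std::count(v.begin(), v.end(), 1);
  return Quote("ok").empty() ? 1 : 0;
}

Qualifying names inside the header, as this commit does, keeps the header's dependencies out of its users' name lookup entirely.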

@@ -1,37 +1,35 @@
 #ifndef CPPJIEBA_KEYWORD_EXTRACTOR_H
 #define CPPJIEBA_KEYWORD_EXTRACTOR_H
-#include <cmath>
-#include <set>
+#include <algorithm>
+#include <unordered_map>
+#include <unordered_set>
 #include "MixSegment.hpp"
 namespace cppjieba {
-using namespace limonp;
-using namespace std;
 /*utf8*/
 class KeywordExtractor {
  public:
   struct Word {
-    string word;
-    vector<size_t> offsets;
+    std::string word;
+    std::vector<size_t> offsets;
     double weight;
   }; // struct Word
-  KeywordExtractor(const string& dictPath,
-                   const string& hmmFilePath,
-                   const string& idfPath,
-                   const string& stopWordPath,
-                   const string& userDict = "")
+  KeywordExtractor(const std::string& dictPath,
+                   const std::string& hmmFilePath,
+                   const std::string& idfPath,
+                   const std::string& stopWordPath,
+                   const std::string& userDict = "")
     : segment_(dictPath, hmmFilePath, userDict) {
     LoadIdfDict(idfPath);
     LoadStopWordDict(stopWordPath);
   }
   KeywordExtractor(const DictTrie* dictTrie,
                    const HMMModel* model,
-                   const string& idfPath,
-                   const string& stopWordPath)
+                   const std::string& idfPath,
+                   const std::string& stopWordPath)
     : segment_(dictTrie, model) {
     LoadIdfDict(idfPath);
     LoadStopWordDict(stopWordPath);
@@ -39,27 +37,27 @@ class KeywordExtractor {
   ~KeywordExtractor() {
   }
-  void Extract(const string& sentence, vector<string>& keywords, size_t topN) const {
-    vector<Word> topWords;
+  void Extract(const std::string& sentence, std::vector<std::string>& keywords, size_t topN) const {
+    std::vector<Word> topWords;
     Extract(sentence, topWords, topN);
     for (size_t i = 0; i < topWords.size(); i++) {
       keywords.push_back(topWords[i].word);
     }
   }
-  void Extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
-    vector<Word> topWords;
+  void Extract(const std::string& sentence, std::vector<pair<std::string, double> >& keywords, size_t topN) const {
+    std::vector<Word> topWords;
     Extract(sentence, topWords, topN);
     for (size_t i = 0; i < topWords.size(); i++) {
-      keywords.push_back(pair<string, double>(topWords[i].word, topWords[i].weight));
+      keywords.push_back(pair<std::string, double>(topWords[i].word, topWords[i].weight));
     }
   }
-  void Extract(const string& sentence, vector<Word>& keywords, size_t topN) const {
-    vector<string> words;
+  void Extract(const std::string& sentence, std::vector<Word>& keywords, size_t topN) const {
+    std::vector<std::string> words;
     segment_.Cut(sentence, words);
-    map<string, Word> wordmap;
+    std::map<std::string, Word> wordmap;
     size_t offset = 0;
     for (size_t i = 0; i < words.size(); ++i) {
       size_t t = offset;
@@ -77,8 +75,8 @@ class KeywordExtractor {
     keywords.clear();
     keywords.reserve(wordmap.size());
-    for (map<string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
-      unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
+    for (std::map<std::string, Word>::iterator itr = wordmap.begin(); itr != wordmap.end(); ++itr) {
+      std::unordered_map<std::string, double>::const_iterator cit = idfMap_.find(itr->first);
       if (cit != idfMap_.end()) {
         itr->second.weight *= cit->second;
       } else {
@@ -88,15 +86,15 @@ class KeywordExtractor {
       keywords.push_back(itr->second);
     }
     topN = min(topN, keywords.size());
-    partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
+    std::partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
     keywords.resize(topN);
   }
  private:
-  void LoadIdfDict(const string& idfPath) {
-    ifstream ifs(idfPath.c_str());
+  void LoadIdfDict(const std::string& idfPath) {
+    std::ifstream ifs(idfPath.c_str());
     XCHECK(ifs.is_open()) << "open " << idfPath << " failed";
-    string line ;
-    vector<string> buf;
+    std::string line ;
+    std::vector<std::string> buf;
     double idf = 0.0;
     double idfSum = 0.0;
     size_t lineno = 0;
@@ -106,7 +104,7 @@ class KeywordExtractor {
         XLOG(ERROR) << "lineno: " << lineno << " empty. skipped.";
         continue;
       }
-      Split(line, buf, " ");
+      limonp::Split(line, buf, " ");
       if (buf.size() != 2) {
         XLOG(ERROR) << "line: " << line << ", lineno: " << lineno << " empty. skipped.";
         continue;
@@ -121,10 +119,10 @@ class KeywordExtractor {
     idfAverage_ = idfSum / lineno;
     assert(idfAverage_ > 0.0);
   }
-  void LoadStopWordDict(const string& filePath) {
-    ifstream ifs(filePath.c_str());
+  void LoadStopWordDict(const std::string& filePath) {
+    std::ifstream ifs(filePath.c_str());
     XCHECK(ifs.is_open()) << "open " << filePath << " failed";
-    string line ;
+    std::string line ;
     while (getline(ifs, line)) {
       stopWords_.insert(line);
     }
@@ -136,18 +134,16 @@ class KeywordExtractor {
   }
   MixSegment segment_;
-  unordered_map<string, double> idfMap_;
+  std::unordered_map<std::string, double> idfMap_;
   double idfAverage_;
-  unordered_set<string> stopWords_;
+  std::unordered_set<std::string> stopWords_;
 }; // class KeywordExtractor
-inline ostream& operator << (ostream& os, const KeywordExtractor::Word& word) {
+inline std::ostream& operator << (std::ostream& os, const KeywordExtractor::Word& word) {
   return os << "{\"word\": \"" << word.word << "\", \"offset\": " << word.offsets << ", \"weight\": " << word.weight << "}";
 }
 } // namespace cppjieba
 #endif
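For reference, a hypothetical caller after this change, using the fully qualified types the constructor and Extract overloads shown above now take. The include path, dictionary file paths, and sample sentence are placeholders, not part of this commit.

// Sketch of a consumer of the now fully qualified KeywordExtractor API.
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "cppjieba/KeywordExtractor.hpp"

int main() {
  cppjieba::KeywordExtractor extractor(
      "dict/jieba.dict.utf8",     // dictPath (placeholder)
      "dict/hmm_model.utf8",      // hmmFilePath (placeholder)
      "dict/idf.utf8",            // idfPath (placeholder)
      "dict/stop_words.utf8");    // stopWordPath (placeholder; userDict defaults to "")

  // The (word, weight) overload fills a vector of pairs sorted by TF-IDF weight.
  std::vector<std::pair<std::string, double> > keywords;
  extractor.Extract("我是拖拉机学院手扶拖拉机专业的", keywords, 5);
  for (size_t i = 0; i < keywords.size(); ++i) {
    std::cout << keywords[i].first << "\t" << keywords[i].second << std::endl;
  }
  return 0;
}

Because the header no longer injects std or limonp names, caller code like this compiles the same whether or not it adds its own using-directives.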