mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
统一私有函数的命名风格
This commit is contained in:
parent
0e0318f6ad
commit
fae951a95d
@ -41,21 +41,21 @@ class DictTrie {
|
||||
if(trie_ != NULL) {
|
||||
LogFatal("trie already initted");
|
||||
}
|
||||
loadDict_(dictPath);
|
||||
calculateWeight_(staticNodeInfos_);
|
||||
minWeight_ = findMinWeight_(staticNodeInfos_);
|
||||
maxWeight_ = findMaxWeight_(staticNodeInfos_);
|
||||
LoadDict(dictPath);
|
||||
CalculateWeight(staticNodeInfos_);
|
||||
minWeight_ = FindMinWeight(staticNodeInfos_);
|
||||
maxWeight_ = FindMaxWeight(staticNodeInfos_);
|
||||
|
||||
if(userDictPath.size()) {
|
||||
loadUserDict_(userDictPath);
|
||||
LoadUserDict(userDictPath);
|
||||
}
|
||||
shrink_(staticNodeInfos_);
|
||||
createTrie_(staticNodeInfos_);
|
||||
Shrink(staticNodeInfos_);
|
||||
CreateTrie(staticNodeInfos_);
|
||||
}
|
||||
|
||||
bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
|
||||
DictUnit nodeInfo;
|
||||
if(!makeUserNodeInfo_(nodeInfo, word, tag)) {
|
||||
if(!MakeUserNodeInfo(nodeInfo, word, tag)) {
|
||||
return false;
|
||||
}
|
||||
activeNodeInfos_.push_back(nodeInfo);
|
||||
@ -83,7 +83,7 @@ class DictTrie {
|
||||
}
|
||||
|
||||
private:
|
||||
void createTrie_(const vector<DictUnit>& dictUnits) {
|
||||
void CreateTrie(const vector<DictUnit>& dictUnits) {
|
||||
assert(dictUnits.size());
|
||||
vector<Unicode> words;
|
||||
vector<const DictUnit*> valuePointers;
|
||||
@ -94,7 +94,7 @@ class DictTrie {
|
||||
|
||||
trie_ = new Trie(words, valuePointers);
|
||||
}
|
||||
void loadUserDict_(const string& filePath) {
|
||||
void LoadUserDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if(!ifs.is_open()) {
|
||||
LogFatal("file %s open failed.", filePath.c_str());
|
||||
@ -110,13 +110,13 @@ class DictTrie {
|
||||
LogFatal("split [%s] result illegal", line.c_str());
|
||||
}
|
||||
DictUnit nodeInfo;
|
||||
makeUserNodeInfo_(nodeInfo, buf[0],
|
||||
MakeUserNodeInfo(nodeInfo, buf[0],
|
||||
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
|
||||
staticNodeInfos_.push_back(nodeInfo);
|
||||
}
|
||||
LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno);
|
||||
}
|
||||
bool makeNodeInfo(DictUnit& nodeInfo,
|
||||
bool MakeNodeInfo(DictUnit& nodeInfo,
|
||||
const string& word,
|
||||
double weight,
|
||||
const string& tag) {
|
||||
@ -128,7 +128,7 @@ class DictTrie {
|
||||
nodeInfo.tag = tag;
|
||||
return true;
|
||||
}
|
||||
bool makeUserNodeInfo_(DictUnit& nodeInfo,
|
||||
bool MakeUserNodeInfo(DictUnit& nodeInfo,
|
||||
const string& word,
|
||||
const string& tag = UNKNOWN_TAG) {
|
||||
if(!TransCode::decode(word, nodeInfo.word)) {
|
||||
@ -142,7 +142,7 @@ class DictTrie {
|
||||
nodeInfo.tag = tag;
|
||||
return true;
|
||||
}
|
||||
void loadDict_(const string& filePath) {
|
||||
void LoadDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if(!ifs.is_open()) {
|
||||
LogFatal("file %s open failed.", filePath.c_str());
|
||||
@ -156,21 +156,21 @@ class DictTrie {
|
||||
if(buf.size() != DICT_COLUMN_NUM) {
|
||||
LogFatal("split result illegal, line: %s, result size: %u", line.c_str(), buf.size());
|
||||
}
|
||||
makeNodeInfo(nodeInfo,
|
||||
MakeNodeInfo(nodeInfo,
|
||||
buf[0],
|
||||
atof(buf[1].c_str()),
|
||||
buf[2]);
|
||||
staticNodeInfos_.push_back(nodeInfo);
|
||||
}
|
||||
}
|
||||
double findMinWeight_(const vector<DictUnit>& nodeInfos) const {
|
||||
double FindMinWeight(const vector<DictUnit>& nodeInfos) const {
|
||||
double ret = MAX_DOUBLE;
|
||||
for(size_t i = 0; i < nodeInfos.size(); i++) {
|
||||
ret = min(nodeInfos[i].weight, ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
double findMaxWeight_(const vector<DictUnit>& nodeInfos) const {
|
||||
double FindMaxWeight(const vector<DictUnit>& nodeInfos) const {
|
||||
double ret = MIN_DOUBLE;
|
||||
for(size_t i = 0; i < nodeInfos.size(); i++) {
|
||||
ret = max(nodeInfos[i].weight, ret);
|
||||
@ -178,7 +178,7 @@ class DictTrie {
|
||||
return ret;
|
||||
}
|
||||
|
||||
void calculateWeight_(vector<DictUnit>& nodeInfos) const {
|
||||
void CalculateWeight(vector<DictUnit>& nodeInfos) const {
|
||||
double sum = 0.0;
|
||||
for(size_t i = 0; i < nodeInfos.size(); i++) {
|
||||
sum += nodeInfos[i].weight;
|
||||
@ -191,11 +191,10 @@ class DictTrie {
|
||||
}
|
||||
}
|
||||
|
||||
void shrink_(vector<DictUnit>& units) const {
|
||||
void Shrink(vector<DictUnit>& units) const {
|
||||
vector<DictUnit>(units.begin(), units.end()).swap(units);
|
||||
}
|
||||
|
||||
private:
|
||||
vector<DictUnit> staticNodeInfos_;
|
||||
deque<DictUnit> activeNodeInfos_; // must not be vector
|
||||
Trie * trie_;
|
||||
|
@ -30,15 +30,15 @@ class HMMSegment: public SegmentBase {
|
||||
while(right != end) {
|
||||
if(*right < 0x80) {
|
||||
if(left != right) {
|
||||
cut_(left, right, res);
|
||||
Cut(left, right, res);
|
||||
}
|
||||
left = right;
|
||||
do {
|
||||
right = sequentialLetterRule_(left, end);
|
||||
right = SequentialLetterRule(left, end);
|
||||
if(right != left) {
|
||||
break;
|
||||
}
|
||||
right = numbersRule_(left, end);
|
||||
right = NumbersRule(left, end);
|
||||
if(right != left) {
|
||||
break;
|
||||
}
|
||||
@ -51,12 +51,12 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
if(left != right) {
|
||||
cut_(left, right, res);
|
||||
Cut(left, right, res);
|
||||
}
|
||||
}
|
||||
private:
|
||||
// sequential letters rule
|
||||
Unicode::const_iterator sequentialLetterRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
Unicode::const_iterator SequentialLetterRule(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
Rune x = *begin;
|
||||
if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z')) {
|
||||
begin ++;
|
||||
@ -74,7 +74,7 @@ class HMMSegment: public SegmentBase {
|
||||
return begin;
|
||||
}
|
||||
//
|
||||
Unicode::const_iterator numbersRule_(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
Unicode::const_iterator NumbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const {
|
||||
Rune x = *begin;
|
||||
if('0' <= x && x <= '9') {
|
||||
begin ++;
|
||||
@ -91,9 +91,9 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
return begin;
|
||||
}
|
||||
void cut_(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
|
||||
vector<size_t> status;
|
||||
viterbi_(begin, end, status);
|
||||
Viterbi(begin, end, status);
|
||||
|
||||
Unicode::const_iterator left = begin;
|
||||
Unicode::const_iterator right;
|
||||
@ -106,7 +106,7 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
|
||||
void viterbi_(Unicode::const_iterator begin,
|
||||
void Viterbi(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
vector<size_t>& status) const {
|
||||
size_t Y = HMMModel::STATUS_SUM;
|
||||
@ -160,7 +160,6 @@ class HMMSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const HMMModel* model_;
|
||||
bool isNeedDestroy_;
|
||||
}; // class HMMSegment
|
||||
|
@ -17,16 +17,16 @@ class KeywordExtractor {
|
||||
const string& stopWordPath,
|
||||
const string& userDict = "")
|
||||
: segment_(dictPath, hmmFilePath, userDict) {
|
||||
loadIdfDict_(idfPath);
|
||||
loadStopWordDict_(stopWordPath);
|
||||
LoadIdfDict(idfPath);
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
KeywordExtractor(const DictTrie* dictTrie,
|
||||
const HMMModel* model,
|
||||
const string& idfPath,
|
||||
const string& stopWordPath)
|
||||
: segment_(dictTrie, model){
|
||||
loadIdfDict_(idfPath);
|
||||
loadStopWordDict_(stopWordPath);
|
||||
LoadIdfDict(idfPath);
|
||||
LoadStopWordDict(stopWordPath);
|
||||
}
|
||||
~KeywordExtractor() {
|
||||
}
|
||||
@ -51,7 +51,7 @@ class KeywordExtractor {
|
||||
|
||||
map<string, double> wordmap;
|
||||
for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
|
||||
if(isSingleWord_(*iter)) {
|
||||
if(IsSingleWord(*iter)) {
|
||||
continue;
|
||||
}
|
||||
wordmap[*iter] += 1.0;
|
||||
@ -75,12 +75,12 @@ class KeywordExtractor {
|
||||
keywords.clear();
|
||||
std::copy(wordmap.begin(), wordmap.end(), std::inserter(keywords, keywords.begin()));
|
||||
topN = min(topN, keywords.size());
|
||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), cmp_);
|
||||
partial_sort(keywords.begin(), keywords.begin() + topN, keywords.end(), Compare);
|
||||
keywords.resize(topN);
|
||||
return true;
|
||||
}
|
||||
private:
|
||||
void loadIdfDict_(const string& idfPath) {
|
||||
void LoadIdfDict(const string& idfPath) {
|
||||
ifstream ifs(idfPath.c_str());
|
||||
if(!ifs.is_open()) {
|
||||
LogFatal("open %s failed.", idfPath.c_str());
|
||||
@ -111,7 +111,7 @@ class KeywordExtractor {
|
||||
idfAverage_ = idfSum / lineno;
|
||||
assert(idfAverage_ > 0.0);
|
||||
}
|
||||
void loadStopWordDict_(const string& filePath) {
|
||||
void LoadStopWordDict(const string& filePath) {
|
||||
ifstream ifs(filePath.c_str());
|
||||
if(!ifs.is_open()) {
|
||||
LogFatal("open %s failed.", filePath.c_str());
|
||||
@ -123,7 +123,7 @@ class KeywordExtractor {
|
||||
assert(stopWords_.size());
|
||||
}
|
||||
|
||||
bool isSingleWord_(const string& str) const {
|
||||
bool IsSingleWord(const string& str) const {
|
||||
Unicode unicode;
|
||||
TransCode::decode(str, unicode);
|
||||
if(unicode.size() == 1)
|
||||
@ -131,11 +131,10 @@ class KeywordExtractor {
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool cmp_(const pair<string, double>& lhs, const pair<string, double>& rhs) {
|
||||
static bool Compare(const pair<string, double>& lhs, const pair<string, double>& rhs) {
|
||||
return lhs.second > rhs.second;
|
||||
}
|
||||
|
||||
private:
|
||||
MixSegment segment_;
|
||||
unordered_map<string, double> idfMap_;
|
||||
double idfAverage_;
|
||||
|
@ -38,9 +38,9 @@ class MPSegment: public SegmentBase {
|
||||
|
||||
dictTrie_->find(begin, end, dags);
|
||||
|
||||
calcDP_(dags);
|
||||
CalcDP(dags);
|
||||
|
||||
cut_(dags, res);
|
||||
Cut(dags, res);
|
||||
}
|
||||
void cut(Unicode::const_iterator begin,
|
||||
Unicode::const_iterator end,
|
||||
@ -51,15 +51,15 @@ class MPSegment: public SegmentBase {
|
||||
end,
|
||||
dags,
|
||||
max_word_len);
|
||||
calcDP_(dags);
|
||||
cut_(dags, res);
|
||||
CalcDP(dags);
|
||||
Cut(dags, res);
|
||||
}
|
||||
const DictTrie* getDictTrie() const {
|
||||
return dictTrie_;
|
||||
}
|
||||
|
||||
private:
|
||||
void calcDP_(vector<Dag>& dags) const {
|
||||
void CalcDP(vector<Dag>& dags) const {
|
||||
size_t nextPos;
|
||||
const DictUnit* p;
|
||||
double val;
|
||||
@ -88,7 +88,7 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
}
|
||||
void cut_(const vector<Dag>& dags,
|
||||
void Cut(const vector<Dag>& dags,
|
||||
vector<Unicode>& res) const {
|
||||
size_t i = 0;
|
||||
while(i < dags.size()) {
|
||||
@ -103,7 +103,6 @@ class MPSegment: public SegmentBase {
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const DictTrie* dictTrie_;
|
||||
bool isNeedDestroy_;
|
||||
}; // class MPSegment
|
||||
|
@ -43,7 +43,7 @@ class PosTagger {
|
||||
}
|
||||
tmp = dict->find(unico.begin(), unico.end());
|
||||
if(tmp == NULL || tmp->tag.empty()) {
|
||||
res.push_back(make_pair(*itr, specialRule_(unico)));
|
||||
res.push_back(make_pair(*itr, SpecialRule(unico)));
|
||||
} else {
|
||||
res.push_back(make_pair(*itr, tmp->tag));
|
||||
}
|
||||
@ -51,7 +51,7 @@ class PosTagger {
|
||||
return !res.empty();
|
||||
}
|
||||
private:
|
||||
const char* specialRule_(const Unicode& unicode) const {
|
||||
const char* SpecialRule(const Unicode& unicode) const {
|
||||
size_t m = 0;
|
||||
size_t eng = 0;
|
||||
for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
|
||||
@ -73,7 +73,7 @@ class PosTagger {
|
||||
// the ascii chars contain english letter
|
||||
return POS_ENG;
|
||||
}
|
||||
private:
|
||||
|
||||
MixSegment segment_;
|
||||
}; // class PosTagger
|
||||
|
||||
|
@ -22,7 +22,7 @@ const Rune SPECIAL_SYMBOL[] = {32u, 9u, 10u};
|
||||
class SegmentBase: public ISegment, public NonCopyable {
|
||||
public:
|
||||
SegmentBase() {
|
||||
loadSpecialSymbols_();
|
||||
LoadSpecialSymbols();
|
||||
};
|
||||
virtual ~SegmentBase() {
|
||||
};
|
||||
@ -69,17 +69,18 @@ class SegmentBase: public ISegment, public NonCopyable {
|
||||
}
|
||||
}
|
||||
private:
|
||||
void loadSpecialSymbols_() {
|
||||
void LoadSpecialSymbols() {
|
||||
size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
|
||||
for(size_t i = 0; i < size; i ++) {
|
||||
specialSymbols_.insert(SPECIAL_SYMBOL[i]);
|
||||
}
|
||||
assert(specialSymbols_.size());
|
||||
}
|
||||
private:
|
||||
|
||||
unordered_set<Rune> specialSymbols_;
|
||||
|
||||
};
|
||||
}
|
||||
}; // class SegmentBase
|
||||
|
||||
} // CppJieba
|
||||
|
||||
#endif
|
||||
|
10
src/Trie.hpp
10
src/Trie.hpp
@ -49,7 +49,7 @@ class Trie {
|
||||
public:
|
||||
static const size_t BASE_SIZE = (1 << (8 * (sizeof(TrieKey))));
|
||||
Trie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
||||
_createTrie(keys, valuePointers);
|
||||
CreateTrie(keys, valuePointers);
|
||||
}
|
||||
~Trie() {
|
||||
for (size_t i = 0; i < BASE_SIZE; i++) {
|
||||
@ -57,7 +57,7 @@ class Trie {
|
||||
continue;
|
||||
}
|
||||
for (TrieNode::NextMap::iterator it = _base[i].next->begin(); it != _base[i].next->end(); it++) {
|
||||
_deleteNode(it->second);
|
||||
DeleteNode(it->second);
|
||||
it->second = NULL;
|
||||
}
|
||||
delete _base[i].next;
|
||||
@ -143,7 +143,7 @@ class Trie {
|
||||
}
|
||||
|
||||
private:
|
||||
void _createTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
||||
void CreateTrie(const vector<Unicode>& keys, const vector<const DictUnit*>& valuePointers) {
|
||||
if (valuePointers.empty() || keys.empty()) {
|
||||
return;
|
||||
}
|
||||
@ -154,14 +154,14 @@ class Trie {
|
||||
}
|
||||
}
|
||||
|
||||
void _deleteNode(TrieNode* node) {
|
||||
void DeleteNode(TrieNode* node) {
|
||||
if (NULL == node) {
|
||||
return;
|
||||
}
|
||||
if (NULL != node->next) {
|
||||
TrieNode::NextMap::iterator it;
|
||||
for (it = node->next->begin(); it != node->next->end(); it++) {
|
||||
_deleteNode(it->second);
|
||||
DeleteNode(it->second);
|
||||
}
|
||||
delete node->next;
|
||||
node->next = NULL;
|
||||
|
Loading…
x
Reference in New Issue
Block a user