[code style] uppercase function name

yanyiwu 2015-10-29 12:30:47 +08:00
parent 1a9a37aa64
commit f17c2d10e2
13 changed files with 78 additions and 77 deletions

View File

@ -5,6 +5,7 @@
1. Support loading multiple user dictionaries: separate the dictionary paths with an ASCII colon (:), as a small homage to the PATH environment variable.
2. User dictionaries carry no weights. New user words used to be assigned the maximum frequency weight by default; this is now configurable and defaults to the median value.
3. [Compatibility warning] Some code-style changes, e.g. the namespace is lowercased from CppJieba to cppjieba.
4. [Compatibility warning] Application.hpp is deprecated in favor of Jieba.hpp; the interface has also been heavily revised toward a more uniform function style, closer to the Python version of Jieba (a short usage sketch follows this excerpt).
## v3.2.1
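The renamed Jieba.hpp entry points (Cut, CutAll, CutForSearch, CutHMM) that this commit introduces can be driven roughly as in the minimal sketch below; the constructor arguments and dictionary paths are assumptions for illustration, not taken from this diff:

    #include <iostream>
    #include <string>
    #include <vector>
    #include "Jieba.hpp"

    int main() {
      // Paths are placeholders. Multiple user dictionaries are assumed to be
      // joined with ':' as described in item 1 of the changelog above.
      cppjieba::Jieba jieba("dict/jieba.dict.utf8",
                            "dict/hmm_model.utf8",
                            "user.dict.utf8:extra.dict.utf8");
      std::vector<std::string> words;
      // Cut() uses the mixed MP + HMM segmenter; the third argument toggles HMM.
      jieba.Cut("他来到了网易杭研大厦", words, true);
      for (size_t i = 0; i < words.size(); i++) {
        std::cout << words[i] << '/';
      }
      std::cout << std::endl;
      return 0;
    }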

View File

@ -44,7 +44,7 @@ class DictTrie {
return false;
}
active_node_infos_.push_back(node_info);
trie_->insertNode(node_info.word, &active_node_infos_.back());
trie_->InsertNode(node_info.word, &active_node_infos_.back());
return true;
}
@ -120,7 +120,7 @@ class DictTrie {
}
}
}
LogInfo("load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
LogInfo("Load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
}
bool MakeNodeInfo(DictUnit& node_info,

View File

@ -26,7 +26,7 @@ class FullSegment: public SegmentBase {
delete dictTrie_;
}
}
void cut(const string& sentence,
void Cut(const string& sentence,
vector<string>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
@ -34,11 +34,11 @@ class FullSegment: public SegmentBase {
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords);
Cut(range.begin, range.end, uwords);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin,
void Cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<Unicode>& res) const {
//result of searching in trie tree

View File

@ -26,11 +26,11 @@ struct HMMModel {
emitProbVec.push_back(&emitProbE);
emitProbVec.push_back(&emitProbM);
emitProbVec.push_back(&emitProbS);
loadModel(modelPath);
LoadModel(modelPath);
}
~HMMModel() {
}
void loadModel(const string& filePath) {
void LoadModel(const string& filePath) {
ifstream ifile(filePath.c_str());
if (!ifile.is_open()) {
LogFatal("open %s failed.", filePath.c_str());
@ -38,9 +38,9 @@ struct HMMModel {
string line;
vector<string> tmp;
vector<string> tmp2;
//load startProb
if (!getLine(ifile, line)) {
LogFatal("load startProb");
//Load startProb
if (!GetLine(ifile, line)) {
LogFatal("Load startProb");
}
split(line, tmp, " ");
if (tmp.size() != STATUS_SUM) {
@ -50,10 +50,10 @@ struct HMMModel {
startProb[j] = atof(tmp[j].c_str());
}
//load transProb
//Load transProb
for (size_t i = 0; i < STATUS_SUM; i++) {
if (!getLine(ifile, line)) {
LogFatal("load transProb failed.");
if (!GetLine(ifile, line)) {
LogFatal("Load transProb failed.");
}
split(line, tmp, " ");
if (tmp.size() != STATUS_SUM) {
@ -64,27 +64,27 @@ struct HMMModel {
}
}
//load emitProbB
if (!getLine(ifile, line) || !loadEmitProb(line, emitProbB)) {
LogFatal("load emitProbB failed.");
//Load emitProbB
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbB)) {
LogFatal("Load emitProbB failed.");
}
//load emitProbE
if (!getLine(ifile, line) || !loadEmitProb(line, emitProbE)) {
LogFatal("load emitProbE failed.");
//Load emitProbE
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbE)) {
LogFatal("Load emitProbE failed.");
}
//load emitProbM
if (!getLine(ifile, line) || !loadEmitProb(line, emitProbM)) {
LogFatal("load emitProbM failed.");
//Load emitProbM
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbM)) {
LogFatal("Load emitProbM failed.");
}
//load emitProbS
if (!getLine(ifile, line) || !loadEmitProb(line, emitProbS)) {
LogFatal("load emitProbS failed.");
//Load emitProbS
if (!GetLine(ifile, line) || !LoadEmitProb(line, emitProbS)) {
LogFatal("Load emitProbS failed.");
}
}
double getEmitProb(const EmitProbMap* ptMp, uint16_t key,
double GetEmitProb(const EmitProbMap* ptMp, uint16_t key,
double defVal)const {
EmitProbMap::const_iterator cit = ptMp->find(key);
if (cit == ptMp->end()) {
@ -92,7 +92,7 @@ struct HMMModel {
}
return cit->second;
}
bool getLine(ifstream& ifile, string& line) {
bool GetLine(ifstream& ifile, string& line) {
while (getline(ifile, line)) {
trim(line);
if (line.empty()) {
@ -105,7 +105,7 @@ struct HMMModel {
}
return false;
}
bool loadEmitProb(const string& line, EmitProbMap& mp) {
bool LoadEmitProb(const string& line, EmitProbMap& mp) {
if (line.empty()) {
return false;
}

View File

@ -23,7 +23,7 @@ class HMMSegment: public SegmentBase {
}
}
void cut(const string& sentence,
void Cut(const string& sentence,
vector<string>& words) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
@ -31,11 +31,11 @@ class HMMSegment: public SegmentBase {
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords);
Cut(range.begin, range.end, uwords);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
Unicode::const_iterator left = begin;
Unicode::const_iterator right = begin;
while (right != end) {
@ -132,7 +132,7 @@ class HMMSegment: public SegmentBase {
//start
for (size_t y = 0; y < Y; y++) {
weight[0 + y * X] = model_->startProb[y] + model_->getEmitProb(model_->emitProbVec[y], *begin, MIN_DOUBLE);
weight[0 + y * X] = model_->startProb[y] + model_->GetEmitProb(model_->emitProbVec[y], *begin, MIN_DOUBLE);
path[0 + y * X] = -1;
}
@ -143,7 +143,7 @@ class HMMSegment: public SegmentBase {
now = x + y*X;
weight[now] = MIN_DOUBLE;
path[now] = HMMModel::E; // warning
emitProb = model_->getEmitProb(model_->emitProbVec[y], *(begin+x), MIN_DOUBLE);
emitProb = model_->GetEmitProb(model_->emitProbVec[y], *(begin+x), MIN_DOUBLE);
for (size_t preY = 0; preY < Y; preY++) {
old = x - 1 + preY * X;
tmp = weight[old] + model_->transProb[preY][y] + emitProb;

View File

@ -23,25 +23,25 @@ class Jieba {
}
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
mix_seg_.cut(sentence, words, hmm);
mix_seg_.Cut(sentence, words, hmm);
}
void CutAll(const string& sentence, vector<string>& words) const {
full_seg_.cut(sentence, words);
full_seg_.Cut(sentence, words);
}
void CutForSearch(const string& sentence, vector<string>& words, bool hmm = true) const {
query_seg_.cut(sentence, words, hmm);
query_seg_.Cut(sentence, words, hmm);
}
void CutHMM(const string& sentence, vector<string>& words) const {
hmm_seg_.cut(sentence, words);
hmm_seg_.Cut(sentence, words);
}
void CutLevel(const string& sentence, vector<string>& words) const {
level_seg_.cut(sentence, words);
level_seg_.Cut(sentence, words);
}
void CutLevel(const string& sentence, vector<pair<string, size_t> >& words) const {
level_seg_.cut(sentence, words);
level_seg_.Cut(sentence, words);
}
void CutSmall(const string& sentence, vector<string>& words, size_t max_word_len) const {
mp_seg_.cut(sentence, words, max_word_len);
mp_seg_.Cut(sentence, words, max_word_len);
}
bool InsertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
return dict_trie_.InsertUserWord(word, tag);

View File

@ -44,7 +44,7 @@ class KeywordExtractor {
bool extract(const string& sentence, vector<pair<string, double> >& keywords, size_t topN) const {
vector<string> words;
segment_.cut(sentence, words);
segment_.Cut(sentence, words);
map<string, double> wordmap;
for (vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {

View File

@ -18,14 +18,14 @@ class LevelSegment: public SegmentBase{
~LevelSegment() {
}
void cut(Unicode::const_iterator begin,
void Cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<pair<Unicode, size_t> >& res) const {
res.clear();
vector<Unicode> words;
vector<Unicode> smallerWords;
words.reserve(end - begin);
mpSeg_.cut(begin, end, words);
mpSeg_.Cut(begin, end, words);
smallerWords.reserve(words.size());
res.reserve(words.size());
@ -35,7 +35,7 @@ class LevelSegment: public SegmentBase{
for (size_t i = 0; i < words.size(); i++) {
if (words[i].size() >= 3) {
size_t len = words[i].size() - 1;
mpSeg_.cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear
mpSeg_.Cut(words[i].begin(), words[i].end(), smallerWords, len); // buffer.push_back without clear
}
if (words[i].size() > 1) {
res.push_back(pair<Unicode, size_t>(words[i], level));
@ -47,13 +47,13 @@ class LevelSegment: public SegmentBase{
}
}
void cut(const string& sentence,
void Cut(const string& sentence,
vector<pair<string, size_t> >& words) const {
words.clear();
Unicode unicode;
TransCode::decode(sentence, unicode);
vector<pair<Unicode, size_t> > unicodeWords;
cut(unicode.begin(), unicode.end(), unicodeWords);
Cut(unicode.begin(), unicode.end(), unicodeWords);
words.resize(unicodeWords.size());
for (size_t i = 0; i < words.size(); i++) {
TransCode::encode(unicodeWords[i].first, words[i].first);
@ -61,10 +61,10 @@ class LevelSegment: public SegmentBase{
}
}
bool cut(const string& sentence,
bool Cut(const string& sentence,
vector<string>& res) const {
vector<pair<string, size_t> > words;
cut(sentence, words);
Cut(sentence, words);
res.clear();
res.reserve(words.size());
for (size_t i = 0; i < words.size(); i++) {

View File

@ -27,7 +27,7 @@ class MPSegment: public SegmentBase {
}
}
void cut(const string& sentence,
void Cut(const string& sentence,
vector<string>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
PreFilter pre_filter(symbols_, sentence);
@ -36,11 +36,11 @@ class MPSegment: public SegmentBase {
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords, max_word_len);
Cut(range.begin, range.end, uwords, max_word_len);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin,
void Cut(Unicode::const_iterator begin,
Unicode::const_iterator end,
vector<Unicode>& words,
size_t max_word_len = MAX_WORD_LENGTH) const {
@ -53,7 +53,7 @@ class MPSegment: public SegmentBase {
CutByDag(dags, words);
}
const DictTrie* getDictTrie() const {
const DictTrie* GetDictTrie() const {
return dictTrie_;
}

View File

@ -21,47 +21,47 @@ class MixSegment: public SegmentBase {
~MixSegment() {
}
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<Unicode> uwords;
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords, hmm);
Cut(range.begin, range.end, uwords, hmm);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
if (!hmm) {
mpSeg_.cut(begin, end, res);
mpSeg_.Cut(begin, end, res);
return;
}
vector<Unicode> words;
words.reserve(end - begin);
mpSeg_.cut(begin, end, words);
mpSeg_.Cut(begin, end, words);
vector<Unicode> hmmRes;
hmmRes.reserve(end - begin);
Unicode piece;
piece.reserve(end - begin);
for (size_t i = 0, j = 0; i < words.size(); i++) {
//if mp get a word, it's ok, put it into result
//if mp Get a word, it's ok, put it into result
if (1 != words[i].size() || (words[i].size() == 1 && mpSeg_.IsUserDictSingleChineseWord(words[i][0]))) {
res.push_back(words[i]);
continue;
}
// if mp get a single one and it is not in userdict, collect it in sequence
// if mp Get a single one and it is not in userdict, collect it in sequence
j = i;
while (j < words.size() && 1 == words[j].size() && !mpSeg_.IsUserDictSingleChineseWord(words[j][0])) {
piece.push_back(words[j][0]);
j++;
}
// cut the sequence with hmm
hmmSeg_.cut(piece.begin(), piece.end(), hmmRes);
// Cut the sequence with hmm
hmmSeg_.Cut(piece.begin(), piece.end(), hmmRes);
//put hmm result to result
for (size_t k = 0; k < hmmRes.size(); k++) {
@ -77,8 +77,8 @@ class MixSegment: public SegmentBase {
}
}
const DictTrie* getDictTrie() const {
return mpSeg_.getDictTrie();
const DictTrie* GetDictTrie() const {
return mpSeg_.GetDictTrie();
}
private:
MPSegment mpSeg_;

View File

@ -26,14 +26,14 @@ class PosTagger {
}
bool tag(const string& src, vector<pair<string, string> >& res) const {
vector<string> cutRes;
segment_.cut(src, cutRes);
vector<string> CutRes;
segment_.Cut(src, CutRes);
const DictUnit *tmp = NULL;
Unicode unico;
const DictTrie * dict = segment_.getDictTrie();
const DictTrie * dict = segment_.GetDictTrie();
assert(dict != NULL);
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr) {
for (vector<string>::iterator itr = CutRes.begin(); itr != CutRes.end(); ++itr) {
if (!TransCode::decode(*itr, unico)) {
LogError("decode failed.");
return false;

View File

@ -17,7 +17,7 @@ class QuerySegment: public SegmentBase {
public:
QuerySegment(const string& dict, const string& model, const string& userDict = "", size_t maxWordLen = 4)
: mixSeg_(dict, model, userDict),
fullSeg_(mixSeg_.getDictTrie()),
fullSeg_(mixSeg_.GetDictTrie()),
maxWordLen_(maxWordLen) {
assert(maxWordLen_);
}
@ -26,27 +26,27 @@ class QuerySegment: public SegmentBase {
}
~QuerySegment() {
}
void cut(const string& sentence, vector<string>& words, bool hmm = true) const {
void Cut(const string& sentence, vector<string>& words, bool hmm = true) const {
PreFilter pre_filter(symbols_, sentence);
PreFilter::Range range;
vector<Unicode> uwords;
uwords.reserve(sentence.size());
while (pre_filter.HasNext()) {
range = pre_filter.Next();
cut(range.begin, range.end, uwords, hmm);
Cut(range.begin, range.end, uwords, hmm);
}
TransCode::encode(uwords, words);
}
void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
//use mix cut first
void Cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res, bool hmm) const {
//use mix Cut first
vector<Unicode> mixRes;
mixSeg_.cut(begin, end, mixRes, hmm);
mixSeg_.Cut(begin, end, mixRes, hmm);
vector<Unicode> fullRes;
for (vector<Unicode>::const_iterator mixResItr = mixRes.begin(); mixResItr != mixRes.end(); mixResItr++) {
// if it's too long, cut with fullSeg_, put fullRes in res
// if it's too long, Cut with fullSeg_, put fullRes in res
if (mixResItr->size() > maxWordLen_) {
fullSeg_.cut(mixResItr->begin(), mixResItr->end(), fullRes);
fullSeg_.Cut(mixResItr->begin(), mixResItr->end(), fullRes);
for (vector<Unicode>::const_iterator fullResItr = fullRes.begin(); fullResItr != fullRes.end(); fullResItr++) {
res.push_back(*fullResItr);
}

View File

@ -117,7 +117,7 @@ class Trie {
}
}
void insertNode(const Unicode& key, const DictUnit* ptValue) {
void InsertNode(const Unicode& key, const DictUnit* ptValue) {
if (key.begin() == key.end()) {
return;
}
@ -150,7 +150,7 @@ class Trie {
assert(keys.size() == valuePointers.size());
for (size_t i = 0; i < keys.size(); i++) {
insertNode(keys[i], valuePointers[i]);
InsertNode(keys[i], valuePointers[i]);
}
}