Mirror of https://github.com/yanyiwu/cppjieba.git (synced 2025-07-18 00:00:12 +08:00)
commit b28d6db574 (parent 9b60537b40)

    code style
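The convention applied throughout this commit is a single space between a control-flow keyword (if, for, while, do...while) and its opening parenthesis, while ordinary function calls keep no space. The repository does not ship a formatter configuration in this commit, so the snippet below is only a sketch of a hypothetical .clang-format file that would enforce the same spacing automatically; note that clang-format's ControlStatements setting would leave calls such as std::find_if( untouched, whereas the gtest hunks near the end show those call sites were rewritten here as well.

    # Hypothetical .clang-format sketch (not part of this commit); it reproduces
    # the spacing applied in these hunks: "if (", "for (", "while (", but keeps
    # function calls like foo(x) unchanged.
    BasedOnStyle: Google
    IndentWidth: 2
    # Add a space before '(' only after control-statement keywords.
    SpaceBeforeParens: ControlStatements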
@@ -54,7 +54,7 @@ class ReqHandler: public IRequestHandler {
 } else { // default
 app_.cut(sentence, words, CppJieba::METHOD_MIX);
 }
-if(format == "simple") {
+if (format == "simple") {
 join(words.begin(), words.end(), strSnd, " ");
 } else {
 strSnd << words;
@@ -65,11 +65,11 @@ class ReqHandler: public IRequestHandler {
 };

 bool run(int argc, char** argv) {
-if(argc < 2) {
+if (argc < 2) {
 return false;
 }
 Config conf(argv[1]);
-if(!conf) {
+if (!conf) {
 return false;
 }
 int port = conf.get("port", 1339);
@@ -95,7 +95,7 @@ bool run(int argc, char** argv) {
 }

 int main(int argc, char* argv[]) {
-if(!run(argc, argv)) {
+if (!run(argc, argv)) {
 printf("usage: %s <config_file>\n", argv[0]);
 return EXIT_FAILURE;
 }

@@ -36,7 +36,7 @@ class DictTrie {
 }

 void init(const string& dictPath, const string& userDictPaths = "") {
-if(trie_ != NULL) {
+if (trie_ != NULL) {
 LogFatal("trie already initted");
 }
 LoadDict(dictPath);
@@ -44,7 +44,7 @@ class DictTrie {
 minWeight_ = FindMinWeight(staticNodeInfos_);
 maxWeight_ = FindMaxWeight(staticNodeInfos_);

-if(userDictPaths.size()) {
+if (userDictPaths.size()) {
 LoadUserDict(userDictPaths);
 }
 Shrink(staticNodeInfos_);
@@ -53,7 +53,7 @@ class DictTrie {

 bool insertUserWord(const string& word, const string& tag = UNKNOWN_TAG) {
 DictUnit nodeInfo;
-if(!MakeUserNodeInfo(nodeInfo, word, tag)) {
+if (!MakeUserNodeInfo(nodeInfo, word, tag)) {
 return false;
 }
 activeNodeInfos_.push_back(nodeInfo);
@@ -85,7 +85,7 @@ class DictTrie {
 assert(dictUnits.size());
 vector<Unicode> words;
 vector<const DictUnit*> valuePointers;
-for(size_t i = 0 ; i < dictUnits.size(); i ++) {
+for (size_t i = 0 ; i < dictUnits.size(); i ++) {
 words.push_back(dictUnits[i].word);
 valuePointers.push_back(&dictUnits[i]);
 }
@@ -97,16 +97,16 @@ class DictTrie {
 size_t lineno = 0;
 for (size_t i = 0; i < files.size(); i++) {
 ifstream ifs(files[i].c_str());
-if(!ifs.is_open()) {
+if (!ifs.is_open()) {
 LogFatal("file %s open failed.", files[i].c_str());
 }
 string line;
 DictUnit nodeInfo;
 vector<string> buf;
-for(; getline(ifs, line); lineno++) {
+for (; getline(ifs, line); lineno++) {
 buf.clear();
 split(line, buf, " ");
-if(buf.size() < 1) {
+if (buf.size() < 1) {
 LogFatal("split [%s] result illegal", line.c_str());
 }
 DictUnit nodeInfo;
@@ -121,7 +121,7 @@ class DictTrie {
 const string& word,
 double weight,
 const string& tag) {
-if(!TransCode::decode(word, nodeInfo.word)) {
+if (!TransCode::decode(word, nodeInfo.word)) {
 LogError("decode %s failed.", word.c_str());
 return false;
 }
@@ -132,11 +132,11 @@ class DictTrie {
 bool MakeUserNodeInfo(DictUnit& nodeInfo,
 const string& word,
 const string& tag = UNKNOWN_TAG) {
-if(!TransCode::decode(word, nodeInfo.word)) {
+if (!TransCode::decode(word, nodeInfo.word)) {
 LogError("decode %s failed.", word.c_str());
 return false;
 }
-if(nodeInfo.word.size() == 1) {
+if (nodeInfo.word.size() == 1) {
 userDictSingleChineseWord_.insert(nodeInfo.word[0]);
 }
 nodeInfo.weight = maxWeight_;
@@ -145,16 +145,16 @@ class DictTrie {
 }
 void LoadDict(const string& filePath) {
 ifstream ifs(filePath.c_str());
-if(!ifs.is_open()) {
+if (!ifs.is_open()) {
 LogFatal("file %s open failed.", filePath.c_str());
 }
 string line;
 vector<string> buf;

 DictUnit nodeInfo;
-for(size_t lineno = 0; getline(ifs, line); lineno++) {
+for (size_t lineno = 0; getline(ifs, line); lineno++) {
 split(line, buf, " ");
-if(buf.size() != DICT_COLUMN_NUM) {
+if (buf.size() != DICT_COLUMN_NUM) {
 LogFatal("split result illegal, line: %s, result size: %u", line.c_str(), buf.size());
 }
 MakeNodeInfo(nodeInfo,
@@ -166,14 +166,14 @@ class DictTrie {
 }
 double FindMinWeight(const vector<DictUnit>& nodeInfos) const {
 double ret = MAX_DOUBLE;
-for(size_t i = 0; i < nodeInfos.size(); i++) {
+for (size_t i = 0; i < nodeInfos.size(); i++) {
 ret = min(nodeInfos[i].weight, ret);
 }
 return ret;
 }
 double FindMaxWeight(const vector<DictUnit>& nodeInfos) const {
 double ret = MIN_DOUBLE;
-for(size_t i = 0; i < nodeInfos.size(); i++) {
+for (size_t i = 0; i < nodeInfos.size(); i++) {
 ret = max(nodeInfos[i].weight, ret);
 }
 return ret;
@@ -181,11 +181,11 @@ class DictTrie {

 void CalculateWeight(vector<DictUnit>& nodeInfos) const {
 double sum = 0.0;
-for(size_t i = 0; i < nodeInfos.size(); i++) {
+for (size_t i = 0; i < nodeInfos.size(); i++) {
 sum += nodeInfos[i].weight;
 }
 assert(sum);
-for(size_t i = 0; i < nodeInfos.size(); i++) {
+for (size_t i = 0; i < nodeInfos.size(); i++) {
 DictUnit& nodeInfo = nodeInfos[i];
 assert(nodeInfo.weight);
 nodeInfo.weight = log(double(nodeInfo.weight)/double(sum));

@@ -22,7 +22,7 @@ class FullSegment: public SegmentBase {
 assert(dictTrie_);
 }
 ~FullSegment() {
-if(isNeedDestroy_) {
+if (isNeedDestroy_) {
 delete dictTrie_;
 }
 }

@@ -32,73 +32,73 @@ struct HMMModel {
 }
 void loadModel(const string& filePath) {
 ifstream ifile(filePath.c_str());
-if(!ifile.is_open()) {
+if (!ifile.is_open()) {
 LogFatal("open %s failed.", filePath.c_str());
 }
 string line;
 vector<string> tmp;
 vector<string> tmp2;
 //load startProb
-if(!getLine(ifile, line)) {
+if (!getLine(ifile, line)) {
 LogFatal("load startProb");
 }
 split(line, tmp, " ");
-if(tmp.size() != STATUS_SUM) {
+if (tmp.size() != STATUS_SUM) {
 LogFatal("start_p illegal");
 }
-for(size_t j = 0; j< tmp.size(); j++) {
+for (size_t j = 0; j< tmp.size(); j++) {
 startProb[j] = atof(tmp[j].c_str());
 }

 //load transProb
-for(size_t i = 0; i < STATUS_SUM; i++) {
-if(!getLine(ifile, line)) {
+for (size_t i = 0; i < STATUS_SUM; i++) {
+if (!getLine(ifile, line)) {
 LogFatal("load transProb failed.");
 }
 split(line, tmp, " ");
-if(tmp.size() != STATUS_SUM) {
+if (tmp.size() != STATUS_SUM) {
 LogFatal("trans_p illegal");
 }
-for(size_t j =0; j < STATUS_SUM; j++) {
+for (size_t j =0; j < STATUS_SUM; j++) {
 transProb[i][j] = atof(tmp[j].c_str());
 }
 }

 //load emitProbB
-if(!getLine(ifile, line) || !loadEmitProb(line, emitProbB)) {
+if (!getLine(ifile, line) || !loadEmitProb(line, emitProbB)) {
 LogFatal("load emitProbB failed.");
 }

 //load emitProbE
-if(!getLine(ifile, line) || !loadEmitProb(line, emitProbE)) {
+if (!getLine(ifile, line) || !loadEmitProb(line, emitProbE)) {
 LogFatal("load emitProbE failed.");
 }

 //load emitProbM
-if(!getLine(ifile, line) || !loadEmitProb(line, emitProbM)) {
+if (!getLine(ifile, line) || !loadEmitProb(line, emitProbM)) {
 LogFatal("load emitProbM failed.");
 }

 //load emitProbS
-if(!getLine(ifile, line) || !loadEmitProb(line, emitProbS)) {
+if (!getLine(ifile, line) || !loadEmitProb(line, emitProbS)) {
 LogFatal("load emitProbS failed.");
 }
 }
 double getEmitProb(const EmitProbMap* ptMp, uint16_t key,
 double defVal)const {
 EmitProbMap::const_iterator cit = ptMp->find(key);
-if(cit == ptMp->end()) {
+if (cit == ptMp->end()) {
 return defVal;
 }
 return cit->second;
 }
 bool getLine(ifstream& ifile, string& line) {
-while(getline(ifile, line)) {
+while (getline(ifile, line)) {
 trim(line);
-if(line.empty()) {
+if (line.empty()) {
 continue;
 }
-if(startsWith(line, "#")) {
+if (startsWith(line, "#")) {
 continue;
 }
 return true;
@@ -106,19 +106,19 @@ struct HMMModel {
 return false;
 }
 bool loadEmitProb(const string& line, EmitProbMap& mp) {
-if(line.empty()) {
+if (line.empty()) {
 return false;
 }
 vector<string> tmp, tmp2;
 Unicode unicode;
 split(line, tmp, ",");
-for(size_t i = 0; i < tmp.size(); i++) {
+for (size_t i = 0; i < tmp.size(); i++) {
 split(tmp[i], tmp2, ":");
-if(2 != tmp2.size()) {
+if (2 != tmp2.size()) {
 LogError("emitProb illegal.");
 return false;
 }
-if(!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
+if (!TransCode::decode(tmp2[0], unicode) || unicode.size() != 1) {
 LogError("TransCode failed.");
 return false;
 }

@@ -18,7 +18,7 @@ class HMMSegment: public SegmentBase {
 : model_(model), isNeedDestroy_(false) {
 }
 ~HMMSegment() {
-if(isNeedDestroy_) {
+if (isNeedDestroy_) {
 delete model_;
 }
 }
@@ -38,30 +38,30 @@ class HMMSegment: public SegmentBase {
 void cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const {
 Unicode::const_iterator left = begin;
 Unicode::const_iterator right = begin;
-while(right != end) {
-if(*right < 0x80) {
-if(left != right) {
+while (right != end) {
+if (*right < 0x80) {
+if (left != right) {
 Cut(left, right, res);
 }
 left = right;
 do {
 right = SequentialLetterRule(left, end);
-if(right != left) {
+if (right != left) {
 break;
 }
 right = NumbersRule(left, end);
-if(right != left) {
+if (right != left) {
 break;
 }
 right ++;
-} while(false);
+} while (false);
 res.push_back(Unicode(left, right));
 left = right;
 } else {
 right++;
 }
 }
-if(left != right) {
+if (left != right) {
 Cut(left, right, res);
 }
 }
@@ -74,9 +74,9 @@ class HMMSegment: public SegmentBase {
 } else {
 return begin;
 }
-while(begin != end) {
+while (begin != end) {
 x = *begin;
-if(('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
+if (('a' <= x && x <= 'z') || ('A' <= x && x <= 'Z') || ('0' <= x && x <= '9')) {
 begin ++;
 } else {
 break;
@@ -87,14 +87,14 @@ class HMMSegment: public SegmentBase {
 //
 Unicode::const_iterator NumbersRule(Unicode::const_iterator begin, Unicode::const_iterator end) const {
 Rune x = *begin;
-if('0' <= x && x <= '9') {
+if ('0' <= x && x <= '9') {
 begin ++;
 } else {
 return begin;
 }
-while(begin != end) {
+while (begin != end) {
 x = *begin;
-if( ('0' <= x && x <= '9') || x == '.') {
+if ( ('0' <= x && x <= '9') || x == '.') {
 begin++;
 } else {
 break;
@@ -108,8 +108,8 @@ class HMMSegment: public SegmentBase {

 Unicode::const_iterator left = begin;
 Unicode::const_iterator right;
-for(size_t i = 0; i < status.size(); i++) {
-if(status[i] % 2) { //if(HMMModel::E == status[i] || HMMModel::S == status[i])
+for (size_t i = 0; i < status.size(); i++) {
+if (status[i] % 2) { //if (HMMModel::E == status[i] || HMMModel::S == status[i])
 right = begin + i + 1;
 res.push_back(Unicode(left, right));
 left = right;
@@ -131,23 +131,23 @@ class HMMSegment: public SegmentBase {
 vector<double> weight(XYSize);

 //start
-for(size_t y = 0; y < Y; y++) {
+for (size_t y = 0; y < Y; y++) {
 weight[0 + y * X] = model_->startProb[y] + model_->getEmitProb(model_->emitProbVec[y], *begin, MIN_DOUBLE);
 path[0 + y * X] = -1;
 }

 double emitProb;

-for(size_t x = 1; x < X; x++) {
-for(size_t y = 0; y < Y; y++) {
+for (size_t x = 1; x < X; x++) {
+for (size_t y = 0; y < Y; y++) {
 now = x + y*X;
 weight[now] = MIN_DOUBLE;
 path[now] = HMMModel::E; // warning
 emitProb = model_->getEmitProb(model_->emitProbVec[y], *(begin+x), MIN_DOUBLE);
-for(size_t preY = 0; preY < Y; preY++) {
+for (size_t preY = 0; preY < Y; preY++) {
 old = x - 1 + preY * X;
 tmp = weight[old] + model_->transProb[preY][y] + emitProb;
-if(tmp > weight[now]) {
+if (tmp > weight[now]) {
 weight[now] = tmp;
 path[now] = preY;
 }
@@ -158,14 +158,14 @@ class HMMSegment: public SegmentBase {
 endE = weight[X-1+HMMModel::E*X];
 endS = weight[X-1+HMMModel::S*X];
 stat = 0;
-if(endE >= endS) {
+if (endE >= endS) {
 stat = HMMModel::E;
 } else {
 stat = HMMModel::S;
 }

 status.resize(X);
-for(int x = X -1 ; x >= 0; x--) {
+for (int x = X -1 ; x >= 0; x--) {
 status[x] = stat;
 stat = path[x + stat*X];
 }

@@ -33,10 +33,10 @@ class KeywordExtractor {

 bool extract(const string& sentence, vector<string>& keywords, size_t topN) const {
 vector<pair<string, double> > topWords;
-if(!extract(sentence, topWords, topN)) {
+if (!extract(sentence, topWords, topN)) {
 return false;
 }
-for(size_t i = 0; i < topWords.size(); i++) {
+for (size_t i = 0; i < topWords.size(); i++) {
 keywords.push_back(topWords[i].first);
 }
 return true;
@@ -47,21 +47,21 @@ class KeywordExtractor {
 segment_.cut(sentence, words);

 map<string, double> wordmap;
-for(vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
-if(IsSingleWord(*iter)) {
+for (vector<string>::iterator iter = words.begin(); iter != words.end(); iter++) {
+if (IsSingleWord(*iter)) {
 continue;
 }
 wordmap[*iter] += 1.0;
 }

-for(map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); ) {
-if(stopWords_.end() != stopWords_.find(itr->first)) {
+for (map<string, double>::iterator itr = wordmap.begin(); itr != wordmap.end(); ) {
+if (stopWords_.end() != stopWords_.find(itr->first)) {
 wordmap.erase(itr++);
 continue;
 }

 unordered_map<string, double>::const_iterator cit = idfMap_.find(itr->first);
-if(cit != idfMap_.end()) {
+if (cit != idfMap_.end()) {
 itr->second *= cit->second;
 } else {
 itr->second *= idfAverage_;
@@ -79,7 +79,7 @@ class KeywordExtractor {
 private:
 void LoadIdfDict(const string& idfPath) {
 ifstream ifs(idfPath.c_str());
-if(!ifs.is_open()) {
+if (!ifs.is_open()) {
 LogFatal("open %s failed.", idfPath.c_str());
 }
 string line ;
@@ -87,9 +87,9 @@ class KeywordExtractor {
 double idf = 0.0;
 double idfSum = 0.0;
 size_t lineno = 0;
-for(; getline(ifs, line); lineno++) {
+for (; getline(ifs, line); lineno++) {
 buf.clear();
-if(line.empty()) {
+if (line.empty()) {
 LogError("line[%d] empty. skipped.", lineno);
 continue;
 }
@@ -110,11 +110,11 @@ class KeywordExtractor {
 }
 void LoadStopWordDict(const string& filePath) {
 ifstream ifs(filePath.c_str());
-if(!ifs.is_open()) {
+if (!ifs.is_open()) {
 LogFatal("open %s failed.", filePath.c_str());
 }
 string line ;
-while(getline(ifs, line)) {
+while (getline(ifs, line)) {
 stopWords_.insert(line);
 }
 assert(stopWords_.size());
@@ -123,7 +123,7 @@ class KeywordExtractor {
 bool IsSingleWord(const string& str) const {
 Unicode unicode;
 TransCode::decode(str, unicode);
-if(unicode.size() == 1)
+if (unicode.size() == 1)
 return true;
 return false;
 }

@@ -22,7 +22,7 @@ class MPSegment: public SegmentBase {
 assert(dictTrie_);
 }
 ~MPSegment() {
-if(isNeedDestroy_) {
+if (isNeedDestroy_) {
 delete dictTrie_;
 }
 }
@@ -66,24 +66,24 @@ class MPSegment: public SegmentBase {
 const DictUnit* p;
 double val;

-for(vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
+for (vector<Dag>::reverse_iterator rit = dags.rbegin(); rit != dags.rend(); rit++) {
 rit->pInfo = NULL;
 rit->weight = MIN_DOUBLE;
 assert(!rit->nexts.empty());
-for(LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
+for (LocalVector<pair<size_t, const DictUnit*> >::const_iterator it = rit->nexts.begin(); it != rit->nexts.end(); it++) {
 nextPos = it->first;
 p = it->second;
 val = 0.0;
-if(nextPos + 1 < dags.size()) {
+if (nextPos + 1 < dags.size()) {
 val += dags[nextPos + 1].weight;
 }

-if(p) {
+if (p) {
 val += p->weight;
 } else {
 val += dictTrie_->getMinWeight();
 }
-if(val > rit->weight) {
+if (val > rit->weight) {
 rit->pInfo = p;
 rit->weight = val;
 }
@@ -93,9 +93,9 @@ class MPSegment: public SegmentBase {
 void CutByDag(const vector<Dag>& dags,
 vector<Unicode>& words) const {
 size_t i = 0;
-while(i < dags.size()) {
+while (i < dags.size()) {
 const DictUnit* p = dags[i].pInfo;
-if(p) {
+if (p) {
 words.push_back(p->word);
 i += p->word.size();
 } else { //single chinese word

@@ -39,7 +39,7 @@ class PosTagger {
 return false;
 }
 tmp = dict->find(unico.begin(), unico.end());
-if(tmp == NULL || tmp->tag.empty()) {
+if (tmp == NULL || tmp->tag.empty()) {
 res.push_back(make_pair(*itr, SpecialRule(unico)));
 } else {
 res.push_back(make_pair(*itr, tmp->tag));
@@ -51,20 +51,20 @@ class PosTagger {
 const char* SpecialRule(const Unicode& unicode) const {
 size_t m = 0;
 size_t eng = 0;
-for(size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
-if(unicode[i] < 0x80) {
+for (size_t i = 0; i < unicode.size() && eng < unicode.size() / 2; i++) {
+if (unicode[i] < 0x80) {
 eng ++;
-if('0' <= unicode[i] && unicode[i] <= '9') {
+if ('0' <= unicode[i] && unicode[i] <= '9') {
 m++;
 }
 }
 }
 // ascii char is not found
-if(eng == 0) {
+if (eng == 0) {
 return POS_X;
 }
 // all the ascii is number char
-if(m == eng) {
+if (m == eng) {
 return POS_M;
 }
 // the ascii chars contain english letter

@@ -40,7 +40,7 @@ class SegmentBase {
 protected:
 void LoadSpecialSymbols() {
 size_t size = sizeof(SPECIAL_SYMBOL)/sizeof(*SPECIAL_SYMBOL);
-for(size_t i = 0; i < size; i ++) {
+for (size_t i = 0; i < size; i ++) {
 symbols_.insert(SPECIAL_SYMBOL[i]);
 }
 assert(symbols_.size());

@@ -17,7 +17,7 @@ void cut(size_t times = 50) {
 assert(ifs);
 doc << ifs;
 long beginTime = clock();
-for(size_t i = 0; i < times; i ++) {
+for (size_t i = 0; i < times; i ++) {
 printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
 fflush(stdout);
 res.clear();
@@ -36,7 +36,7 @@ void extract(size_t times = 400) {
 assert(ifs);
 doc << ifs;
 long beginTime = clock();
-for(size_t i = 0; i < times; i ++) {
+for (size_t i = 0; i < times; i ++) {
 printf("process [%3.0lf %%]\r", 100.0*(i+1)/times);
 fflush(stdout);
 words.clear();

@@ -82,7 +82,7 @@ TEST(ApplicationTest, InsertUserWord) {
 result << words;
 ASSERT_EQ("[\"男默女泪\"]", result);

-for(size_t i = 0; i < 100; i++) {
+for (size_t i = 0; i < 100; i++) {
 string newWord;
 newWord << rand();
 ASSERT_TRUE(app.insertUserWord(newWord));

@@ -1106,7 +1106,7 @@ class Notification {
 // Blocks until the controller thread notifies. Must be called from a test
 // thread.
 void WaitForNotification() {
-while(!notified_) {
+while (!notified_) {
 SleepMilliseconds(10);
 }
 }
@@ -271,7 +271,7 @@ GTEST_API_ bool ShouldRunTestOnShard(
 // the given predicate.
 template <class Container, typename Predicate>
 inline int CountIf(const Container& c, Predicate predicate) {
-// Implemented as an explicit loop since std::count_if() in libCstd on
+// Implemented as an explicit loop since std::count_if () in libCstd on
 // Solaris has a non-standard signature.
 int count = 0;
 for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) {
@@ -1817,7 +1817,7 @@ void TestResult::RecordProperty(const TestProperty& test_property) {
 }
 internal::MutexLock lock(&test_properites_mutex_);
 const std::vector<TestProperty>::iterator property_with_matching_key =
-std::find_if(test_properties_.begin(), test_properties_.end(),
+std::find_if (test_properties_.begin(), test_properties_.end(),
 internal::TestPropertyKeyIs(test_property.key()));
 if (property_with_matching_key == test_properties_.end()) {
 test_properties_.push_back(test_property);
@@ -4099,7 +4099,7 @@ TestCase* UnitTestImpl::GetTestCase(const char* test_case_name,
 Test::TearDownTestCaseFunc tear_down_tc) {
 // Can we find a TestCase with the given name?
 const std::vector<TestCase*>::const_iterator test_case =
-std::find_if(test_cases_.begin(), test_cases_.end(),
+std::find_if (test_cases_.begin(), test_cases_.end(),
 TestCaseNameIs(test_case_name));

 if (test_case != test_cases_.end())

@@ -160,7 +160,7 @@ TEST(MPSegmentTest, Test1) {
 // }
 // string res;
 //
-// while(getline(ifs, line)) {
+// while (getline(ifs, line)) {
 // res += line;
 // res += '\n';
 //

@@ -48,7 +48,7 @@ TEST(DictTrieTest, Test1) {
 word = "清华大学";
 LocalVector<pair<size_t, const DictUnit*> > res;
 const char * words[] = {"清", "清华", "清华大学"};
-for(size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
+for (size_t i = 0; i < sizeof(words)/sizeof(words[0]); i++) {
 ASSERT_TRUE(TransCode::decode(words[i], uni));
 res.push_back(make_pair(uni.size() - 1, trie.find(uni.begin(), uni.end())));
 //resMap[uni.size() - 1] = trie.find(uni.begin(), uni.end());