mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add KeyWordInfo into KeyWordExt.cpp/h
This commit is contained in:
parent
346bc54c35
commit
73e83e6ed9
@ -21,7 +21,7 @@ void testKeyWordExt(const char * dictPath, const char * filePath)
|
|||||||
return ;
|
return ;
|
||||||
}
|
}
|
||||||
ifstream ifile(filePath);
|
ifstream ifile(filePath);
|
||||||
vector<string> res;
|
vector<KeyWordInfo> res;
|
||||||
string line;
|
string line;
|
||||||
while(getline(ifile, line))
|
while(getline(ifile, line))
|
||||||
{
|
{
|
||||||
@ -29,7 +29,7 @@ void testKeyWordExt(const char * dictPath, const char * filePath)
|
|||||||
if(!line.empty())
|
if(!line.empty())
|
||||||
{
|
{
|
||||||
ext.extract(line, res, 20);
|
ext.extract(line, res, 20);
|
||||||
cout<<line<<"\n"<<joinStr(res,",")<<endl;
|
cout<<line<<'\n'<<joinWordInfos(res)<<endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -57,15 +57,15 @@ void testKeyWordExt2(const char * dictPath, const char * filePath)
|
|||||||
|
|
||||||
ifstream ifile(filePath);
|
ifstream ifile(filePath);
|
||||||
vector<string> words;
|
vector<string> words;
|
||||||
vector<string> keywords;
|
vector<KeyWordInfo> res;
|
||||||
string line;
|
string line;
|
||||||
while(getline(ifile, line))
|
while(getline(ifile, line))
|
||||||
{
|
{
|
||||||
if(!line.empty())
|
if(!line.empty())
|
||||||
{
|
{
|
||||||
seg.cutDAG(line, words);
|
seg.cutDAG(line, words);
|
||||||
ext.extract(words, keywords, 20);
|
ext.extract(words, res, 20);
|
||||||
cout<<line<<"\n"<<joinStr(keywords," ")<<endl;
|
cout<<line<<"\n"<<joinWordInfos(res)<<endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -73,13 +73,26 @@ void testKeyWordExt2(const char * dictPath, const char * filePath)
|
|||||||
ext.dispose();
|
ext.dispose();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.gbk";
|
||||||
|
|
||||||
int main(int argc, char ** argv)
|
int main(int argc, char ** argv)
|
||||||
{
|
{
|
||||||
if(argc != 3)
|
ArgvContext arg(argc, argv);
|
||||||
|
string dictPath = arg["--dictpath"];
|
||||||
|
if("" == dictPath)
|
||||||
{
|
{
|
||||||
cerr<<"usage: "<<argv[0]<<" ../dicts/jieba.dict.gbk filename"<<endl;
|
dictPath = DEFAULT_DICTPATH;
|
||||||
|
}
|
||||||
|
if("" == arg[1])
|
||||||
|
{
|
||||||
|
cout<<"usage: \n\t"<<argv[0]<<" [options] <filename>\n"
|
||||||
|
<<"options:\n"
|
||||||
|
<<"\t--dictpath\tIf is not specified, the default is "<<DEFAULT_DICTPATH<<"\n"
|
||||||
|
<<"\t--encoding\tSupported encoding methods are [gbk, utf-8] for now. \n\t\t\tIf is not specified, the default is gbk."
|
||||||
|
<<endl;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
testKeyWordExt(argv[1], argv[2]);
|
|
||||||
|
testKeyWordExt(dictPath.c_str(), arg[1].c_str());
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -92,184 +92,144 @@ namespace CppJieba
|
|||||||
for(uint i = 0; i < wordInfos.size(); i++)
|
for(uint i = 0; i < wordInfos.size(); i++)
|
||||||
{
|
{
|
||||||
KeyWordInfo& wInfo = wordInfos[i];
|
KeyWordInfo& wInfo = wordInfos[i];
|
||||||
double logWordFreq = 1.0;//_segment.getWordWeight(wInfo.word);
|
wInfo.idf = - wInfo.logFreq;
|
||||||
wInfo.idf = -logWordFreq;
|
if(0 == wInfo.wLen)
|
||||||
size_t wLen = TransCode::getWordLength(wInfo.word);
|
|
||||||
if(0 == wLen)
|
|
||||||
{
|
{
|
||||||
LogFatal("getUtf8WordLen(%s) return 0");
|
LogFatal("wLen is 0!");
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
wInfo.weight = log(double(wLen + 1)) * wInfo.idf;
|
wInfo.weight = log(double(wInfo.wLen + 1)) * wInfo.idf;
|
||||||
}
|
}
|
||||||
sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare);
|
sort(wordInfos.begin(), wordInfos.end(), _wordInfoCompare);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::_extractTopN(const vector<string>& words, vector<string>& keywords, uint topN)
|
bool KeyWordExt::_extTopN(vector<KeyWordInfo>& wordInfos, uint topN)
|
||||||
{
|
{
|
||||||
keywords.clear();
|
int dis = wordInfos.size() - topN;
|
||||||
vector<KeyWordInfo> wordInfos;
|
if(dis <= 0)
|
||||||
for(uint i = 0; i < words.size(); i++)
|
|
||||||
{
|
{
|
||||||
KeyWordInfo wInfo;
|
return true;
|
||||||
wInfo.word = words[i];
|
|
||||||
wordInfos.push_back(wInfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_sortWLIDF(wordInfos);
|
if(uint(dis) <= topN)
|
||||||
#ifdef DEBUG
|
|
||||||
LogDebug(string_format("calc weight & sorted:%s",joinWordInfos(wordInfos).c_str()));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
_prioritizeSubWords(wordInfos);
|
|
||||||
#ifdef DEBUG
|
|
||||||
LogDebug(string_format("_prioritizeSubWords res:%s", joinWordInfos(wordInfos).c_str()));
|
|
||||||
#endif
|
|
||||||
//extract TopN
|
|
||||||
for(uint i = 0; i < topN && i < wordInfos.size(); i++)
|
|
||||||
{
|
{
|
||||||
keywords.push_back(wordInfos[i].word);
|
for(int i = 0; i< dis; i++)
|
||||||
|
{
|
||||||
|
wordInfos.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else// in case that topN << size;
|
||||||
|
{
|
||||||
|
|
||||||
|
vector<KeyWordInfo> tmp(wordInfos.begin(), wordInfos.begin() + topN);
|
||||||
|
wordInfos.swap(tmp);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool KeyWordExt::extract(const vector<string>& _words, vector<string>& keywords, uint topN)
|
bool KeyWordExt::extract(const vector<string>& words, vector<KeyWordInfo>& keyWordInfos, uint topN)
|
||||||
{
|
{
|
||||||
if(_words.empty())
|
if(words.empty())
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<string> words(_words);
|
#ifdef DEBU
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
LogDebug(string_format("words:[%s]", joinStr(words, ",").c_str()));
|
LogDebug(string_format("words:[%s]", joinStr(words, ",").c_str()));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool retFlag = _filter(words);
|
keyWordInfos.clear();
|
||||||
if(!retFlag)
|
for(uint i = 0; i < words.size(); i++)
|
||||||
{
|
{
|
||||||
LogError("_filter failed.");
|
keyWordInfos.push_back(words[i]);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
return _extract(keyWordInfos, topN);
|
||||||
LogDebug(string_format("_filter res:[%s]", joinStr(words, ",").c_str()));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
retFlag = _extractTopN(words, keywords, topN);
|
|
||||||
if(!retFlag)
|
|
||||||
{
|
|
||||||
LogError("_extractTopN failed.");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
//LogDebug("_extractTopN finished.");
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
LogDebug(string_format("ext res:[%s]", joinStr(keywords, ",").c_str()));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::extract(const string& title, vector<string>& keywords, uint topN)
|
bool KeyWordExt::extract(const string& title, vector<KeyWordInfo>& keyWordInfos, uint topN)
|
||||||
{
|
{
|
||||||
if(title.empty())
|
if(title.empty())
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
vector<TrieNodeInfo> trieNodeInfos;
|
||||||
LogDebug(string_format("title:[%s]",title.c_str()));
|
_segment.cutDAG(title, trieNodeInfos);
|
||||||
#endif
|
|
||||||
|
|
||||||
bool retFlag;
|
keyWordInfos.clear();
|
||||||
vector<string> words;
|
for(uint i = 0; i < trieNodeInfos.size(); i++)
|
||||||
retFlag = _segment.cutDAG(title, words);
|
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError(string_format("cutDAG(%s) failed.", title.c_str()));
|
keyWordInfos.push_back(trieNodeInfos[i]);
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
#ifdef DEBUG
|
return _extract(keyWordInfos, topN);
|
||||||
LogDebug(string_format("cutDAG result:[%s]", joinStr(words, ",").c_str()));
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
retFlag = _filter(words);
|
bool KeyWordExt::_extract(vector<KeyWordInfo>& keyWordInfos, uint topN)
|
||||||
if(!retFlag)
|
{
|
||||||
|
if(!_filter(keyWordInfos))
|
||||||
{
|
{
|
||||||
LogError("_filter failed.");
|
LogError("_filter failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
if(!_sortWLIDF(keyWordInfos))
|
||||||
LogDebug(string_format("_filter res:[%s]", joinStr(words, ",").c_str()));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
retFlag = _extractTopN(words, keywords, topN);
|
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError("_extractTopN failed.");
|
LogError("_sortWLIDF failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!_extTopN(keyWordInfos, topN))
|
||||||
|
{
|
||||||
|
LogError("_extTopN failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug("_extractTopN finished.");
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
LogDebug(string_format("ext res:[%s]", joinStr(keywords, ",").c_str()));
|
|
||||||
#endif
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::_filter(vector<string>& strs)
|
bool KeyWordExt::_filter(vector<KeyWordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
bool retFlag;
|
if(!_filterDuplicate(wordInfos))
|
||||||
retFlag = _filterDuplicate(strs);
|
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError("_filterDuplicate failed.");
|
LogError("_filterDuplicate failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug(string_format("_filterDuplicate res:[%s]", joinStr(strs, ",").c_str()));
|
|
||||||
|
|
||||||
retFlag = _filterSingleWord(strs);
|
if(!_filterSingleWord(wordInfos))
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError("_filterSingleWord failed.");
|
LogError("_filterSingleWord failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug(string_format("_filterSingleWord res:[%s]", joinStr(strs, ",").c_str()));
|
|
||||||
|
|
||||||
retFlag = _filterStopWords(strs);
|
if(!_filterStopWords(wordInfos))
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError("_filterStopWords failed.");
|
LogError("_filterStopWords failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug(string_format("_filterStopWords res:[%s]", joinStr(strs, ",").c_str()));
|
|
||||||
|
|
||||||
retFlag = _filterSubstr(strs);
|
if(!_filterSubstr(wordInfos))
|
||||||
if(!retFlag)
|
|
||||||
{
|
{
|
||||||
LogError("_filterSubstr failed.");
|
LogError("_filterSubstr failed.");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
//LogDebug(string_format("_filterSubstr res:[%s]", joinStr(strs, ",").c_str()));
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::_filterStopWords(vector<string>& strs)
|
bool KeyWordExt::_filterStopWords(vector<KeyWordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
if(_stopWords.empty())
|
if(_stopWords.empty())
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
for(VSI it = strs.begin(); it != strs.end();)
|
for(vector<KeyWordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end();)
|
||||||
{
|
{
|
||||||
if(_stopWords.find(*it) != _stopWords.end())
|
if(_stopWords.find(it->word) != _stopWords.end())
|
||||||
{
|
{
|
||||||
it = strs.erase(it);
|
it = wordInfos.erase(it);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -280,33 +240,33 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool KeyWordExt::_filterDuplicate(vector<string>& strs)
|
bool KeyWordExt::_filterDuplicate(vector<KeyWordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
set<string> st;
|
set<string> st;
|
||||||
for(VSI it = strs.begin(); it != strs.end(); )
|
for(vector<KeyWordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end(); )
|
||||||
{
|
{
|
||||||
if(st.find(*it) != st.end())
|
if(st.find(it->word) != st.end())
|
||||||
{
|
{
|
||||||
it = strs.erase(it);
|
it = wordInfos.erase(it);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
st.insert(*it);
|
st.insert(it->word);
|
||||||
it++;
|
it++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::_filterSingleWord(vector<string>& strs)
|
bool KeyWordExt::_filterSingleWord(vector<KeyWordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
for(vector<string>::iterator it = strs.begin(); it != strs.end();)
|
for(vector<KeyWordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end();)
|
||||||
{
|
{
|
||||||
|
|
||||||
// filter single word
|
// filter single word
|
||||||
if(1 == TransCode::getWordLength(*it))
|
if(1 == it->wLen)
|
||||||
{
|
{
|
||||||
it = strs.erase(it);
|
it = wordInfos.erase(it);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -316,27 +276,31 @@ namespace CppJieba
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool KeyWordExt::_filterSubstr(vector<string>& strs)
|
bool KeyWordExt::_filterSubstr(vector<KeyWordInfo>& wordInfos)
|
||||||
{
|
{
|
||||||
vector<string> tmp = strs;
|
vector<string> tmp ;
|
||||||
|
for(uint i = 0; i < wordInfos.size(); i++)
|
||||||
|
{
|
||||||
|
tmp.push_back(wordInfos[i].word);
|
||||||
|
}
|
||||||
set<string> subs;
|
set<string> subs;
|
||||||
for(VSI it = strs.begin(); it != strs.end(); it ++)
|
for(vector<KeyWordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end(); it ++)
|
||||||
{
|
{
|
||||||
for(uint j = 0; j < tmp.size(); j++)
|
for(uint j = 0; j < tmp.size(); j++)
|
||||||
{
|
{
|
||||||
if(*it != tmp[j] && string::npos != tmp[j].find(*it, 0))
|
if(it->word != tmp[j] && string::npos != tmp[j].find(it->word, 0))
|
||||||
{
|
{
|
||||||
subs.insert(*it);
|
subs.insert(it->word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//erase subs from strs
|
//erase subs from strs
|
||||||
for(VSI it = strs.begin(); it != strs.end(); )
|
for(vector<KeyWordInfo>::iterator it = wordInfos.begin(); it != wordInfos.end(); )
|
||||||
{
|
{
|
||||||
if(subs.end() != subs.find(*it))
|
if(subs.end() != subs.find(it->word))
|
||||||
{
|
{
|
||||||
it = strs.erase(it);
|
it = wordInfos.erase(it);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -33,21 +33,22 @@ namespace CppJieba
|
|||||||
bool dispose();
|
bool dispose();
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool extract(const string& title, vector<string>& keywords, uint topN);
|
bool extract(const string& title, vector<KeyWordInfo>& keyWordInfos, uint topN);
|
||||||
bool extract(const vector<string>& words, vector<string>& keywords, uint topN);
|
bool extract(const vector<string>& words, vector<KeyWordInfo>& keyWordInfos, uint topN);
|
||||||
private:
|
private:
|
||||||
static bool _wordInfoCompare(const KeyWordInfo& a, const KeyWordInfo& b);
|
static bool _wordInfoCompare(const KeyWordInfo& a, const KeyWordInfo& b);
|
||||||
private:
|
private:
|
||||||
bool _extractTopN(const vector<string>& words, vector<string>& keywords, uint topN);
|
bool _extract(vector<KeyWordInfo>& keyWordInfos, uint topN);
|
||||||
|
bool _extTopN(vector<KeyWordInfo>& wordInfos, uint topN);
|
||||||
private:
|
private:
|
||||||
//sort by word len - idf
|
//sort by word len - idf
|
||||||
bool _sortWLIDF(vector<KeyWordInfo>& wordInfos);
|
bool _sortWLIDF(vector<KeyWordInfo>& wordInfos);
|
||||||
private:
|
private:
|
||||||
bool _filter(vector<string>& strs);
|
bool _filter(vector<KeyWordInfo>& );
|
||||||
bool _filterDuplicate(vector<string>& strs);
|
bool _filterDuplicate(vector<KeyWordInfo>& );
|
||||||
bool _filterSingleWord(vector<string>& strs);
|
bool _filterSingleWord(vector<KeyWordInfo>& );
|
||||||
bool _filterSubstr(vector<string>& strs);
|
bool _filterSubstr(vector<KeyWordInfo>& );
|
||||||
bool _filterStopWords(vector<string>& strs);
|
bool _filterStopWords(vector<KeyWordInfo>& );
|
||||||
private:
|
private:
|
||||||
bool _prioritizeSubWords(vector<KeyWordInfo>& wordInfos);
|
bool _prioritizeSubWords(vector<KeyWordInfo>& wordInfos);
|
||||||
bool _isContainSubWords(const string& word);
|
bool _isContainSubWords(const string& word);
|
||||||
|
@ -1,7 +1,9 @@
|
|||||||
#ifndef CPPJIEBA_STRUCTS_H
|
#ifndef CPPJIEBA_STRUCTS_H
|
||||||
#define CPPJIEBA_STRUCTS_H
|
#define CPPJIEBA_STRUCTS_H
|
||||||
|
|
||||||
|
#include <limits>
|
||||||
#include "globals.h"
|
#include "globals.h"
|
||||||
|
#include "Trie.h"
|
||||||
|
|
||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
{
|
{
|
||||||
@ -19,6 +21,13 @@ namespace CppJieba
|
|||||||
freq = 0;
|
freq = 0;
|
||||||
logFreq = 0.0;
|
logFreq = 0.0;
|
||||||
}
|
}
|
||||||
|
TrieNodeInfo(const string& _word)
|
||||||
|
{
|
||||||
|
word = _word;
|
||||||
|
wLen = TransCode::getWordLength(_word);
|
||||||
|
freq = 0;
|
||||||
|
logFreq = -numeric_limits<double>::max();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -37,7 +46,6 @@ namespace CppJieba
|
|||||||
};
|
};
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
struct KeyWordInfo: public TrieNodeInfo
|
struct KeyWordInfo: public TrieNodeInfo
|
||||||
{
|
{
|
||||||
double idf;
|
double idf;
|
||||||
@ -47,10 +55,34 @@ namespace CppJieba
|
|||||||
idf = 0.0;
|
idf = 0.0;
|
||||||
weight = 0.0;
|
weight = 0.0;
|
||||||
}
|
}
|
||||||
|
KeyWordInfo(const string& _word):TrieNodeInfo(_word)
|
||||||
|
{
|
||||||
|
idf = 0.0;
|
||||||
|
weight = 0.0;
|
||||||
|
}
|
||||||
|
KeyWordInfo(const TrieNodeInfo& trieNodeInfo)
|
||||||
|
{
|
||||||
|
word = trieNodeInfo.word;
|
||||||
|
freq = trieNodeInfo.freq;
|
||||||
|
wLen = trieNodeInfo.wLen;
|
||||||
|
tag = trieNodeInfo.tag;
|
||||||
|
logFreq = trieNodeInfo.logFreq;
|
||||||
|
idf = 0.0;
|
||||||
|
weight = 0.0;
|
||||||
|
}
|
||||||
string toString() const
|
string toString() const
|
||||||
{
|
{
|
||||||
return string_format("{word:%s,wLen:%d weight:%lf, idf:%lf}", word.c_str(), wLen, weight, idf);
|
return string_format("{word:%s,wLen:%d weight:%lf, idf:%lf}", word.c_str(), wLen, weight, idf);
|
||||||
}
|
}
|
||||||
|
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
|
||||||
|
{
|
||||||
|
word = trieNodeInfo.word;
|
||||||
|
freq = trieNodeInfo.freq;
|
||||||
|
wLen = trieNodeInfo.wLen;
|
||||||
|
tag = trieNodeInfo.tag;
|
||||||
|
logFreq = trieNodeInfo.logFreq;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
inline string joinWordInfos(const vector<KeyWordInfo>& vec)
|
inline string joinWordInfos(const vector<KeyWordInfo>& vec)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user