mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Make the public load* functions private and move their calls into init(...)
This commit is contained in:
parent
50117075a7
commit
b167777a31
@ -8,18 +8,11 @@ using namespace CppJieba;
|
||||
void testKeyWordExt(const char * dictPath, const char * filePath)
|
||||
{
|
||||
KeyWordExt ext;
|
||||
ext.init();
|
||||
if(!ext.init(dictPath, "../dicts/stopwords.gbk.v1.0"))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
if(!ext.loadSegDict(dictPath))
|
||||
{
|
||||
cerr<<__FILE__<<__LINE__<<endl;
|
||||
return ;
|
||||
}
|
||||
if(!ext.loadStopWords("../dicts/stopwords.gbk.v1.0"))
|
||||
{
|
||||
cerr<<__FILE__<<__LINE__<<endl;
|
||||
return ;
|
||||
}
|
||||
ifstream ifile(filePath);
|
||||
vector<KeyWordInfo> res;
|
||||
string line;
|
||||
@ -36,43 +29,6 @@ void testKeyWordExt(const char * dictPath, const char * filePath)
|
||||
ext.dispose();
|
||||
}
|
||||
|
||||
void testKeyWordExt2(const char * dictPath, const char * filePath)
|
||||
{
|
||||
Segment seg;
|
||||
seg.init();
|
||||
KeyWordExt ext;
|
||||
ext.init();
|
||||
|
||||
if(!seg.loadSegDict(dictPath))
|
||||
{
|
||||
cerr<<__FILE__<<__LINE__<<endl;
|
||||
return ;
|
||||
}
|
||||
|
||||
if(!ext.loadSegDict(dictPath));
|
||||
{
|
||||
cerr<<__FILE__<<__LINE__<<endl;
|
||||
return ;
|
||||
}
|
||||
|
||||
ifstream ifile(filePath);
|
||||
vector<string> words;
|
||||
vector<KeyWordInfo> res;
|
||||
string line;
|
||||
while(getline(ifile, line))
|
||||
{
|
||||
if(!line.empty())
|
||||
{
|
||||
seg.cutDAG(line, words);
|
||||
ext.extract(words, res, 20);
|
||||
cout<<line<<"\n"<<joinWordInfos(res)<<endl;
|
||||
}
|
||||
|
||||
}
|
||||
seg.dispose();
|
||||
ext.dispose();
|
||||
}
|
||||
|
||||
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
|
@ -8,22 +8,18 @@ Segment seg;
|
||||
HMMSegment hmmseg;
|
||||
bool init(const char * const dictPath, const char * const modelPath)
|
||||
{
|
||||
if(!seg.init())
|
||||
if(!seg.init(dictPath))
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!seg.loadSegDict(dictPath))
|
||||
if(!hmmseg.init(modelPath))
|
||||
{
|
||||
cout<<"seg loadDict failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
if(!hmmseg.loadModel(modelPath))
|
||||
{
|
||||
cout<<"hmmseg loadModel failed."<<endl;
|
||||
cout<<"hmmseg init failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -66,6 +62,11 @@ bool dispose()
|
||||
cout<<"seg dispose failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
// Release the HMM segmenter as well; report it under its own name so a
// failure here is not mistaken for the seg.dispose() failure above.
if(!hmmseg.dispose())
{
    cout<<"hmmseg dispose failed."<<endl;
    return false;
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
我来到北京清华大学
|
||||
他来到了网易杭研大厦
|
||||
杭研
|
||||
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
|
||||
|
@ -21,9 +21,9 @@ namespace CppJieba
|
||||
|
||||
}
|
||||
|
||||
bool HMMSegment::init()
|
||||
bool HMMSegment::init(const char* const modelPath)
|
||||
{
|
||||
return true;
|
||||
return _loadModel(modelPath);
|
||||
}
|
||||
|
||||
bool HMMSegment::dispose()
|
||||
@ -31,7 +31,7 @@ namespace CppJieba
|
||||
return true;
|
||||
}
|
||||
|
||||
bool HMMSegment::loadModel(const char* const filePath)
|
||||
bool HMMSegment::_loadModel(const char* const filePath)
|
||||
{
|
||||
LogInfo(string_format("loadModel [%s] start ...", filePath));
|
||||
ifstream ifile(filePath);
|
||||
|
@ -32,13 +32,13 @@ namespace CppJieba
|
||||
HMMSegment();
|
||||
~HMMSegment();
|
||||
public:
|
||||
bool init();
|
||||
bool init(const char* const modelPath);
|
||||
bool dispose();
|
||||
public:
|
||||
bool loadModel(const char* const filePath);
|
||||
bool cut(const string& str, vector<string>& res);
|
||||
bool viterbi(const vector<uint16_t>& unico, vector<uint>& status);
|
||||
private:
|
||||
bool _loadModel(const char* const filePath);
|
||||
bool _getLine(ifstream& ifile, string& line);
|
||||
bool _loadEmitProb(const string& line, EmitProbMap& mp);
|
||||
bool _decodeOne(const string& str, uint16_t& res);
|
||||
|
@ -16,19 +16,26 @@ namespace CppJieba
|
||||
{
|
||||
}
|
||||
|
||||
bool KeyWordExt::init()
|
||||
bool KeyWordExt::init(const char* const segDictFile, const char* const stopWordDictFile)
|
||||
{
|
||||
return _segment.init();
|
||||
LogInfo("KeyWordExt init start ...");
|
||||
if(!_segment.init(segDictFile))
|
||||
{
|
||||
LogError("_segment.init failed.");
|
||||
return false;
|
||||
}
|
||||
if(!_loadStopWords(stopWordDictFile))
|
||||
{
|
||||
LogError("_loadStopWords failed.");
|
||||
return false;
|
||||
}
|
||||
LogInfo("KeyWordExt init OK.");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool KeyWordExt::loadSegDict(const char * const filePath)
|
||||
bool KeyWordExt::_loadPriorSubWords(const char * const filePath)
|
||||
{
|
||||
return _segment.loadSegDict(filePath);
|
||||
}
|
||||
|
||||
bool KeyWordExt::loadPriorSubWords(const char * const filePath)
|
||||
{
|
||||
LogInfo(string_format("loadPriorSubWords(%s) start", filePath));
|
||||
LogInfo(string_format("_loadPriorSubWords(%s) start", filePath));
|
||||
if(!checkFileExist(filePath))
|
||||
{
|
||||
LogError(string_format("cann't find file[%s].",filePath));
|
||||
@ -45,15 +52,15 @@ namespace CppJieba
|
||||
{
|
||||
_priorSubWords.push_back(subword);
|
||||
}
|
||||
LogInfo(string_format("loadPriorSubWords(%s) end", filePath));
|
||||
LogInfo(string_format("_loadPriorSubWords(%s) end", filePath));
|
||||
infile.close();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool KeyWordExt::loadStopWords(const char * const filePath)
|
||||
bool KeyWordExt::_loadStopWords(const char * const filePath)
|
||||
{
|
||||
|
||||
LogInfo(string_format("loadStopWords(%s) start", filePath));
|
||||
LogInfo(string_format("_loadStopWords(%s) start", filePath));
|
||||
if(!_stopWords.empty())
|
||||
{
|
||||
LogError("_stopWords has been loaded before! ");
|
||||
@ -366,9 +373,9 @@ int main()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
ext.loadStopWords("../dicts/stopwords.gbk.v1.0");
|
||||
ext._loadStopWords("../dicts/stopwords.gbk.v1.0");
|
||||
|
||||
if(!ext.loadPriorSubWords("../dicts/prior.gbk"))
|
||||
if(!ext._loadPriorSubWords("../dicts/prior.gbk"))
|
||||
{
|
||||
cerr<<"err"<<endl;
|
||||
return 1;
|
||||
|
@ -20,18 +20,14 @@ namespace CppJieba
|
||||
public:
|
||||
KeyWordExt();
|
||||
~KeyWordExt();
|
||||
bool init();
|
||||
|
||||
bool loadSegDict(const char * const filePath);
|
||||
|
||||
//load stopwords
|
||||
bool loadStopWords(const char * const filePath);
|
||||
|
||||
//load prior words' prefix
|
||||
bool loadPriorSubWords(const char * const filePath);
|
||||
|
||||
bool init(const char* const segDictFile, const char* const stopWordDictFile);
|
||||
bool dispose();
|
||||
|
||||
private:
|
||||
bool _loadStopWords(const char * const filePath);
|
||||
bool _loadPriorSubWords(const char * const filePath);
|
||||
|
||||
|
||||
public:
|
||||
bool extract(const string& title, vector<KeyWordInfo>& keyWordInfos, uint topN);
|
||||
bool extract(const vector<string>& words, vector<KeyWordInfo>& keyWordInfos, uint topN);
|
||||
|
@ -14,25 +14,23 @@ namespace CppJieba
|
||||
{
|
||||
}
|
||||
|
||||
bool Segment::init()
|
||||
bool Segment::init(const char* const filePath)
|
||||
{
|
||||
if(!_trie.init())
|
||||
{
|
||||
LogError("_trie.init failed.");
|
||||
return false;
|
||||
}
|
||||
LogInfo(string_format("_trie.loadDict(%s) start...", filePath));
|
||||
if(!_trie.loadDict(filePath))
|
||||
{
|
||||
LogError("_trie.loadDict faield.");
|
||||
return false;
|
||||
}
|
||||
LogInfo("_trie.loadDict end.");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Segment::loadSegDict(const char * const filePath)
|
||||
{
|
||||
LogInfo(string_format("_trie.loadDict(%s) start...", filePath));
|
||||
bool retFlag = _trie.loadDict(filePath);
|
||||
LogInfo("_trie.loadDict end.");
|
||||
return retFlag;
|
||||
}
|
||||
|
||||
|
||||
bool Segment::dispose()
|
||||
{
|
||||
return _trie.dispose();
|
||||
@ -212,7 +210,7 @@ int main()
|
||||
{
|
||||
Segment segment;
|
||||
segment.init();
|
||||
if(!segment.loadSegDict("../dicts/segdict.gbk.v3.0"))
|
||||
if(!segment._loadSegDict("../dicts/segdict.gbk.v3.0"))
|
||||
{
|
||||
cerr<<"1"<<endl;
|
||||
return 1;
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <set>
|
||||
#include "Trie.h"
|
||||
#include "globals.h"
|
||||
#include "HMMSegment.h"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
@ -16,12 +17,12 @@ namespace CppJieba
|
||||
{
|
||||
private:
|
||||
Trie _trie;
|
||||
|
||||
public:
|
||||
Segment();
|
||||
~Segment();
|
||||
public:
|
||||
bool init();
|
||||
bool loadSegDict(const char * const filePath);
|
||||
bool init(const char* const filePath);
|
||||
bool dispose();
|
||||
public:
|
||||
bool cutDAG(const string& str, vector<TrieNodeInfo>& segWordInfos);
|
||||
|
Loading…
x
Reference in New Issue
Block a user