Make the public load* functions private and move their calls into init(...)

This commit is contained in:
gwdwyy 2013-09-06 00:53:36 +08:00
parent 50117075a7
commit b167777a31
9 changed files with 58 additions and 98 deletions

View File

@ -8,18 +8,11 @@ using namespace CppJieba;
void testKeyWordExt(const char * dictPath, const char * filePath) void testKeyWordExt(const char * dictPath, const char * filePath)
{ {
KeyWordExt ext; KeyWordExt ext;
ext.init(); if(!ext.init(dictPath, "../dicts/stopwords.gbk.v1.0"))
{
return;
}
if(!ext.loadSegDict(dictPath))
{
cerr<<__FILE__<<__LINE__<<endl;
return ;
}
if(!ext.loadStopWords("../dicts/stopwords.gbk.v1.0"))
{
cerr<<__FILE__<<__LINE__<<endl;
return ;
}
ifstream ifile(filePath); ifstream ifile(filePath);
vector<KeyWordInfo> res; vector<KeyWordInfo> res;
string line; string line;
@ -36,43 +29,6 @@ void testKeyWordExt(const char * dictPath, const char * filePath)
ext.dispose(); ext.dispose();
} }
// Segments every non-empty line of filePath with a standalone Segment, feeds
// the resulting words to KeyWordExt::extract (topN = 20) and prints the line
// followed by the joined keyword infos.
//   dictPath: segmentation dictionary loaded into both seg and ext.
//   filePath: text file read line by line.
// On any init/load failure, prints __FILE__/__LINE__ to cerr and returns.
void testKeyWordExt2(const char * dictPath, const char * filePath)
{
    Segment seg;
    KeyWordExt ext;
    // Both init() calls return bool; do not continue with a half-initialised object.
    if(!seg.init() || !ext.init())
    {
        cerr<<__FILE__<<__LINE__<<endl;
        return ;
    }
    if(!seg.loadSegDict(dictPath))
    {
        cerr<<__FILE__<<__LINE__<<endl;
        return ;
    }
    // The original had a stray ';' after this condition, which turned the
    // braces below into an unconditional block and made the function always
    // bail out here.
    if(!ext.loadSegDict(dictPath))
    {
        cerr<<__FILE__<<__LINE__<<endl;
        return ;
    }

    ifstream ifile(filePath);
    vector<string> words;
    vector<KeyWordInfo> res;
    string line;
    while(getline(ifile, line))
    {
        if(!line.empty())
        {
            seg.cutDAG(line, words);
            ext.extract(words, res, 20);
            cout<<line<<"\n"<<joinWordInfos(res)<<endl;
        }
    }
    seg.dispose();
    ext.dispose();
}
const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8"; const char * const DEFAULT_DICTPATH = "../dicts/jieba.dict.utf8";
int main(int argc, char ** argv) int main(int argc, char ** argv)

View File

@ -8,22 +8,18 @@ Segment seg;
HMMSegment hmmseg; HMMSegment hmmseg;
bool init(const char * const dictPath, const char * const modelPath) bool init(const char * const dictPath, const char * const modelPath)
{ {
if(!seg.init()) if(!seg.init(dictPath))
{ {
cout<<"seg init failed."<<endl; cout<<"seg init failed."<<endl;
return false; return false;
} }
if(!seg.loadSegDict(dictPath)) if(!hmmseg.init(modelPath))
{ {
cout<<"seg loadDict failed."<<endl; cout<<"hmmseg init failed."<<endl;
return false;
}
if(!hmmseg.loadModel(modelPath))
{
cout<<"hmmseg loadModel failed."<<endl;
return false; return false;
} }
return true; return true;
} }
@ -66,6 +62,11 @@ bool dispose()
cout<<"seg dispose failed."<<endl; cout<<"seg dispose failed."<<endl;
return false; return false;
} }
// Tear down the HMM segmenter too. Report it under its own name so this
// failure can be told apart from the "seg dispose failed." case.
if(!hmmseg.dispose())
{
    cout<<"hmmseg dispose failed."<<endl;
    return false;
}
return true; return true;
} }

View File

@ -1,3 +1,4 @@
我来到北京清华大学 我来到北京清华大学
他来到了网易杭研大厦 他来到了网易杭研大厦
杭研
小明硕士毕业于中国科学院计算所,后在日本京都大学深造 小明硕士毕业于中国科学院计算所,后在日本京都大学深造

View File

@ -21,9 +21,9 @@ namespace CppJieba
} }
bool HMMSegment::init() bool HMMSegment::init(const char* const modelPath)
{ {
return true; return _loadModel(modelPath);
} }
bool HMMSegment::dispose() bool HMMSegment::dispose()
@ -31,7 +31,7 @@ namespace CppJieba
return true; return true;
} }
bool HMMSegment::loadModel(const char* const filePath) bool HMMSegment::_loadModel(const char* const filePath)
{ {
LogInfo(string_format("loadModel [%s] start ...", filePath)); LogInfo(string_format("loadModel [%s] start ...", filePath));
ifstream ifile(filePath); ifstream ifile(filePath);

View File

@ -32,13 +32,13 @@ namespace CppJieba
HMMSegment(); HMMSegment();
~HMMSegment(); ~HMMSegment();
public: public:
bool init(); bool init(const char* const modelPath);
bool dispose(); bool dispose();
public: public:
bool loadModel(const char* const filePath);
bool cut(const string& str, vector<string>& res); bool cut(const string& str, vector<string>& res);
bool viterbi(const vector<uint16_t>& unico, vector<uint>& status); bool viterbi(const vector<uint16_t>& unico, vector<uint>& status);
private: private:
bool _loadModel(const char* const filePath);
bool _getLine(ifstream& ifile, string& line); bool _getLine(ifstream& ifile, string& line);
bool _loadEmitProb(const string& line, EmitProbMap& mp); bool _loadEmitProb(const string& line, EmitProbMap& mp);
bool _decodeOne(const string& str, uint16_t& res); bool _decodeOne(const string& str, uint16_t& res);

View File

@ -16,19 +16,26 @@ namespace CppJieba
{ {
} }
bool KeyWordExt::init() bool KeyWordExt::init(const char* const segDictFile, const char* const stopWordDictFile)
{ {
return _segment.init(); LogInfo("KeyWordExt init start ...");
if(!_segment.init(segDictFile))
{
LogError("_segment.init failed.");
return false;
}
if(!_loadStopWords(stopWordDictFile))
{
LogError("_loadStopWords failed.");
return false;
}
LogInfo("KeyWordExt init OK.");
return true;
} }
bool KeyWordExt::loadSegDict(const char * const filePath) bool KeyWordExt::_loadPriorSubWords(const char * const filePath)
{ {
return _segment.loadSegDict(filePath); LogInfo(string_format("_loadPriorSubWords(%s) start", filePath));
}
bool KeyWordExt::loadPriorSubWords(const char * const filePath)
{
LogInfo(string_format("loadPriorSubWords(%s) start", filePath));
if(!checkFileExist(filePath)) if(!checkFileExist(filePath))
{ {
LogError(string_format("cann't find file[%s].",filePath)); LogError(string_format("cann't find file[%s].",filePath));
@ -45,15 +52,15 @@ namespace CppJieba
{ {
_priorSubWords.push_back(subword); _priorSubWords.push_back(subword);
} }
LogInfo(string_format("loadPriorSubWords(%s) end", filePath)); LogInfo(string_format("_loadPriorSubWords(%s) end", filePath));
infile.close(); infile.close();
return true; return true;
} }
bool KeyWordExt::loadStopWords(const char * const filePath) bool KeyWordExt::_loadStopWords(const char * const filePath)
{ {
LogInfo(string_format("loadStopWords(%s) start", filePath)); LogInfo(string_format("_loadStopWords(%s) start", filePath));
if(!_stopWords.empty()) if(!_stopWords.empty())
{ {
LogError("_stopWords has been loaded before! "); LogError("_stopWords has been loaded before! ");
@ -366,9 +373,9 @@ int main()
{ {
return 1; return 1;
} }
ext.loadStopWords("../dicts/stopwords.gbk.v1.0"); ext._loadStopWords("../dicts/stopwords.gbk.v1.0");
if(!ext.loadPriorSubWords("../dicts/prior.gbk")) if(!ext._loadPriorSubWords("../dicts/prior.gbk"))
{ {
cerr<<"err"<<endl; cerr<<"err"<<endl;
return 1; return 1;

View File

@ -20,18 +20,14 @@ namespace CppJieba
public: public:
KeyWordExt(); KeyWordExt();
~KeyWordExt(); ~KeyWordExt();
bool init(); bool init(const char* const segDictFile, const char* const stopWordDictFile);
bool loadSegDict(const char * const filePath);
//load stopwords
bool loadStopWords(const char * const filePath);
//load prior words' prefix
bool loadPriorSubWords(const char * const filePath);
bool dispose(); bool dispose();
private:
bool _loadStopWords(const char * const filePath);
bool _loadPriorSubWords(const char * const filePath);
public: public:
bool extract(const string& title, vector<KeyWordInfo>& keyWordInfos, uint topN); bool extract(const string& title, vector<KeyWordInfo>& keyWordInfos, uint topN);
bool extract(const vector<string>& words, vector<KeyWordInfo>& keyWordInfos, uint topN); bool extract(const vector<string>& words, vector<KeyWordInfo>& keyWordInfos, uint topN);

View File

@ -14,25 +14,23 @@ namespace CppJieba
{ {
} }
bool Segment::init() bool Segment::init(const char* const filePath)
{ {
if(!_trie.init()) if(!_trie.init())
{ {
LogError("_trie.init failed."); LogError("_trie.init failed.");
return false; return false;
} }
LogInfo(string_format("_trie.loadDict(%s) start...", filePath));
// Load the dictionary into the trie; init fails if the load fails.
if(!_trie.loadDict(filePath))
{
    LogError("_trie.loadDict failed.");
    return false;
}
LogInfo("_trie.loadDict end.");
return true; return true;
} }
// Loads the dictionary at filePath into the underlying trie, logging before
// and after the load. Returns the result reported by _trie.loadDict.
bool Segment::loadSegDict(const char * const filePath)
{
    LogInfo(string_format("_trie.loadDict(%s) start...", filePath));
    const bool loaded = _trie.loadDict(filePath);
    LogInfo("_trie.loadDict end.");
    return loaded;
}
bool Segment::dispose() bool Segment::dispose()
{ {
return _trie.dispose(); return _trie.dispose();
@ -212,7 +210,7 @@ int main()
{ {
Segment segment; Segment segment;
segment.init(); segment.init();
if(!segment.loadSegDict("../dicts/segdict.gbk.v3.0")) if(!segment._loadSegDict("../dicts/segdict.gbk.v3.0"))
{ {
cerr<<"1"<<endl; cerr<<"1"<<endl;
return 1; return 1;

View File

@ -9,6 +9,7 @@
#include <set> #include <set>
#include "Trie.h" #include "Trie.h"
#include "globals.h" #include "globals.h"
#include "HMMSegment.h"
namespace CppJieba namespace CppJieba
{ {
@ -16,12 +17,12 @@ namespace CppJieba
{ {
private: private:
Trie _trie; Trie _trie;
public: public:
Segment(); Segment();
~Segment(); ~Segment();
public: public:
bool init(); bool init(const char* const filePath);
bool loadSegDict(const char * const filePath);
bool dispose(); bool dispose();
public: public:
bool cutDAG(const string& str, vector<TrieNodeInfo>& segWordInfos); bool cutDAG(const string& str, vector<TrieNodeInfo>& segWordInfos);