mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
modify construction and init for segments
This commit is contained in:
parent
f89cf00552
commit
fa75f0f319
@ -15,127 +15,118 @@ namespace CppJieba
|
||||
{
|
||||
class FullSegment: public SegmentBase
|
||||
{
|
||||
private:
|
||||
Trie* _trie;
|
||||
const string _dictPath;
|
||||
private:
|
||||
Trie* _trie;
|
||||
|
||||
public:
|
||||
FullSegment(const char* dictPath): _dictPath(dictPath){};
|
||||
virtual ~FullSegment(){dispose();};
|
||||
public:
|
||||
bool init()
|
||||
{
|
||||
if(_getInitFlag())
|
||||
public:
|
||||
FullSegment(){_setInitFlag(false);};
|
||||
explicit FullSegment(const string& dictPath){_setInitFlag(init(dictPath));}
|
||||
virtual ~FullSegment(){};
|
||||
public:
|
||||
bool init(const string& dictPath)
|
||||
{
|
||||
LogError("already inited before now.");
|
||||
return false;
|
||||
if(_getInitFlag())
|
||||
{
|
||||
LogError("already inited before now.");
|
||||
return false;
|
||||
}
|
||||
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
|
||||
if (NULL == _trie)
|
||||
{
|
||||
LogError("get NULL pointor from getTrie(\"%s\")", dictPath.c_str());
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(true);
|
||||
}
|
||||
_trie = TrieManager::getInstance().getTrie(_dictPath.c_str());
|
||||
if (NULL == _trie)
|
||||
{
|
||||
LogError("get NULL pointor from getTrie(\"%s\")", _dictPath.c_str());
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(true);
|
||||
}
|
||||
bool dispose()
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
|
||||
public:
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
|
||||
//resut of searching in trie tree
|
||||
vector<pair<uint, const TrieNodeInfo*> > tRes;
|
||||
|
||||
//max index of res's words
|
||||
int maxIdx = 0;
|
||||
|
||||
// always equals to (uItr - begin)
|
||||
int uIdx = 0;
|
||||
|
||||
//tmp variables
|
||||
int wordLen = 0;
|
||||
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
|
||||
{
|
||||
//find word start from uItr
|
||||
if (_trie->find(uItr, end, tRes))
|
||||
{
|
||||
for (vector<pair<uint, const TrieNodeInfo*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||
{
|
||||
wordLen = itr->second->word.size();
|
||||
if (wordLen >= 2 || tRes.size() == 1 && maxIdx <= uIdx)
|
||||
{
|
||||
res.push_back(itr->second->word);
|
||||
}
|
||||
maxIdx = uIdx+wordLen > maxIdx ? uIdx+wordLen : maxIdx;
|
||||
}
|
||||
tRes.clear();
|
||||
}
|
||||
else // not found word start from uItr
|
||||
{
|
||||
if (maxIdx <= uIdx) // never exist in prev results
|
||||
{
|
||||
//put itr itself in res
|
||||
res.push_back(Unicode(1, *uItr));
|
||||
|
||||
//mark it exits
|
||||
++maxIdx;
|
||||
}
|
||||
}
|
||||
++uIdx;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
|
||||
public:
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<Unicode>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
|
||||
//resut of searching in trie tree
|
||||
vector<pair<uint, const TrieNodeInfo*> > tRes;
|
||||
|
||||
//max index of res's words
|
||||
int maxIdx = 0;
|
||||
|
||||
// always equals to (uItr - begin)
|
||||
int uIdx = 0;
|
||||
|
||||
//tmp variables
|
||||
int wordLen = 0;
|
||||
for (Unicode::const_iterator uItr = begin; uItr != end; uItr++)
|
||||
{
|
||||
//find word start from uItr
|
||||
if (_trie->find(uItr, end, tRes))
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
{
|
||||
for (vector<pair<uint, const TrieNodeInfo*> >::const_iterator itr = tRes.begin(); itr != tRes.end(); itr++)
|
||||
{
|
||||
wordLen = itr->second->word.size();
|
||||
if (wordLen >= 2 || tRes.size() == 1 && maxIdx <= uIdx)
|
||||
{
|
||||
res.push_back(itr->second->word);
|
||||
}
|
||||
maxIdx = uIdx+wordLen > maxIdx ? uIdx+wordLen : maxIdx;
|
||||
}
|
||||
tRes.clear();
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
else // not found word start from uItr
|
||||
{
|
||||
if (maxIdx <= uIdx) // never exist in prev results
|
||||
{
|
||||
//put itr itself in res
|
||||
res.push_back(Unicode(1, *uItr));
|
||||
|
||||
//mark it exits
|
||||
++maxIdx;
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes))
|
||||
{
|
||||
LogError("get unicode cut result error.");
|
||||
return false;
|
||||
}
|
||||
|
||||
string tmp;
|
||||
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
|
||||
{
|
||||
if (TransCode::encode(*uItr, tmp))
|
||||
{
|
||||
res.push_back(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
LogError("encode failed.");
|
||||
}
|
||||
}
|
||||
++uIdx;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res) const
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
if (begin >= end)
|
||||
{
|
||||
LogError("begin >= end");
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<Unicode> uRes;
|
||||
if (!cut(begin, end, uRes))
|
||||
{
|
||||
LogError("get unicode cut result error.");
|
||||
return false;
|
||||
}
|
||||
|
||||
string tmp;
|
||||
for (vector<Unicode>::const_iterator uItr = uRes.begin(); uItr != uRes.end(); uItr++)
|
||||
{
|
||||
if (TransCode::encode(*uItr, tmp))
|
||||
{
|
||||
res.push_back(tmp);
|
||||
}
|
||||
else
|
||||
{
|
||||
LogError("encode failed.");
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -33,12 +33,21 @@ namespace CppJieba
|
||||
EmitProbMap _emitProbM;
|
||||
EmitProbMap _emitProbS;
|
||||
vector<EmitProbMap* > _emitProbVec;
|
||||
private:
|
||||
const string _hmmModelPath;
|
||||
|
||||
public:
|
||||
HMMSegment(const char * const filePath): _hmmModelPath(filePath)
|
||||
HMMSegment(){_setInitFlag(false);}
|
||||
explicit HMMSegment(const string& filePath)
|
||||
{
|
||||
_setInitFlag(init(filePath));
|
||||
}
|
||||
virtual ~HMMSegment(){}
|
||||
public:
|
||||
bool init(const string& filePath)
|
||||
{
|
||||
if(_getInitFlag())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
memset(_startProb, 0, sizeof(_startProb));
|
||||
memset(_transProb, 0, sizeof(_transProb));
|
||||
_statMap[0] = 'B';
|
||||
@ -49,20 +58,7 @@ namespace CppJieba
|
||||
_emitProbVec.push_back(&_emitProbE);
|
||||
_emitProbVec.push_back(&_emitProbM);
|
||||
_emitProbVec.push_back(&_emitProbS);
|
||||
}
|
||||
virtual ~HMMSegment()
|
||||
{
|
||||
dispose();
|
||||
}
|
||||
public:
|
||||
virtual bool init()
|
||||
{
|
||||
return _setInitFlag(_loadModel(_hmmModelPath.c_str()));
|
||||
}
|
||||
virtual bool dispose()
|
||||
{
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
return _setInitFlag(_loadModel(filePath.c_str()));
|
||||
}
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
@ -96,11 +92,6 @@ namespace CppJieba
|
||||
public:
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
|
||||
{
|
||||
//if(!_getInitFlag())
|
||||
//{
|
||||
// LogError("not inited.");
|
||||
// return false;
|
||||
//}
|
||||
assert(_getInitFlag());
|
||||
if(begin == end)
|
||||
{
|
||||
@ -121,7 +112,6 @@ namespace CppJieba
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//virtual bool cut(const string& str, vector<string>& res)const;
|
||||
|
||||
private:
|
||||
bool _viterbi(Unicode::const_iterator begin, Unicode::const_iterator end, vector<uint>& status)const
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <arpa/inet.h>
|
||||
@ -40,9 +41,6 @@ namespace Husky
|
||||
public:
|
||||
virtual ~IRequestHandler(){};
|
||||
public:
|
||||
virtual bool init() = 0;
|
||||
virtual bool dispose() = 0;
|
||||
|
||||
virtual bool do_GET(const HttpReqInfo& httpReq, string& res) = 0;
|
||||
|
||||
};
|
||||
@ -63,10 +61,11 @@ namespace Husky
|
||||
public:
|
||||
ServerFrame(unsigned nPort, unsigned nThreadCount, IRequestHandler* pHandler)
|
||||
{
|
||||
m_bShutdown = false;
|
||||
m_nLsnPort = nPort;
|
||||
m_nThreadCount = nThreadCount;
|
||||
m_pHandler = pHandler;
|
||||
m_bShutdown = false;
|
||||
assert(pHandler);
|
||||
pthread_mutex_init(&m_pmAccept,NULL);
|
||||
};
|
||||
virtual ~ServerFrame(){pthread_mutex_destroy(&m_pmAccept);};
|
||||
@ -80,11 +79,6 @@ namespace Husky
|
||||
}
|
||||
LogInfo("init ok {port:%d, threadNum:%d}", m_nLsnPort, m_nThreadCount);
|
||||
|
||||
if(!m_pHandler->init())
|
||||
{
|
||||
LogFatal("m_pHandler init failed.");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual bool dispose()
|
||||
@ -96,7 +90,6 @@ namespace Husky
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
int sockfd;
|
||||
struct sockaddr_in dest;
|
||||
|
||||
@ -120,10 +113,6 @@ namespace Husky
|
||||
LogError("error [%s]", strerror(errno));
|
||||
}
|
||||
close(sockfd);
|
||||
if(!m_pHandler->dispose())
|
||||
{
|
||||
LogFatal("m_pHandler dispose failed.");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
virtual bool run()
|
||||
|
@ -8,9 +8,6 @@ namespace CppJieba
|
||||
{
|
||||
public:
|
||||
virtual ~ISegment(){};
|
||||
public:
|
||||
virtual bool init() = 0;
|
||||
virtual bool dispose() = 0;
|
||||
public:
|
||||
virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0;
|
||||
virtual bool cut(const string& str, vector<string>& res) const = 0;
|
||||
|
@ -34,37 +34,30 @@ namespace CppJieba
|
||||
{
|
||||
private:
|
||||
Trie* _trie;
|
||||
private:
|
||||
const string _dictPath;
|
||||
|
||||
public:
|
||||
MPSegment(const char * const dictPath): _dictPath(dictPath){};
|
||||
virtual ~MPSegment(){dispose();};
|
||||
MPSegment(){_setInitFlag(false);};
|
||||
explicit MPSegment(const string& dictPath)
|
||||
{
|
||||
_setInitFlag(init(dictPath));
|
||||
};
|
||||
virtual ~MPSegment(){};
|
||||
public:
|
||||
virtual bool init()
|
||||
bool init(const string& dictPath)
|
||||
{
|
||||
if(_getInitFlag())
|
||||
{
|
||||
LogError("already inited before now.");
|
||||
return false;
|
||||
}
|
||||
_trie = TrieManager::getInstance().getTrie(_dictPath.c_str());
|
||||
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
|
||||
if (_trie == NULL)
|
||||
{
|
||||
LogError("get a NULL pointor form getTrie(\"%s\").", _dictPath.c_str());
|
||||
LogError("get a NULL pointor form getTrie(\"%s\").", dictPath.c_str());
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(true);
|
||||
}
|
||||
virtual bool dispose()
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
}
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
|
||||
|
@ -14,44 +14,32 @@ namespace CppJieba
|
||||
MPSegment _mpSeg;
|
||||
HMMSegment _hmmSeg;
|
||||
public:
|
||||
MixSegment(const char * const mpSegDict, const char * const hmmSegDict): _mpSeg(mpSegDict), _hmmSeg(hmmSegDict)
|
||||
MixSegment(){_setInitFlag(false);};
|
||||
explicit MixSegment(const string& mpSegDict, const string& hmmSegDict): _mpSeg(mpSegDict), _hmmSeg(hmmSegDict)
|
||||
{
|
||||
_setInitFlag(_mpSeg && _hmmSeg);
|
||||
}
|
||||
virtual ~MixSegment()
|
||||
{
|
||||
dispose();
|
||||
}
|
||||
virtual ~MixSegment(){}
|
||||
public:
|
||||
virtual bool init()
|
||||
bool init(const string& mpSegDict, const string& hmmSegDict)
|
||||
{
|
||||
if(_getInitFlag())
|
||||
{
|
||||
LogError("inited.");
|
||||
return false;
|
||||
}
|
||||
if(!_mpSeg.init())
|
||||
if(!_mpSeg.init(mpSegDict))
|
||||
{
|
||||
LogError("_mpSeg init");
|
||||
return false;
|
||||
}
|
||||
if(!_hmmSeg.init())
|
||||
if(!_hmmSeg.init(hmmSegDict))
|
||||
{
|
||||
LogError("_hmmSeg init");
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(true);
|
||||
}
|
||||
virtual bool dispose()
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
_mpSeg.dispose();
|
||||
_hmmSeg.dispose();
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
}
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
public:
|
||||
|
@ -20,41 +20,35 @@ namespace CppJieba
|
||||
private:
|
||||
MixSegment _mixSeg;
|
||||
FullSegment _fullSeg;
|
||||
int _maxWordLen;
|
||||
size_t _maxWordLen;
|
||||
|
||||
public:
|
||||
QuerySegment(const char* dict, const char* model, int maxWordLen): _mixSeg(dict, model), _fullSeg(dict), _maxWordLen(maxWordLen){};
|
||||
virtual ~QuerySegment(){dispose();};
|
||||
QuerySegment(){_setInitFlag(false);};
|
||||
explicit QuerySegment(const string& dict, const string& model, size_t maxWordLen)
|
||||
{
|
||||
_setInitFlag(init(dict, model, maxWordLen));
|
||||
};
|
||||
virtual ~QuerySegment(){};
|
||||
public:
|
||||
bool init()
|
||||
bool init(const string& dict, const string& model, size_t maxWordLen)
|
||||
{
|
||||
if (_getInitFlag())
|
||||
{
|
||||
LogError("inited.");
|
||||
LogError("inited already.");
|
||||
return false;
|
||||
}
|
||||
if (!_mixSeg.init())
|
||||
if (!_mixSeg.init(dict, model))
|
||||
{
|
||||
LogError("_mixSeg init");
|
||||
return false;
|
||||
}
|
||||
if (!_fullSeg.init())
|
||||
if (!_fullSeg.init(dict))
|
||||
{
|
||||
LogError("_fullSeg init");
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(true);
|
||||
}
|
||||
bool dispose()
|
||||
{
|
||||
if(!_getInitFlag())
|
||||
{
|
||||
return true;
|
||||
}
|
||||
_fullSeg.dispose();
|
||||
_mixSeg.dispose();
|
||||
_setInitFlag(false);
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
using SegmentBase::cut;
|
||||
|
@ -18,11 +18,9 @@ namespace CppJieba
|
||||
bool _isInited;
|
||||
bool _getInitFlag()const{return _isInited;};
|
||||
bool _setInitFlag(bool flag){return _isInited = flag;};
|
||||
public:
|
||||
virtual bool init() = 0;
|
||||
virtual bool dispose() = 0;
|
||||
|
||||
public:
|
||||
operator bool(){return _getInitFlag();};
|
||||
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
|
||||
virtual bool cut(const string& str, vector<string>& res)const
|
||||
{
|
||||
|
@ -63,57 +63,52 @@ int main(int argc, char ** argv)
|
||||
if("cutHMM" == algorithm)
|
||||
{
|
||||
HMMSegment seg(modelPath.c_str());
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
cut(&seg, arg[1].c_str());
|
||||
seg.dispose();
|
||||
}
|
||||
else if("cutDAG" == algorithm)
|
||||
{
|
||||
MPSegment seg(dictPath.c_str());
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
cut(&seg, arg[1].c_str());
|
||||
seg.dispose();
|
||||
}
|
||||
else if ("cutFull" == algorithm)
|
||||
{
|
||||
FullSegment seg(dictPath.c_str());
|
||||
if (!seg.init())
|
||||
if (!seg)
|
||||
{
|
||||
cout << "seg init failed" << endl;
|
||||
return false;
|
||||
}
|
||||
cut(&seg, arg[1].c_str());
|
||||
seg.dispose();
|
||||
}
|
||||
else if ("cutQuery" == algorithm)
|
||||
{
|
||||
QuerySegment seg(dictPath.c_str(), modelPath.c_str(), maxLen);
|
||||
if (!seg.init())
|
||||
if (!seg)
|
||||
{
|
||||
cout << "seg init failed" << endl;
|
||||
return false;
|
||||
}
|
||||
cut(&seg, arg[1].c_str());
|
||||
seg.dispose();
|
||||
}
|
||||
else
|
||||
{
|
||||
MixSegment seg(dictPath.c_str(), modelPath.c_str());
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
cut(&seg, arg[1].c_str());
|
||||
seg.dispose();
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -15,10 +15,8 @@ using namespace CppJieba;
|
||||
class ReqHandler: public IRequestHandler
|
||||
{
|
||||
public:
|
||||
ReqHandler(const string& dictPath, const string& modelPath): _segment(dictPath.c_str(), modelPath.c_str()){};
|
||||
ReqHandler(const string& dictPath, const string& modelPath): _segment(dictPath, modelPath){};
|
||||
virtual ~ReqHandler(){};
|
||||
virtual bool init(){return _segment.init();};
|
||||
virtual bool dispose(){return _segment.dispose();};
|
||||
public:
|
||||
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd)
|
||||
{
|
||||
|
@ -31,33 +31,30 @@ int main(int argc, char ** argv)
|
||||
//demo
|
||||
{
|
||||
HMMSegment seg(HMM_DICT_FILE);
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
cut(&seg, TEST_FILE);
|
||||
seg.dispose();
|
||||
}
|
||||
{
|
||||
MixSegment seg(JIEBA_DICT_FILE, HMM_DICT_FILE);
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
cut(&seg, TEST_FILE);
|
||||
seg.dispose();
|
||||
}
|
||||
{
|
||||
MPSegment seg(JIEBA_DICT_FILE);
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
cut(&seg, TEST_FILE);
|
||||
seg.dispose();
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -25,36 +25,14 @@ void cut(const ISegment * seg, const char * const filePath)
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
//demo
|
||||
//{
|
||||
// HMMSegment seg;
|
||||
// if(!seg.init("../dicts/hmm_model.utf8"))
|
||||
// {
|
||||
// cout<<"seg init failed."<<endl;
|
||||
// return EXIT_FAILURE;
|
||||
// }
|
||||
// cut(&seg, "testlines.utf8");
|
||||
// seg.dispose();
|
||||
//}
|
||||
//{
|
||||
// MixSegment seg;
|
||||
// if(!seg.init("../dicts/jieba.dict.utf8", "../dicts/hmm_model.utf8"))
|
||||
// {
|
||||
// cout<<"seg init failed."<<endl;
|
||||
// return EXIT_FAILURE;
|
||||
// }
|
||||
// cut(&seg, "testlines.utf8");
|
||||
// seg.dispose();
|
||||
//}
|
||||
{
|
||||
MixSegment seg("../dicts/jieba.dict.utf8", "../dicts/hmm_model.utf8");
|
||||
if(!seg.init())
|
||||
if(!seg)
|
||||
{
|
||||
cout<<"seg init failed."<<endl;
|
||||
return false;
|
||||
}
|
||||
cut(&seg, argv[1]);
|
||||
seg.dispose();
|
||||
}
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
@ -11,8 +11,8 @@ TEST(HMMSegmentTest, Test1)
|
||||
//string s;
|
||||
//vector<string> buf(res, res + sizeof(res)/sizeof(res[0]));
|
||||
vector<string> words;
|
||||
ASSERT_EQ(segment.init(), true);
|
||||
ASSERT_EQ(segment.cut(str, words), true);
|
||||
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
@ -9,8 +9,8 @@ TEST(MPSegmentTest, Test1)
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学","号", " 123456"};
|
||||
vector<string> words;
|
||||
ASSERT_EQ(segment.init(), true);
|
||||
ASSERT_EQ(segment.cut(str, words), true);
|
||||
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
//print(words);
|
||||
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
}
|
||||
|
@ -8,41 +8,9 @@ TEST(MixSegmentTest, Test1)
|
||||
MixSegment segment("../dicts/jieba.dict.utf8", "../dicts/hmm_model.utf8");;
|
||||
const char* str = "我来自北京邮电大学。。。 学号 123456";
|
||||
const char* res[] = {"我", "来自", "北京邮电大学", "。","。","。"," ","学号", " 123456"};
|
||||
//string s;
|
||||
//vector<string> buf(res, res + sizeof(res)/sizeof(res[0]));
|
||||
vector<string> words;
|
||||
ASSERT_EQ(segment.init(), true);
|
||||
ASSERT_EQ(segment.cut(str, words), true);
|
||||
ASSERT_TRUE(segment);
|
||||
ASSERT_TRUE(segment.cut(str, words));
|
||||
EXPECT_EQ(words, vector<string>(res, res + sizeof(res)/sizeof(res[0])));
|
||||
//print(words);
|
||||
|
||||
//for(uint i = 0; i < sizeof(res)/sizeof(res[0]); i++)
|
||||
//{
|
||||
// buf.push_back()
|
||||
//}
|
||||
//buf.push_back("");
|
||||
//buf.push_back("你好");
|
||||
//buf.push_back("...hh");
|
||||
//vector<string> res;
|
||||
//uint size = strlen(str);
|
||||
//uint offset = 0;
|
||||
//while(offset < size)
|
||||
//{
|
||||
// uint len;
|
||||
// const char* t = str + offset;
|
||||
// int ret = filterAscii(t, size - offset, len);
|
||||
// s.assign(t, len);
|
||||
// res.push_back(s);
|
||||
// //cout<<s<<","<<ret<<","<<len<<endl;
|
||||
// //cout<<str<<endl;
|
||||
// offset += len;
|
||||
//}
|
||||
//EXPECT_EQ(res, buf);
|
||||
}
|
||||
|
||||
//int main(int argc, char** argv)
|
||||
//{
|
||||
// //ChineseFilter chFilter;
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user