merge MixSegment.h/cpp into hpp

This commit is contained in:
wyy 2013-11-30 12:41:31 +08:00
parent 55c64e9893
commit 58e69783cc
7 changed files with 396 additions and 429 deletions

View File

@ -1,23 +1,15 @@
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib) SET(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
SET(LIBCPPJIEBA_SRC MixSegment.cpp)
ADD_LIBRARY(cppjieba STATIC ${LIBCPPJIEBA_SRC})
ADD_EXECUTABLE(cjsegment segment.cpp) ADD_EXECUTABLE(cjsegment segment.cpp)
ADD_EXECUTABLE(cjserver server.cpp) ADD_EXECUTABLE(cjserver server.cpp)
TARGET_LINK_LIBRARIES(cjserver husky pthread)
LINK_DIRECTORIES(Husky) LINK_DIRECTORIES(Husky)
TARGET_LINK_LIBRARIES(cjsegment cppjieba)
TARGET_LINK_LIBRARIES(cjserver cppjieba husky pthread)
SET_TARGET_PROPERTIES(cppjieba PROPERTIES VERSION 1.2 SOVERSION 1)
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin) INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin) INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
INSTALL(TARGETS cppjieba ARCHIVE DESTINATION lib/CppJieba) INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp structs.h Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
INSTALL(FILES ChineseFilter.hpp HMMSegment.h MPSegment.h structs.h Trie.h globals.h ISegment.hpp MixSegment.h SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
ADD_SUBDIRECTORY(Husky) ADD_SUBDIRECTORY(Husky)
ADD_SUBDIRECTORY(Limonp) ADD_SUBDIRECTORY(Limonp)

View File

@ -1,125 +0,0 @@
#include "MixSegment.h"
namespace CppJieba
{
// Default constructor: performs no work. The segmenter is unusable until
// init() has been called successfully.
MixSegment::MixSegment() {}
// Destructor: releases whatever init() acquired by delegating to dispose().
// dispose() returns immediately when the object was never initialised.
MixSegment::~MixSegment()
{
    this->dispose();
}
// Initialises both underlying segmenters.
//
// mpSegDict  - dictionary path handed to _mpSeg.init().
// hmmSegDict - model path handed to _hmmSeg.init().
//
// Returns false (and logs) when the object is already initialised or when
// either sub-segmenter fails to initialise; otherwise returns the result of
// _setInitFlag(true).
bool MixSegment::init(const char* const mpSegDict, const char* const hmmSegDict)
{
    if(_getInitFlag())
    {
        LogError("inited.");
        return false;
    }
    if(!_mpSeg.init(mpSegDict))
    {
        LogError("_mpSeg init");
        return false;
    }
    if(!_hmmSeg.init(hmmSegDict))
    {
        LogError("_hmmSeg init");
        // Roll back the half-finished initialisation: the init flag stays
        // false on this path, so dispose() would never release _mpSeg and the
        // object would be left partially initialised.
        _mpSeg.dispose();
        return false;
    }
    return _setInitFlag(true);
}
// Releases both underlying segmenters and clears the init flag.
// Calling it on an object that is not initialised does nothing.
// Always returns true.
bool MixSegment::dispose()
{
    if(_getInitFlag())
    {
        _mpSeg.dispose();
        _hmmSeg.dispose();
        _setInitFlag(false);
    }
    return true;
}
// Segments `str`, storing the resulting words in `res`.
// Simply forwards to SegmentBase::cut(str, res) and returns its result.
bool MixSegment::cut(const string& str, vector<string>& res)const
{
    const bool succeeded = SegmentBase::cut(str, res);
    return succeeded;
}
bool MixSegment::cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{
if(!_getInitFlag())
{
LogError("not inited.");
return false;
}
if(begin == end)
{
return false;
}
vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(begin, end, infos))
{
LogError("mpSeg cutDAG failed.");
return false;
}
Unicode unico;
vector<Unicode> hmmRes;
string tmp;
for(uint i= 0; i < infos.size(); i++)
{
TransCode::encode(infos[i].word,tmp);
if(1 == infos[i].word.size())
{
unico.push_back(infos[i].word[0]);
}
else
{
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
unico.clear();
TransCode::encode(infos[i].word, tmp);
res.push_back(tmp);
}
}
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
return true;
}
}
#ifdef MIXSEGMENT_UT
using namespace CppJieba;
// Placeholder entry point for the unit-test build: performs no checks and
// always reports success.
int main()
{
return 0;
}
#endif

View File

@ -1,28 +0,0 @@
#ifndef CPPJIEBA_MIXSEGMENT_H
#define CPPJIEBA_MIXSEGMENT_H
#include "MPSegment.hpp"
#include "HMMSegment.hpp"
#include "Limonp/str_functs.hpp"
namespace CppJieba
{
// Segmenter built from two others: an MPSegment and an HMMSegment.
// Only the interface is declared here; the definitions live in the
// accompanying source file.
class MixSegment: public SegmentBase
{
    public:
        MixSegment();
        virtual ~MixSegment();

        // Sets up both underlying segmenters from the given paths.
        bool init(const char* const _mpSegDict, const char* const _hmmSegDict);
        // Releases both underlying segmenters.
        bool dispose();

        // Cuts `str` into words stored in `res`.
        bool cut(const string& str, vector<string>& res)const;
        // Cuts the Unicode range [begin, end) into words stored in `res`.
        bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const;

    private:
        MPSegment _mpSeg;
        HMMSegment _hmmSeg;
};
}
#endif

128
src/MixSegment.hpp Normal file
View File

@ -0,0 +1,128 @@
#ifndef CPPJIEBA_MIXSEGMENT_H
#define CPPJIEBA_MIXSEGMENT_H
#include "MPSegment.hpp"
#include "HMMSegment.hpp"
#include "Limonp/str_functs.hpp"
namespace CppJieba
{
class MixSegment: public SegmentBase
{
private:
MPSegment _mpSeg;
HMMSegment _hmmSeg;
public:
MixSegment()
{
}
virtual ~MixSegment()
{
dispose();
}
public:
bool init(const char* const mpSegDict, const char* const hmmSegDict)
{
if(_getInitFlag())
{
LogError("inited.");
return false;
}
if(!_mpSeg.init(mpSegDict))
{
LogError("_mpSeg init");
return false;
}
if(!_hmmSeg.init(hmmSegDict))
{
LogError("_hmmSeg init");
return false;
}
return _setInitFlag(true);
}
bool dispose()
{
if(!_getInitFlag())
{
return true;
}
_mpSeg.dispose();
_hmmSeg.dispose();
_setInitFlag(false);
return true;
}
public:
//virtual bool cut(const string& str, vector<string>& res) const;
bool cut(const string& str, vector<string>& res)const
{
return SegmentBase::cut(str, res);
}
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{
if(!_getInitFlag())
{
LogError("not inited.");
return false;
}
if(begin == end)
{
return false;
}
vector<TrieNodeInfo> infos;
if(!_mpSeg.cut(begin, end, infos))
{
LogError("mpSeg cutDAG failed.");
return false;
}
Unicode unico;
vector<Unicode> hmmRes;
string tmp;
for(uint i= 0; i < infos.size(); i++)
{
TransCode::encode(infos[i].word,tmp);
if(1 == infos[i].word.size())
{
unico.push_back(infos[i].word[0]);
}
else
{
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
unico.clear();
TransCode::encode(infos[i].word, tmp);
res.push_back(tmp);
}
}
if(!unico.empty())
{
hmmRes.clear();
if(!_hmmSeg.cut(unico.begin(), unico.end(), hmmRes))
{
LogError("_hmmSeg cut failed.");
return false;
}
for(uint j = 0; j < hmmRes.size(); j++)
{
TransCode::encode(hmmRes[j], tmp);
res.push_back(tmp);
}
}
return true;
}
};
}
#endif

View File

@ -3,7 +3,7 @@
#include "Limonp/ArgvContext.hpp" #include "Limonp/ArgvContext.hpp"
#include "MPSegment.hpp" #include "MPSegment.hpp"
#include "HMMSegment.hpp" #include "HMMSegment.hpp"
#include "MixSegment.h" #include "MixSegment.hpp"
using namespace CppJieba; using namespace CppJieba;

View File

@ -9,7 +9,7 @@
#include "Husky/ServerFrame.h" #include "Husky/ServerFrame.h"
#include "MPSegment.hpp" #include "MPSegment.hpp"
#include "HMMSegment.hpp" #include "HMMSegment.hpp"
#include "MixSegment.h" #include "MixSegment.hpp"
using namespace Husky; using namespace Husky;
using namespace CppJieba; using namespace CppJieba;