merge upstream

This commit is contained in:
aholic 2013-11-27 16:32:54 +08:00
commit ef8954f1fe
15 changed files with 226 additions and 232 deletions

View File

@ -1,6 +1,9 @@
PROJECT(CPPJIEBA) PROJECT(CPPJIEBA)
SET(CMAKE_INSTALL_PREFIX /usr) SET(CMAKE_INSTALL_PREFIX /usr)
ADD_DEFINITIONS(-std=c++0x -O3) ADD_DEFINITIONS(-std=c++0x -O3)
IF (DEFINED ENC)
ADD_DEFINITIONS(-DCPPJIEBA_${ENC})
ENDIF()
ADD_SUBDIRECTORY(src) ADD_SUBDIRECTORY(src)
ADD_SUBDIRECTORY(dicts) ADD_SUBDIRECTORY(dicts)
ADD_SUBDIRECTORY(scripts) ADD_SUBDIRECTORY(scripts)

View File

@ -8,9 +8,6 @@
现在支持utf8,gbk编码的分词。 现在支持utf8,gbk编码的分词。
- `master`分支支持`utf8`编码
- `gbk`分支支持`gbk`编码
## 安装与使用 ## 安装与使用
### 下载和安装 ### 下载和安装
@ -22,6 +19,8 @@ cd cppjieba-master
mkdir build mkdir build
cd build cd build
cmake .. cmake ..
# 默认是utf8编码，如果要使用gbk编码，则使用下句cmake命令
# cmake .. -DENC=GBK
make make
sudo make install sudo make install
``` ```
@ -122,14 +121,14 @@ Output:
核心目录,包含主要源代码。 核心目录,包含主要源代码。
#### Trie树 #### Trie树
Trie.cpp/Trie.h 负责载入词典的trie树主要供Segment模块使用。 Trie.hpp 负责载入词典的trie树主要供Segment模块使用。
#### Segment模块 #### Segment模块
MPSegment.cpp/MPSegment.h MPSegment.hpp
(Maximum Probability)最大概率法:负责根据Trie树构建有向无环图和进行动态规划算法是分词算法的核心。 (Maximum Probability)最大概率法:负责根据Trie树构建有向无环图和进行动态规划算法是分词算法的核心。
HMMSegment.cpp/HMMSegment.h HMMSegment.hpp
是根据HMM模型来进行分词主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。 是根据HMM模型来进行分词主要算法思路是根据(B,E,M,S)四个状态来代表每个字的隐藏状态。
HMM模型由dicts/下面的`hmm_model.utf8`提供。 HMM模型由dicts/下面的`hmm_model.utf8`提供。
分词算法即viterbi算法。 分词算法即viterbi算法。

View File

@ -6,7 +6,7 @@
namespace CppJieba namespace CppJieba
{ {
enum CHAR_TYPE { CHWORD = 0, DIGIT_OR_LETTER = 1, OTHERS = 2}; enum CHAR_TYPE { CHWORD = 0, DIGIT_OR_LETTER = 1};
typedef Unicode::const_iterator UniConIter; typedef Unicode::const_iterator UniConIter;
class ChineseFilter; class ChineseFilter;
class ChFilterIterator class ChFilterIterator
@ -55,11 +55,7 @@ namespace CppJieba
{ {
return DIGIT_OR_LETTER; return DIGIT_OR_LETTER;
} }
if(x >= 0x4e00 && x <= 0x9fff) return CHWORD;
{
return CHWORD;
}
return OTHERS;
} }
ChFilterIterator _get(UniConIter iter) ChFilterIterator _get(UniConIter iter)
{ {
@ -67,7 +63,7 @@ namespace CppJieba
const UniConIter& _end = ptUnico->end(); const UniConIter& _end = ptUnico->end();
if(iter == _end) if(iter == _end)
{ {
return ChFilterIterator(ptUnico, end, end, OTHERS); return ChFilterIterator(ptUnico, end, end, DIGIT_OR_LETTER);
} }
CHAR_TYPE charType = _charType(*iter); CHAR_TYPE charType = _charType(*iter);
iter ++; iter ++;
@ -99,7 +95,7 @@ namespace CppJieba
} }
iterator end() iterator end()
{ {
return iterator(&_unico, _unico.end(), _unico.end(), OTHERS); return iterator(&_unico, _unico.end(), _unico.end(), DIGIT_OR_LETTER);
} }
}; };
} }

View File

@ -15,12 +15,13 @@ namespace CppJieba
{ {
private: private:
Trie _trie; Trie _trie;
const string _dictPath;
public: public:
FullSegment(){}; FullSegment(const char* dictPath): _dictPath(dictPath){};
virtual ~FullSegment(){dispose();}; virtual ~FullSegment(){dispose();};
public: public:
bool init(const char* const filePath) bool init()
{ {
if(_getInitFlag()) if(_getInitFlag())
{ {
@ -32,8 +33,8 @@ namespace CppJieba
LogError("_trie.init failed."); LogError("_trie.init failed.");
return false; return false;
} }
LogInfo("_trie.loadDict(%s) start...", filePath); LogInfo("_trie.loadDict(%s) start...", _dictPath.c_str());
if(!_trie.loadDict(filePath)) if(!_trie.loadDict(_dictPath.c_str()))
{ {
LogError("_trie.loadDict faield."); LogError("_trie.loadDict faield.");
return false; return false;

View File

@ -31,9 +31,11 @@ namespace CppJieba
EmitProbMap _emitProbM; EmitProbMap _emitProbM;
EmitProbMap _emitProbS; EmitProbMap _emitProbS;
vector<EmitProbMap* > _emitProbVec; vector<EmitProbMap* > _emitProbVec;
private:
const string _hmmModelPath;
public: public:
HMMSegment() HMMSegment(const char * const filePath): _hmmModelPath(filePath)
{ {
memset(_startProb, 0, sizeof(_startProb)); memset(_startProb, 0, sizeof(_startProb));
memset(_transProb, 0, sizeof(_transProb)); memset(_transProb, 0, sizeof(_transProb));
@ -51,11 +53,11 @@ namespace CppJieba
dispose(); dispose();
} }
public: public:
bool init(const char* const modelPath) virtual bool init()
{ {
return _setInitFlag(_loadModel(modelPath)); return _setInitFlag(_loadModel(_hmmModelPath.c_str()));
} }
bool dispose() virtual bool dispose()
{ {
_setInitFlag(false); _setInitFlag(false);
return true; return true;
@ -88,11 +90,8 @@ namespace CppJieba
} }
return true; return true;
} }
bool cut(const string& str, vector<string>& res)const public:
{ virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
return SegmentBase::cut(str, res);
}
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {

View File

@ -6,8 +6,11 @@ namespace CppJieba
{ {
class ISegment class ISegment
{ {
//public: public:
// virtual ~ISegment(){}; virtual ~ISegment(){};
public:
virtual bool init() = 0;
virtual bool dispose() = 0;
public: public:
virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0; virtual bool cut(Unicode::const_iterator begin , Unicode::const_iterator end, vector<string>& res) const = 0;
virtual bool cut(const string& str, vector<string>& res) const = 0; virtual bool cut(const string& str, vector<string>& res) const = 0;

View File

@ -120,7 +120,7 @@ namespace Limonp
return res; return res;
} }
inline bool splitStr(const string& src, vector<string>& res, const string& pattern) inline bool splitStr(const string& src, vector<string>& res, const string& pattern)
{ {
@ -218,41 +218,129 @@ namespace Limonp
return str.find(ch) != string::npos; return str.find(ch) != string::npos;
} }
//inline void extractWords(const string& sentence, vector<string>& words) inline bool utf8ToUnicode(const string& str, vector<uint16_t>& vec)
//{ {
// bool flag = false; char ch1, ch2;
// uint lhs = 0, len = 0; if(str.empty())
// for(uint i = 0; i < sentence.size(); i++) {
// { return false;
// char x = sentence[i]; }
// if((0x0030 <= x && x<= 0x0039) || (0x0041 <= x && x <= 0x005a ) || (0x0061 <= x && x <= 0x007a)) vec.clear();
// { size_t siz = str.size();
// if(flag) for(uint i = 0;i < siz;)
// { {
// len ++; if(!(str[i] & 0x80)) // 0xxxxxxx
// } {
// else vec.push_back(str[i]);
// { i++;
// lhs = i; }
// len = 1; else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx
// } {
// flag = true; ch1 = (str[i] >> 2) & 0x07;
// } ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
// else vec.push_back(twocharToUint16(ch1, ch2));
// { i += 2;
// if(flag) }
// { else if((unsigned char)str[i] <= 0xef && i + 2 < siz)
// words.push_back(string(sentence, lhs, len)); {
// } ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
// flag = false; ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
// } vec.push_back(twocharToUint16(ch1, ch2));
// } i += 3;
// if(flag) }
// { else
// words.push_back(string(sentence, lhs, len)); {
// } return false;
//} }
}
return true;
}
// Encodes the 16-bit values in [begin, end) as UTF-8 bytes into res.
// Every value is handled on its own: up to 0x7f -> one byte, up to 0x7ff ->
// two bytes, anything larger -> three bytes. Surrogate values are not paired
// up; they simply fall into the three-byte branch.
//
// begin/end : the range of 16-bit values to encode.
// res       : receives the encoded bytes. It is always reset first, so it is
//             empty whenever this function returns false.
// Returns false when the range is empty (begin >= end), true otherwise.
inline bool unicodeToUtf8(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
{
    // Reset before the emptiness check so that a failed call never leaves
    // text from an earlier use in the output string.
    res.clear();
    if(begin >= end)
    {
        return false;
    }
    for(; begin != end; ++begin)
    {
        const uint16_t ui = *begin;
        if(ui <= 0x7f)
        {
            // 0xxxxxxx
            res += char(ui);
        }
        else if(ui <= 0x7ff)
        {
            // 110xxxxx 10xxxxxx
            res += char(((ui >> 6) & 0x1f) | 0xc0);
            res += char((ui & 0x3f) | 0x80);
        }
        else
        {
            // 1110xxxx 10xxxxxx 10xxxxxx
            res += char(((ui >> 12) & 0x0f) | 0xe0);
            res += char(((ui >> 6) & 0x3f) | 0x80);
            res += char((ui & 0x3f) | 0x80);
        }
    }
    return true;
}
// Splits a GBK byte string into 16-bit units, appended to vec.
// A byte without the high bit becomes one unit by itself; a byte with the
// high bit set is combined with the following byte through twocharToUint16.
//
// str : input bytes.
// vec : output units; cleared up front, so it is empty when str is empty.
// Returns false for an empty string, or when a high-bit byte is the last byte
// of the input (vec then keeps the units produced before it); true otherwise.
inline bool gbkTrans(const string& str, vector<uint16_t>& vec)
{
    vec.clear();
    if(str.empty())
    {
        return false;
    }
    for(uint pos = 0; pos < str.size(); )
    {
        const bool singleByte = (0 == (str[pos] & 0x80));
        if(singleByte)
        {
            vec.push_back(uint16_t(str[pos]));
            pos++;
            continue;
        }
        // A lead byte needs a partner byte after it.
        if(!(pos + 1 < str.size()))
        {
            return false;
        }
        vec.push_back(twocharToUint16(str[pos], str[pos + 1]));
        pos += 2;
    }
    return true;
}
// Turns the 16-bit units in [begin, end) back into a GBK byte string.
// Each unit is split into a pair of chars with uint16ToChar2. When the first
// char of the pair has its high bit set, both chars are written; otherwise
// only the second char is written.
//
// begin/end : the range of units to convert.
// res       : receives the bytes. It is always reset first, so it is empty
//             whenever this function returns false.
// Returns false when the range is empty (begin >= end), true otherwise.
inline bool gbkTrans(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
{
    // Reset before the emptiness check so that a failed call never leaves
    // text from an earlier use in the output string.
    res.clear();
    if(begin >= end)
    {
        return false;
    }
    for(; begin != end; ++begin)
    {
        const pair<char, char> pa = uint16ToChar2(*begin);
        if(pa.first & 0x80)
        {
            // Double-byte character: keep both bytes.
            res += pa.first;
            res += pa.second;
        }
        else
        {
            // Single-byte character: only the second char carries data.
            res += pa.second;
        }
    }
    return true;
}
} }
#endif #endif

View File

@ -32,12 +32,14 @@ namespace CppJieba
{ {
private: private:
Trie _trie; Trie _trie;
private:
const string _dictPath;
public: public:
MPSegment(){}; MPSegment(const char * const dictPath): _dictPath(dictPath){};
virtual ~MPSegment(){dispose();}; virtual ~MPSegment(){dispose();};
public: public:
bool init(const char* const filePath) virtual bool init()
{ {
if(_getInitFlag()) if(_getInitFlag())
{ {
@ -49,8 +51,8 @@ namespace CppJieba
LogError("_trie.init failed."); LogError("_trie.init failed.");
return false; return false;
} }
LogInfo("_trie.loadDict(%s) start...", filePath); LogInfo("_trie.loadDict(%s) start...", _dictPath.c_str());
if(!_trie.loadDict(filePath)) if(!_trie.loadDict(_dictPath.c_str()))
{ {
LogError("_trie.loadDict faield."); LogError("_trie.loadDict faield.");
return false; return false;
@ -58,7 +60,7 @@ namespace CppJieba
LogInfo("_trie.loadDict end."); LogInfo("_trie.loadDict end.");
return _setInitFlag(true); return _setInitFlag(true);
} }
bool dispose() virtual bool dispose()
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@ -69,12 +71,7 @@ namespace CppJieba
return true; return true;
} }
public: public:
//bool cut(const string& str, vector<TrieNodeInfo>& segWordInfos)const; virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
bool cut(const string& str, vector<string>& res)const
{
return SegmentBase::cut(str, res);
}
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@ -155,7 +152,6 @@ namespace CppJieba
return true; return true;
} }
//virtual bool cut(const string& str, vector<string>& res)const;
private: private:
bool _calcDAG(SegmentContext& segContext)const bool _calcDAG(SegmentContext& segContext)const

View File

@ -13,7 +13,7 @@ namespace CppJieba
MPSegment _mpSeg; MPSegment _mpSeg;
HMMSegment _hmmSeg; HMMSegment _hmmSeg;
public: public:
MixSegment() MixSegment(const char * const mpSegDict, const char * const hmmSegDict): _mpSeg(mpSegDict), _hmmSeg(hmmSegDict)
{ {
} }
virtual ~MixSegment() virtual ~MixSegment()
@ -21,26 +21,26 @@ namespace CppJieba
dispose(); dispose();
} }
public: public:
bool init(const char* const mpSegDict, const char* const hmmSegDict) virtual bool init()
{ {
if(_getInitFlag()) if(_getInitFlag())
{ {
LogError("inited."); LogError("inited.");
return false; return false;
} }
if(!_mpSeg.init(mpSegDict)) if(!_mpSeg.init())
{ {
LogError("_mpSeg init"); LogError("_mpSeg init");
return false; return false;
} }
if(!_hmmSeg.init(hmmSegDict)) if(!_hmmSeg.init())
{ {
LogError("_hmmSeg init"); LogError("_hmmSeg init");
return false; return false;
} }
return _setInitFlag(true); return _setInitFlag(true);
} }
bool dispose() virtual bool dispose()
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@ -52,12 +52,9 @@ namespace CppJieba
return true; return true;
} }
public: public:
//virtual bool cut(const string& str, vector<string>& res) const; using SegmentBase::cut;
bool cut(const string& str, vector<string>& res)const public:
{ virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
return SegmentBase::cut(str, res);
}
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {

View File

@ -14,12 +14,17 @@ namespace CppJieba
public: public:
SegmentBase(){_setInitFlag(false);}; SegmentBase(){_setInitFlag(false);};
virtual ~SegmentBase(){}; virtual ~SegmentBase(){};
private:
bool _isInited;
protected: protected:
bool _isInited;
bool _getInitFlag()const{return _isInited;}; bool _getInitFlag()const{return _isInited;};
bool _setInitFlag(bool flag){return _isInited = flag;}; bool _setInitFlag(bool flag){return _isInited = flag;};
bool cut(const string& str, vector<string>& res)const public:
virtual bool init() = 0;
virtual bool dispose() = 0;
public:
virtual bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
virtual bool cut(const string& str, vector<string>& res)const
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@ -45,7 +50,6 @@ namespace CppJieba
} }
return true; return true;
} }
bool cut(Unicode::const_iterator begin, Unicode::const_iterator end, vector<string>& res)const = 0;
}; };
} }

View File

@ -17,76 +17,25 @@ namespace CppJieba
{ {
inline bool decode(const string& str, vector<uint16_t>& vec) inline bool decode(const string& str, vector<uint16_t>& vec)
{ {
char ch1, ch2; #ifdef CPPJIEBA_GBK
if(str.empty()) return gbkTrans(str, vec);
{ #else
return false; return utf8ToUnicode(str, vec);
} #endif
vec.clear();
size_t siz = str.size();
for(uint i = 0;i < siz;)
{
if(!(str[i] & 0x80)) // 0xxxxxxx
{
vec.push_back(str[i]);
i++;
}
else if ((unsigned char)str[i] <= 0xdf && i + 1 < siz) // 110xxxxxx
{
ch1 = (str[i] >> 2) & 0x07;
ch2 = (str[i+1] & 0x3f) | ((str[i] & 0x03) << 6 );
vec.push_back(twocharToUint16(ch1, ch2));
i += 2;
}
else if((unsigned char)str[i] <= 0xef && i + 2 < siz)
{
ch1 = (str[i] << 4) | ((str[i+1] >> 2) & 0x0f );
ch2 = ((str[i+1]<<6) & 0xc0) | (str[i+2] & 0x3f);
vec.push_back(twocharToUint16(ch1, ch2));
i += 3;
}
else
{
return false;
}
}
return true;
} }
inline bool encode(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res) inline bool encode(vector<uint16_t>::const_iterator begin, vector<uint16_t>::const_iterator end, string& res)
{ {
if(begin >= end) #ifdef CPPJIEBA_GBK
{ return gbkTrans(begin, end, res);
return false; #else
} return unicodeToUtf8(begin, end, res);
res.clear(); #endif
uint16_t ui;
while(begin != end)
{
ui = *begin;
if(ui <= 0x7f)
{
res += char(ui);
}
else if(ui <= 0x7ff)
{
res += char(((ui>>6) & 0x1f) | 0xc0);
res += char((ui & 0x3f) | 0x80);
}
else
{
res += char(((ui >> 12) & 0x0f )| 0xe0);
res += char(((ui>>6) & 0x3f )| 0x80 );
res += char((ui & 0x3f) | 0x80);
}
begin ++;
}
return true;
} }
inline bool encode(const vector<uint16_t>& sentence, string& res)
inline bool encode(const vector<uint16_t>& uni, string& res)
{ {
return encode(sentence.begin(), sentence.end(), res); return encode(uni.begin(), uni.end(), res);
} }
} }
} }

View File

@ -164,14 +164,6 @@ namespace CppJieba
{ {
return NULL; return NULL;
} }
return find(uintVec);
}
const TrieNodeInfo* find(const Unicode& uintVec)const
{
if(uintVec.empty())
{
return NULL;
}
return find(uintVec.begin(), uintVec.end()); return find(uintVec.begin(), uintVec.end());
} }
const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const const TrieNodeInfo* find(Unicode::const_iterator begin, Unicode::const_iterator end)const
@ -261,56 +253,11 @@ namespace CppJieba
return false; return false;
} }
const TrieNodeInfo* findPrefix(const string& str)const
{
if(!_getInitFlag())
{
LogFatal("trie not initted!");
return NULL;
}
Unicode uintVec;
if(!TransCode::decode(str, uintVec))
{
LogError("TransCode::decode failed.");
return NULL;
}
//find
TrieNode* p = _root;
uint pos = 0;
uint16_t chUni = 0;
const TrieNodeInfo * res = NULL;
for(uint i = 0; i < uintVec.size(); i++)
{
chUni = uintVec[i];
if(p->isLeaf)
{
pos = p->nodeInfoVecPos;
if(pos >= _nodeInfoVec.size())
{
LogFatal("node's nodeInfoVecPos is out of _nodeInfoVec's range");
return NULL;
}
res = &(_nodeInfoVec[pos]);
}
if(p->hmap.find(chUni) == p->hmap.end())
{
break;
}
else
{
p = p->hmap[chUni];
}
}
return res;
}
public: public:
double getMinLogFreq()const{return _minLogFreq;}; double getMinLogFreq()const{return _minLogFreq;};
bool insert(const TrieNodeInfo& nodeInfo) private:
bool _insert(const TrieNodeInfo& nodeInfo)
{ {
if(!_getInitFlag()) if(!_getInitFlag())
{ {
@ -353,7 +300,7 @@ namespace CppJieba
} }
if(p->isLeaf) if(p->isLeaf)
{ {
LogError("this node already inserted"); LogError("this node already _inserted");
return false; return false;
} }
@ -391,10 +338,10 @@ namespace CppJieba
nodeInfo.tag = vecBuf[2]; nodeInfo.tag = vecBuf[2];
} }
//insert node //_insert node
if(!insert(nodeInfo)) if(!_insert(nodeInfo))
{ {
LogError("insert node failed!"); LogError("_insert node failed!");
} }
} }
return true; return true;

View File

@ -19,8 +19,14 @@ void cut(const ISegment * seg, const char * const filePath)
{ {
cout << line << endl; cout << line << endl;
res.clear(); res.clear();
seg->cut(line, res); if(!seg->cut(line, res))
cout<<join(res.begin(), res.end(),"/")<<endl; {
LogError("seg cut failed.");
}
else
{
print(join(res.begin(), res.end(), "/"));
}
} }
} }
} }
@ -35,9 +41,9 @@ int main(int argc, char ** argv)
<<"\t--dictpath\tsee example\n" <<"\t--dictpath\tsee example\n"
<<"\t--modelpath\tsee example\n" <<"\t--modelpath\tsee example\n"
<<"example:\n" <<"example:\n"
<<"\t"<<argv[0]<<" testlines.utf8 --dictpath dicts/jieba.dict.utf8\n" <<"\t"<<argv[0]<<" ../test/testlines.utf8 --dictpath ../dicts/jieba.dict.utf8 --algorithm cutDAG\n"
<<"\t"<<argv[0]<<" testlines.utf8 --modelpath dicts/hmm_model.utf8 --algorithm cutHMM\n" <<"\t"<<argv[0]<<" ../test/testlines.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutHMM\n"
<<"\t"<<argv[0]<<" testlines.utf8 --dictpath dicts/jieba.dict.utf8 --modelpath dicts/hmm_model.utf8 --algorithm cutMix\n" <<"\t"<<argv[0]<<" ../test/testlines.utf8 --dictpath ../dicts/jieba.dict.utf8 --modelpath ../dicts/hmm_model.utf8 --algorithm cutMix\n"
<<endl; <<endl;
return EXIT_FAILURE; return EXIT_FAILURE;
@ -49,8 +55,8 @@ int main(int argc, char ** argv)
if("cutHMM" == algorithm) if("cutHMM" == algorithm)
{ {
HMMSegment seg; HMMSegment seg(modelPath.c_str());
if(!seg.init(modelPath.c_str())) if(!seg.init())
{ {
cout<<"seg init failed."<<endl; cout<<"seg init failed."<<endl;
return EXIT_FAILURE; return EXIT_FAILURE;
@ -60,8 +66,8 @@ int main(int argc, char ** argv)
} }
else if("cutDAG" == algorithm) else if("cutDAG" == algorithm)
{ {
MPSegment seg; MPSegment seg(dictPath.c_str());
if(!seg.init(dictPath.c_str())) if(!seg.init())
{ {
cout<<"seg init failed."<<endl; cout<<"seg init failed."<<endl;
return false; return false;
@ -71,8 +77,8 @@ int main(int argc, char ** argv)
} }
else if ("cutFull" == algorithm) else if ("cutFull" == algorithm)
{ {
FullSegment seg; FullSegment seg(dictPath.c_str());
if (!seg.init(dictPath.c_str())) if (!seg.init())
{ {
cout << "seg init failed" << endl; cout << "seg init failed" << endl;
return false; return false;
@ -82,8 +88,8 @@ int main(int argc, char ** argv)
} }
else else
{ {
MixSegment seg; MixSegment seg(dictPath.c_str(), modelPath.c_str());
if(!seg.init(dictPath.c_str(), modelPath.c_str())) if(!seg.init())
{ {
cout<<"seg init failed."<<endl; cout<<"seg init failed."<<endl;
return EXIT_FAILURE; return EXIT_FAILURE;

View File

@ -14,13 +14,10 @@ using namespace CppJieba;
class ReqHandler: public IRequestHandler class ReqHandler: public IRequestHandler
{ {
private:
string _dictPath;
string _modelPath;
public: public:
ReqHandler(const string& dictPath, const string& modelPath): _dictPath(dictPath), _modelPath(modelPath){}; ReqHandler(const string& dictPath, const string& modelPath): _segment(dictPath.c_str(), modelPath.c_str()){};
virtual ~ReqHandler(){}; virtual ~ReqHandler(){};
virtual bool init(){return _segment.init(_dictPath.c_str(), _modelPath.c_str());}; virtual bool init(){return _segment.init();};
virtual bool dispose(){return _segment.dispose();}; virtual bool dispose(){return _segment.dispose();};
public: public:
virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd) virtual bool do_GET(const HttpReqInfo& httpReq, string& strSnd)

9
test/testlines.gbk Normal file
View File

@ -0,0 +1,9 @@
我来到北京清华大学
他来到了网易杭研大厦
杭研
小明硕士毕业于中国科学院计算所,后在日本京都大学深造
我来自北京邮电大学。。。学号091111xx。。。
来这里看看别人正在搜索什么吧
我来到南京市长江大桥
请在一米线外等候
人事处女干事