Add part-of-speech tagging without Viterbi.

This commit is contained in:
aholic 2014-02-25 21:20:48 +08:00
parent 31e3d4fc12
commit 275a3779e5
2 changed files with 83 additions and 0 deletions

71
src/PosTagger.hpp Normal file
View File

@ -0,0 +1,71 @@
#ifndef CPPJIEBA_POS_TAGGING_H
#define CPPJIEBA_POS_TAGGING_H
#include "MixSegment.hpp"
#include "Limonp/str_functs.hpp"
#include "Trie.hpp"
#include "TrieManager.hpp"
namespace CppJieba
{
using namespace Limonp;
/**
 * Dictionary-based part-of-speech tagger.
 *
 * A sentence is segmented with MixSegment and every resulting word is looked
 * up in the dictionary trie; the tag stored in the matching trie node becomes
 * the word's tag, and words that are not found in the trie are tagged "x".
 */
class PosTagger: public InitOnOff
{
    private:
        MixSegment _segment;
        // Fetched from TrieManager in init(); this class never deletes it.
        Trie* _trie;
    public:
        /** Creates an un-initialised tagger; call init() before tag(). */
        PosTagger(): _trie(NULL)
        {
            _setInitFlag(false);
        }

        /**
         * Creates a tagger and initialises it immediately via init().
         * The init flag records whether initialisation succeeded.
         */
        explicit PosTagger(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb)
            : _trie(NULL)
        {
            // Mirror the default constructor so init()'s "already inited"
            // check starts from a known state.
            _setInitFlag(false);
            _setInitFlag(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb));
        }

        ~PosTagger() {}
    public:
        /**
         * Loads the dictionary trie and initialises the segmenter.
         *
         * @param dictPath    dictionary path, passed to TrieManager and to the segmenter.
         * @param hmmFilePath HMM model path, passed to the segmenter.
         * @param charStatus  currently unused.
         * @param startProb   currently unused.
         * @param emitProb    currently unused.
         * @param endProb     currently unused.
         * @param transProb   currently unused.
         * @return true when both the trie and the segmenter were set up;
         *         false if the tagger was already initialised, the trie could
         *         not be obtained, or the segmenter failed to initialise.
         */
        bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb)
        {
            if (_getInitFlag())
            {
                LogError("already inited before.");
                return false;
            }
            _trie = TrieManager::getInstance().getTrie(dictPath.c_str());
            if (NULL == _trie)
            {
                LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str());
                return false;
            }
            return _setInitFlag(_segment.init(dictPath, hmmFilePath));
        }

        /**
         * Segments src and appends one (word, tag) pair per word to res.
         *
         * res is not cleared first, so results accumulate across calls. If
         * decoding a word fails, the pairs appended before the failure stay
         * in res.
         *
         * @param src sentence to tag.
         * @param res output vector receiving (word, tag) pairs.
         * @return false if the tagger is not initialised, segmentation fails,
         *         or a word cannot be decoded; otherwise whether res is
         *         non-empty after tagging.
         */
        bool tag(const string& src, vector<pair<string, string> >& res)
        {
            assert(_getInitFlag());
            // assert() vanishes under NDEBUG; without this guard a release
            // build would dereference an unset trie pointer below.
            if (!_getInitFlag() || NULL == _trie)
            {
                LogError("not inited.");
                return false;
            }

            vector<string> cutRes;
            if (!_segment.cut(src, cutRes))
            {
                LogError("_mixSegment cut failed");
                return false;
            }

            Unicode unico;
            for (vector<string>::const_iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
            {
                if (!TransCode::decode(*itr, unico))
                {
                    LogError("decode failed.");
                    return false;
                }
                const TrieNodeInfo* info = _trie->find(unico.begin(), unico.end());
                // "x" is the fallback tag for words missing from the dictionary.
                res.push_back(make_pair(*itr, NULL == info ? "x" : info->tag));
            }
            return !res.empty();
        }
};
}
#endif

12
test/tagging_demo.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "../src/PosTagger.hpp"
using namespace CppJieba;
int main(int argc, char ** argv)
{
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "", "", "", "", "");
string s("我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久我就会升职加薪当上总经理出任CEO迎娶白富美走上人生巅峰。");
vector<pair<string, string> > res;
tagger.tag(s, res);
cout << res << endl;
return EXIT_SUCCESS;
}