mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add Part of Speech without viterbi....
This commit is contained in:
parent
31e3d4fc12
commit
275a3779e5
71
src/PosTagger.hpp
Normal file
71
src/PosTagger.hpp
Normal file
@ -0,0 +1,71 @@
|
||||
#ifndef CPPJIEBA_POS_TAGGING_H
|
||||
#define CPPJIEBA_POS_TAGGING_H
|
||||
|
||||
#include "MixSegment.hpp"
|
||||
#include "Limonp/str_functs.hpp"
|
||||
#include "Trie.hpp"
|
||||
#include "TrieManager.hpp"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
using namespace Limonp;
|
||||
|
||||
class PosTagger: public InitOnOff
|
||||
{
|
||||
private:
|
||||
MixSegment _segment;
|
||||
Trie* _trie;
|
||||
|
||||
public:
|
||||
PosTagger(){_setInitFlag(false);};
|
||||
explicit PosTagger(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string transProb)
|
||||
{
|
||||
_setInitFlag(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb));
|
||||
};
|
||||
~PosTagger(){};
|
||||
public:
|
||||
bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string transProb)
|
||||
{
|
||||
if (_getInitFlag())
|
||||
{
|
||||
LogError("already inited before.");
|
||||
return false;
|
||||
}
|
||||
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
|
||||
if (NULL == _trie)
|
||||
{
|
||||
LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str());
|
||||
return false;
|
||||
}
|
||||
return _setInitFlag(_segment.init(dictPath, hmmFilePath));
|
||||
};
|
||||
|
||||
bool tag(const string& src, vector<pair<string, string> >& res)
|
||||
{
|
||||
assert(_getInitFlag());
|
||||
vector<string> cutRes;
|
||||
if (!_segment.cut(src, cutRes))
|
||||
{
|
||||
LogError("_mixSegment cut failed");
|
||||
return false;
|
||||
}
|
||||
|
||||
const TrieNodeInfo *tmp = NULL;
|
||||
Unicode unico;
|
||||
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
|
||||
{
|
||||
if (!TransCode::decode(*itr, unico))
|
||||
{
|
||||
LogError("decode failed.");
|
||||
return false;
|
||||
}
|
||||
tmp = _trie->find(unico.begin(), unico.end());
|
||||
res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
|
||||
}
|
||||
tmp = NULL;
|
||||
return !res.empty();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
12
test/tagging_demo.cpp
Normal file
12
test/tagging_demo.cpp
Normal file
@ -0,0 +1,12 @@
|
||||
#include "../src/PosTagger.hpp"
|
||||
using namespace CppJieba;
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "", "", "", "", "");
|
||||
string s("我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。");
|
||||
vector<pair<string, string> > res;
|
||||
tagger.tag(s, res);
|
||||
cout << res << endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user