mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add Part of Speech without viterbi....
This commit is contained in:
parent
31e3d4fc12
commit
275a3779e5
71
src/PosTagger.hpp
Normal file
71
src/PosTagger.hpp
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
#ifndef CPPJIEBA_POS_TAGGING_H
|
||||||
|
#define CPPJIEBA_POS_TAGGING_H
|
||||||
|
|
||||||
|
#include "MixSegment.hpp"
|
||||||
|
#include "Limonp/str_functs.hpp"
|
||||||
|
#include "Trie.hpp"
|
||||||
|
#include "TrieManager.hpp"
|
||||||
|
|
||||||
|
namespace CppJieba
|
||||||
|
{
|
||||||
|
using namespace Limonp;
|
||||||
|
|
||||||
|
class PosTagger: public InitOnOff
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
MixSegment _segment;
|
||||||
|
Trie* _trie;
|
||||||
|
|
||||||
|
public:
|
||||||
|
PosTagger(){_setInitFlag(false);};
|
||||||
|
explicit PosTagger(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string transProb)
|
||||||
|
{
|
||||||
|
_setInitFlag(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb));
|
||||||
|
};
|
||||||
|
~PosTagger(){};
|
||||||
|
public:
|
||||||
|
bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string transProb)
|
||||||
|
{
|
||||||
|
if (_getInitFlag())
|
||||||
|
{
|
||||||
|
LogError("already inited before.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
_trie = TrieManager::getInstance().getTrie(dictPath.c_str());
|
||||||
|
if (NULL == _trie)
|
||||||
|
{
|
||||||
|
LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return _setInitFlag(_segment.init(dictPath, hmmFilePath));
|
||||||
|
};
|
||||||
|
|
||||||
|
bool tag(const string& src, vector<pair<string, string> >& res)
|
||||||
|
{
|
||||||
|
assert(_getInitFlag());
|
||||||
|
vector<string> cutRes;
|
||||||
|
if (!_segment.cut(src, cutRes))
|
||||||
|
{
|
||||||
|
LogError("_mixSegment cut failed");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
const TrieNodeInfo *tmp = NULL;
|
||||||
|
Unicode unico;
|
||||||
|
for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
|
||||||
|
{
|
||||||
|
if (!TransCode::decode(*itr, unico))
|
||||||
|
{
|
||||||
|
LogError("decode failed.");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
tmp = _trie->find(unico.begin(), unico.end());
|
||||||
|
res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
|
||||||
|
}
|
||||||
|
tmp = NULL;
|
||||||
|
return !res.empty();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
12
test/tagging_demo.cpp
Normal file
12
test/tagging_demo.cpp
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
#include "../src/PosTagger.hpp"
|
||||||
|
using namespace CppJieba;
|
||||||
|
|
||||||
|
int main(int argc, char ** argv)
|
||||||
|
{
|
||||||
|
PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "", "", "", "", "");
|
||||||
|
string s("我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。");
|
||||||
|
vector<pair<string, string> > res;
|
||||||
|
tagger.tag(s, res);
|
||||||
|
cout << res << endl;
|
||||||
|
return EXIT_SUCCESS;
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user