diff --git a/src/PosTagger.hpp b/src/PosTagger.hpp
new file mode 100644
index 0000000..99ad049
--- /dev/null
+++ b/src/PosTagger.hpp
@@ -0,0 +1,128 @@
+#ifndef CPPJIEBA_POS_TAGGING_H
+#define CPPJIEBA_POS_TAGGING_H
+
+#include <cassert>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "MixSegment.hpp"
+#include "Limonp/str_functs.hpp"
+#include "Trie.hpp"
+#include "TrieManager.hpp"
+
+namespace CppJieba
+{
+    using namespace Limonp;
+
+    /**
+     * Tags every word of a sentence with the tag its dictionary entry carries.
+     *
+     * A MixSegment cuts the sentence into words; each word is then looked up in
+     * the Trie that TrieManager returns for the dictionary path. Words without
+     * a Trie entry get the tag "x".
+     */
+    class PosTagger: public InitOnOff
+    {
+        private:
+            MixSegment _segment;
+            // Obtained from TrieManager in init() and never deleted by this class.
+            // NOTE(review): presumably TrieManager keeps ownership -- confirm.
+            Trie* _trie;
+
+        public:
+            /** Builds an uninitialised tagger; init() must succeed before tag(). */
+            PosTagger(): _trie(NULL)
+            {
+                _setInitFlag(false);
+            }
+
+            /**
+             * Builds a tagger and initialises it right away; see init() for the
+             * parameters. The outcome is stored in the init flag.
+             */
+            explicit PosTagger(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb)
+                : _trie(NULL)
+            {
+                // init() reads the flag before anything else, so give it a defined
+                // value here instead of relying on the InitOnOff base to do so.
+                _setInitFlag(false);
+                _setInitFlag(init(dictPath, hmmFilePath, charStatus, startProb, emitProb, endProb, transProb));
+            }
+
+            ~PosTagger() {}
+
+        public:
+            /**
+             * Fetches the dictionary Trie and initialises the segmenter.
+             *
+             * @param dictPath    dictionary path; used for the Trie and the segmenter
+             * @param hmmFilePath HMM model path; passed on to the segmenter
+             * @param charStatus  accepted but not read here
+             * @param startProb   accepted but not read here
+             * @param emitProb    accepted but not read here
+             * @param endProb     accepted but not read here
+             * @param transProb   accepted but not read here
+             * @return true on success; false when already initialised, when no Trie
+             *         is available for dictPath, or when the segmenter fails to init
+             */
+            bool init(const string& dictPath, const string& hmmFilePath, const string& charStatus, const string& startProb, const string& emitProb, const string& endProb, const string& transProb)
+            {
+                if (_getInitFlag())
+                {
+                    LogError("already inited before.");
+                    return false;
+                }
+                _trie = TrieManager::getInstance().getTrie(dictPath.c_str());
+                if (NULL == _trie)
+                {
+                    LogError("get a NULL pointor from getTrie(\"%s\").", dictPath.c_str());
+                    return false;
+                }
+                return _setInitFlag(_segment.init(dictPath, hmmFilePath));
+            }
+
+            /**
+             * Cuts src into words and appends one (word, tag) pair per word to res.
+             *
+             * @param src sentence to tag
+             * @param res receives the pairs; entries already present are kept
+             * @return false when the tagger is not initialised, when cutting fails or
+             *         when a word cannot be decoded (res may then hold the pairs
+             *         appended so far); otherwise true exactly when res is non-empty
+             */
+            bool tag(const string& src, vector<pair<string, string> >& res)
+            {
+                assert(_getInitFlag());
+                // assert() disappears under NDEBUG, so guard the _trie dereference
+                // below explicitly.
+                if (!_getInitFlag() || NULL == _trie)
+                {
+                    LogError("not inited.");
+                    return false;
+                }
+
+                vector<string> cutRes;
+                if (!_segment.cut(src, cutRes))
+                {
+                    LogError("_mixSegment cut failed");
+                    return false;
+                }
+
+                Unicode unico;
+                for (vector<string>::iterator itr = cutRes.begin(); itr != cutRes.end(); ++itr)
+                {
+                    if (!TransCode::decode(*itr, unico))
+                    {
+                        LogError("decode failed.");
+                        return false;
+                    }
+                    const TrieNodeInfo* tmp = _trie->find(unico.begin(), unico.end());
+                    res.push_back(make_pair(*itr, tmp == NULL ? "x" : tmp->tag));
+                }
+                return !res.empty();
+            }
+    };
+}
+
+#endif
diff --git a/test/tagging_demo.cpp b/test/tagging_demo.cpp
new file mode 100644
index 0000000..5b341f9
--- /dev/null
+++ b/test/tagging_demo.cpp
@@ -0,0 +1,17 @@
+#include "../src/PosTagger.hpp"
+using namespace CppJieba;
+
+// Demo: tags one sample sentence and prints the resulting (word, tag) pairs.
+int main(int argc, char ** argv)
+{
+    PosTagger tagger("../dict/jieba.dict.utf8", "../dict/hmm_model.utf8", "", "", "", "", "");
+    string s("我是蓝翔技工拖拉机学院手扶拖拉机专业的。不用多久,我就会升职加薪,当上总经理,出任CEO,迎娶白富美,走上人生巅峰。");
+    vector<pair<string, string> > res;
+    if (!tagger.tag(s, res))
+    {
+        cerr << "tagging failed" << endl;
+        return EXIT_FAILURE;
+    }
+    cout << res << endl;
+    return EXIT_SUCCESS;
+}