#ifndef CPPJIEBA_STRUCTS_H #define CPPJIEBA_STRUCTS_H #include #include "globals.h" #include "Trie.h" #include "TransCode.h" namespace CppJieba { struct TrieNodeInfo { //string word; //size_t wLen;// the word's len , not string.length(), Unicode word; size_t freq; string tag; double logFreq; //logFreq = log(freq/sum(freq)); TrieNodeInfo():freq(0),logFreq(0.0) { } TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq) { } TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE) { } }; typedef unordered_map DagType; struct SegmentChar { uint16_t uniCh; DagType dag; const TrieNodeInfo * pInfo; double weight; SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0) { } /*const TrieNodeInfo* pInfo; double weight; SegmentChar(uint16_t unich, const TrieNodeInfo* p, double w):uniCh(unich), pInfo(p), weight(w) { }*/ }; /* struct SegmentContext { vector context; bool getDA };*/ typedef vector SegmentContext; //struct SegmentContext //{ // vector context; // //vector uintVec; // //vector< vector > > dag; // //vector< pair > dp; //}; /* struct SegmentWordInfo: public TrieNodeInfo { }; */ struct KeyWordInfo: public TrieNodeInfo { double idf; double weight;// log(wLen+1)*logFreq; KeyWordInfo():idf(0.0),weight(0.0) { } KeyWordInfo(const Unicode& _word):TrieNodeInfo(_word),idf(0.0),weight(0.0) { } KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo) { } inline string toString() const { string tmp; TransCode::encode(word, tmp); return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf); } KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo) { word = trieNodeInfo.word; freq = trieNodeInfo.freq; tag = trieNodeInfo.tag; logFreq = trieNodeInfo.logFreq; return *this; } }; inline string joinWordInfos(const vector& vec) { vector tmp; for(uint i = 0; i < vec.size(); i++) { tmp.push_back(vec[i].toString()); } return joinStr(tmp, ","); } } #endif