diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9ee5277..d71e7a0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,7 +9,7 @@ LINK_DIRECTORIES(Husky) INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin) INSTALL(TARGETS cjserver RUNTIME DESTINATION bin) -INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp structs.h Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) +INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba) ADD_SUBDIRECTORY(Husky) ADD_SUBDIRECTORY(Limonp) diff --git a/src/MPSegment.hpp b/src/MPSegment.hpp index f8ce060..86f877e 100644 --- a/src/MPSegment.hpp +++ b/src/MPSegment.hpp @@ -16,6 +16,17 @@ namespace CppJieba { + struct SegmentChar + { + uint16_t uniCh; + DagType dag; + const TrieNodeInfo * pInfo; + double weight; + + SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0) + { + } + }; typedef vector SegmentContext; class MPSegment: public SegmentBase diff --git a/src/Trie.hpp b/src/Trie.hpp index 7d91112..b23bff3 100644 --- a/src/Trie.hpp +++ b/src/Trie.hpp @@ -16,7 +16,6 @@ #include "Limonp/logger.hpp" #include "TransCode.hpp" #include "globals.h" -#include "structs.h" namespace CppJieba @@ -34,6 +33,30 @@ namespace CppJieba } }; + struct TrieNodeInfo + { + Unicode word; + size_t freq; + string tag; + double logFreq; //logFreq = log(freq/sum(freq)); + TrieNodeInfo():freq(0),logFreq(0.0) + { + } + TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq) + { + } + TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE) + { + } + string toString()const + { + string tmp; + TransCode::encode(word, tmp); + return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq); + } + }; + typedef unordered_map DagType; + class Trie { diff --git a/src/structs.h b/src/structs.h deleted file mode 100644 index e02b294..0000000 --- a/src/structs.h +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef CPPJIEBA_STRUCTS_H -#define CPPJIEBA_STRUCTS_H - -#include -#include "globals.h" -#include "Trie.hpp" -#include "TransCode.hpp" - -namespace CppJieba -{ - - struct TrieNodeInfo - { - //string word; - //size_t wLen;// the word's len , not string.length(), - Unicode word; - size_t freq; - string tag; - double logFreq; //logFreq = log(freq/sum(freq)); - TrieNodeInfo():freq(0),logFreq(0.0) - { - } - TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq) - { - } - TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE) - { - } - string toString()const - { - string tmp; - TransCode::encode(word, tmp); - return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq); - } - }; - - typedef unordered_map DagType; - struct SegmentChar - { - uint16_t uniCh; - DagType dag; - const TrieNodeInfo * pInfo; - double weight; - - SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0) - { - } - - /*const TrieNodeInfo* pInfo; - double weight; - SegmentChar(uint16_t unich, const TrieNodeInfo* p, double w):uniCh(unich), pInfo(p), weight(w) - { - }*/ - }; - /* - struct SegmentContext - { - vector context; - bool getDA - };*/ - typedef vector SegmentContext; - - - struct KeyWordInfo: public TrieNodeInfo - { - double idf; - double weight;// log(wLen+1)*logFreq; - KeyWordInfo():idf(0.0),weight(0.0) - { - } - KeyWordInfo(const Unicode& _word):TrieNodeInfo(_word),idf(0.0),weight(0.0) - { - } - KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo) - { - } - string toString() const - { - string tmp; - TransCode::encode(word, tmp); - return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf); - } - KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo) - { - word = trieNodeInfo.word; - freq = trieNodeInfo.freq; - tag = trieNodeInfo.tag; - logFreq = trieNodeInfo.logFreq; - return *this; - } - }; - - inline ostream& operator << (ostream& os, const KeyWordInfo& info) - { - string tmp; - TransCode::encode(info.word, tmp); - return os << "{words:" << tmp << ", weight:" << info.weight << ", idf:" << info.idf << "}"; - } - - //inline string joinWordInfos(const vector& vec) - //{ - // vector tmp; - // for(uint i = 0; i < vec.size(); i++) - // { - // tmp.push_back(vec[i].toString()); - // } - // return joinStr(tmp, ","); - //} -} - -#endif