mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
delete structs.h
This commit is contained in:
parent
58e69783cc
commit
ccaeeb5bb0
@ -9,7 +9,7 @@ LINK_DIRECTORIES(Husky)
|
||||
|
||||
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
||||
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp structs.h Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||
|
||||
ADD_SUBDIRECTORY(Husky)
|
||||
ADD_SUBDIRECTORY(Limonp)
|
||||
|
@ -16,6 +16,17 @@
|
||||
namespace CppJieba
|
||||
{
|
||||
|
||||
struct SegmentChar
|
||||
{
|
||||
uint16_t uniCh;
|
||||
DagType dag;
|
||||
const TrieNodeInfo * pInfo;
|
||||
double weight;
|
||||
|
||||
SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0)
|
||||
{
|
||||
}
|
||||
};
|
||||
typedef vector<SegmentChar> SegmentContext;
|
||||
|
||||
class MPSegment: public SegmentBase
|
||||
|
25
src/Trie.hpp
25
src/Trie.hpp
@ -16,7 +16,6 @@
|
||||
#include "Limonp/logger.hpp"
|
||||
#include "TransCode.hpp"
|
||||
#include "globals.h"
|
||||
#include "structs.h"
|
||||
|
||||
|
||||
namespace CppJieba
|
||||
@ -34,6 +33,30 @@ namespace CppJieba
|
||||
}
|
||||
};
|
||||
|
||||
struct TrieNodeInfo
|
||||
{
|
||||
Unicode word;
|
||||
size_t freq;
|
||||
string tag;
|
||||
double logFreq; //logFreq = log(freq/sum(freq));
|
||||
TrieNodeInfo():freq(0),logFreq(0.0)
|
||||
{
|
||||
}
|
||||
TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq)
|
||||
{
|
||||
}
|
||||
TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE)
|
||||
{
|
||||
}
|
||||
string toString()const
|
||||
{
|
||||
string tmp;
|
||||
TransCode::encode(word, tmp);
|
||||
return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq);
|
||||
}
|
||||
};
|
||||
typedef unordered_map<uint, const TrieNodeInfo*> DagType;
|
||||
|
||||
class Trie
|
||||
{
|
||||
|
||||
|
111
src/structs.h
111
src/structs.h
@ -1,111 +0,0 @@
|
||||
#ifndef CPPJIEBA_STRUCTS_H
|
||||
#define CPPJIEBA_STRUCTS_H
|
||||
|
||||
#include <limits>
|
||||
#include "globals.h"
|
||||
#include "Trie.hpp"
|
||||
#include "TransCode.hpp"
|
||||
|
||||
namespace CppJieba
|
||||
{
|
||||
|
||||
struct TrieNodeInfo
|
||||
{
|
||||
//string word;
|
||||
//size_t wLen;// the word's len , not string.length(),
|
||||
Unicode word;
|
||||
size_t freq;
|
||||
string tag;
|
||||
double logFreq; //logFreq = log(freq/sum(freq));
|
||||
TrieNodeInfo():freq(0),logFreq(0.0)
|
||||
{
|
||||
}
|
||||
TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq)
|
||||
{
|
||||
}
|
||||
TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE)
|
||||
{
|
||||
}
|
||||
string toString()const
|
||||
{
|
||||
string tmp;
|
||||
TransCode::encode(word, tmp);
|
||||
return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq);
|
||||
}
|
||||
};
|
||||
|
||||
typedef unordered_map<uint, const TrieNodeInfo*> DagType;
|
||||
struct SegmentChar
|
||||
{
|
||||
uint16_t uniCh;
|
||||
DagType dag;
|
||||
const TrieNodeInfo * pInfo;
|
||||
double weight;
|
||||
|
||||
SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0)
|
||||
{
|
||||
}
|
||||
|
||||
/*const TrieNodeInfo* pInfo;
|
||||
double weight;
|
||||
SegmentChar(uint16_t unich, const TrieNodeInfo* p, double w):uniCh(unich), pInfo(p), weight(w)
|
||||
{
|
||||
}*/
|
||||
};
|
||||
/*
|
||||
struct SegmentContext
|
||||
{
|
||||
vector<SegmentChar> context;
|
||||
bool getDA
|
||||
};*/
|
||||
typedef vector<SegmentChar> SegmentContext;
|
||||
|
||||
|
||||
struct KeyWordInfo: public TrieNodeInfo
|
||||
{
|
||||
double idf;
|
||||
double weight;// log(wLen+1)*logFreq;
|
||||
KeyWordInfo():idf(0.0),weight(0.0)
|
||||
{
|
||||
}
|
||||
KeyWordInfo(const Unicode& _word):TrieNodeInfo(_word),idf(0.0),weight(0.0)
|
||||
{
|
||||
}
|
||||
KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo)
|
||||
{
|
||||
}
|
||||
string toString() const
|
||||
{
|
||||
string tmp;
|
||||
TransCode::encode(word, tmp);
|
||||
return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf);
|
||||
}
|
||||
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
|
||||
{
|
||||
word = trieNodeInfo.word;
|
||||
freq = trieNodeInfo.freq;
|
||||
tag = trieNodeInfo.tag;
|
||||
logFreq = trieNodeInfo.logFreq;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Streams a KeyWordInfo as "{words:<word>, weight:<weight>, idf:<idf>}".
 * The word is passed through TransCode::encode first.
 * Returns the same stream so calls can be chained.
 */
inline ostream& operator << (ostream& os, const KeyWordInfo& info)
{
    string encoded;
    TransCode::encode(info.word, encoded);

    os << "{words:" << encoded;
    os << ", weight:" << info.weight;
    os << ", idf:" << info.idf << "}";
    return os;
}
|
||||
|
||||
//inline string joinWordInfos(const vector<KeyWordInfo>& vec)
|
||||
//{
|
||||
// vector<string> tmp;
|
||||
// for(uint i = 0; i < vec.size(); i++)
|
||||
// {
|
||||
// tmp.push_back(vec[i].toString());
|
||||
// }
|
||||
// return joinStr(tmp, ",");
|
||||
//}
|
||||
}
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user