mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
delete structs.h
This commit is contained in:
parent
58e69783cc
commit
ccaeeb5bb0
@ -9,7 +9,7 @@ LINK_DIRECTORIES(Husky)
|
|||||||
|
|
||||||
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
INSTALL(TARGETS cjsegment RUNTIME DESTINATION bin)
|
||||||
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
INSTALL(TARGETS cjserver RUNTIME DESTINATION bin)
|
||||||
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp structs.h Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
INSTALL(FILES ChineseFilter.hpp HMMSegment.hpp MPSegment.hpp Trie.hpp globals.h ISegment.hpp MixSegment.hpp SegmentBase.hpp TransCode.hpp DESTINATION include/CppJieba)
|
||||||
|
|
||||||
ADD_SUBDIRECTORY(Husky)
|
ADD_SUBDIRECTORY(Husky)
|
||||||
ADD_SUBDIRECTORY(Limonp)
|
ADD_SUBDIRECTORY(Limonp)
|
||||||
|
@ -16,6 +16,17 @@
|
|||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
{
|
{
|
||||||
|
|
||||||
|
struct SegmentChar
|
||||||
|
{
|
||||||
|
uint16_t uniCh;
|
||||||
|
DagType dag;
|
||||||
|
const TrieNodeInfo * pInfo;
|
||||||
|
double weight;
|
||||||
|
|
||||||
|
SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
typedef vector<SegmentChar> SegmentContext;
|
typedef vector<SegmentChar> SegmentContext;
|
||||||
|
|
||||||
class MPSegment: public SegmentBase
|
class MPSegment: public SegmentBase
|
||||||
|
25
src/Trie.hpp
25
src/Trie.hpp
@ -16,7 +16,6 @@
|
|||||||
#include "Limonp/logger.hpp"
|
#include "Limonp/logger.hpp"
|
||||||
#include "TransCode.hpp"
|
#include "TransCode.hpp"
|
||||||
#include "globals.h"
|
#include "globals.h"
|
||||||
#include "structs.h"
|
|
||||||
|
|
||||||
|
|
||||||
namespace CppJieba
|
namespace CppJieba
|
||||||
@ -34,6 +33,30 @@ namespace CppJieba
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct TrieNodeInfo
|
||||||
|
{
|
||||||
|
Unicode word;
|
||||||
|
size_t freq;
|
||||||
|
string tag;
|
||||||
|
double logFreq; //logFreq = log(freq/sum(freq));
|
||||||
|
TrieNodeInfo():freq(0),logFreq(0.0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
string toString()const
|
||||||
|
{
|
||||||
|
string tmp;
|
||||||
|
TransCode::encode(word, tmp);
|
||||||
|
return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
typedef unordered_map<uint, const TrieNodeInfo*> DagType;
|
||||||
|
|
||||||
class Trie
|
class Trie
|
||||||
{
|
{
|
||||||
|
|
||||||
|
111
src/structs.h
111
src/structs.h
@ -1,111 +0,0 @@
|
|||||||
#ifndef CPPJIEBA_STRUCTS_H
|
|
||||||
#define CPPJIEBA_STRUCTS_H
|
|
||||||
|
|
||||||
#include <limits>
|
|
||||||
#include "globals.h"
|
|
||||||
#include "Trie.hpp"
|
|
||||||
#include "TransCode.hpp"
|
|
||||||
|
|
||||||
namespace CppJieba
|
|
||||||
{
|
|
||||||
|
|
||||||
struct TrieNodeInfo
|
|
||||||
{
|
|
||||||
//string word;
|
|
||||||
//size_t wLen;// the word's len , not string.length(),
|
|
||||||
Unicode word;
|
|
||||||
size_t freq;
|
|
||||||
string tag;
|
|
||||||
double logFreq; //logFreq = log(freq/sum(freq));
|
|
||||||
TrieNodeInfo():freq(0),logFreq(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
TrieNodeInfo(const TrieNodeInfo& nodeInfo):word(nodeInfo.word), freq(nodeInfo.freq), tag(nodeInfo.tag), logFreq(nodeInfo.logFreq)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
TrieNodeInfo(const Unicode& _word):word(_word),freq(0),logFreq(MIN_DOUBLE)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
string toString()const
|
|
||||||
{
|
|
||||||
string tmp;
|
|
||||||
TransCode::encode(word, tmp);
|
|
||||||
return string_format("{word:%s,freq:%d, logFreq:%lf}", tmp.c_str(), freq, logFreq);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef unordered_map<uint, const TrieNodeInfo*> DagType;
|
|
||||||
struct SegmentChar
|
|
||||||
{
|
|
||||||
uint16_t uniCh;
|
|
||||||
DagType dag;
|
|
||||||
const TrieNodeInfo * pInfo;
|
|
||||||
double weight;
|
|
||||||
|
|
||||||
SegmentChar(uint16_t uni):uniCh(uni), pInfo(NULL), weight(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
/*const TrieNodeInfo* pInfo;
|
|
||||||
double weight;
|
|
||||||
SegmentChar(uint16_t unich, const TrieNodeInfo* p, double w):uniCh(unich), pInfo(p), weight(w)
|
|
||||||
{
|
|
||||||
}*/
|
|
||||||
};
|
|
||||||
/*
|
|
||||||
struct SegmentContext
|
|
||||||
{
|
|
||||||
vector<SegmentChar> context;
|
|
||||||
bool getDA
|
|
||||||
};*/
|
|
||||||
typedef vector<SegmentChar> SegmentContext;
|
|
||||||
|
|
||||||
|
|
||||||
struct KeyWordInfo: public TrieNodeInfo
|
|
||||||
{
|
|
||||||
double idf;
|
|
||||||
double weight;// log(wLen+1)*logFreq;
|
|
||||||
KeyWordInfo():idf(0.0),weight(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
KeyWordInfo(const Unicode& _word):TrieNodeInfo(_word),idf(0.0),weight(0.0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
KeyWordInfo(const TrieNodeInfo& trieNodeInfo):TrieNodeInfo(trieNodeInfo)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
string toString() const
|
|
||||||
{
|
|
||||||
string tmp;
|
|
||||||
TransCode::encode(word, tmp);
|
|
||||||
return string_format("{word:%s,weight:%lf, idf:%lf}", tmp.c_str(), weight, idf);
|
|
||||||
}
|
|
||||||
KeyWordInfo& operator = (const TrieNodeInfo& trieNodeInfo)
|
|
||||||
{
|
|
||||||
word = trieNodeInfo.word;
|
|
||||||
freq = trieNodeInfo.freq;
|
|
||||||
tag = trieNodeInfo.tag;
|
|
||||||
logFreq = trieNodeInfo.logFreq;
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Streams a KeyWordInfo as "{words:<w>, weight:<x>, idf:<y>}", where <w> is
// whatever TransCode::encode writes into `encoded` for info.word.
inline ostream& operator << (ostream& os, const KeyWordInfo& info)
{
    string encoded;
    TransCode::encode(info.word, encoded);

    os << "{words:" << encoded;
    os << ", weight:" << info.weight;
    os << ", idf:" << info.idf;
    os << "}";
    return os;
}
|
|
||||||
|
|
||||||
//inline string joinWordInfos(const vector<KeyWordInfo>& vec)
|
|
||||||
//{
|
|
||||||
// vector<string> tmp;
|
|
||||||
// for(uint i = 0; i < vec.size(); i++)
|
|
||||||
// {
|
|
||||||
// tmp.push_back(vec[i].toString());
|
|
||||||
// }
|
|
||||||
// return joinStr(tmp, ",");
|
|
||||||
//}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
Loading…
x
Reference in New Issue
Block a user