mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Change Rune type from uint16_t to uint32_t to support more Chinese characters
This commit is contained in:
parent
8d66b1f1fa
commit
14e09290c2
@ -4,6 +4,7 @@
|
||||
|
||||
+ 在 Trie 中去除之前糟糕的针对 uint16 优化的用数组代替 map 的设计,
|
||||
该设计的主要问题是前提 Unicode 每个字符必须是 uint16,因而无法更全面地支持 Unicode 多国字符。
|
||||
+ Rune 类型从 16bit 更改为 32bit ,支持更多 Unicode 字符,包括一些罕见汉字。
|
||||
|
||||
## v4.4.1
|
||||
|
||||
|
@ -2,11 +2,12 @@
|
||||
#define CPPJIEBA_HMMMODEL_H
|
||||
|
||||
#include "limonp/StringUtil.hpp"
|
||||
#include "Trie.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
using namespace limonp;
|
||||
typedef unordered_map<uint16_t, double> EmitProbMap;
|
||||
typedef unordered_map<Rune, double> EmitProbMap;
|
||||
|
||||
struct HMMModel {
|
||||
/*
|
||||
@ -70,7 +71,7 @@ struct HMMModel {
|
||||
CHECK(GetLine(ifile, line));
|
||||
CHECK(LoadEmitProb(line, emitProbS));
|
||||
}
|
||||
double GetEmitProb(const EmitProbMap* ptMp, uint16_t key,
|
||||
double GetEmitProb(const EmitProbMap* ptMp, Rune key,
|
||||
double defVal)const {
|
||||
EmitProbMap::const_iterator cit = ptMp->find(key);
|
||||
if (cit == ptMp->end()) {
|
||||
|
@ -13,7 +13,7 @@ namespace cppjieba {
|
||||
|
||||
using namespace limonp;
|
||||
|
||||
typedef uint16_t Rune;
|
||||
typedef uint32_t Rune;
|
||||
typedef limonp::LocalVector<Rune> Unicode;
|
||||
|
||||
namespace TransCode {
|
||||
@ -21,7 +21,7 @@ inline bool Decode(const string& str, Unicode& res) {
|
||||
#ifdef CPPJIEBA_GBK
|
||||
return gbkTrans(str, res);
|
||||
#else
|
||||
return Utf8ToUnicode(str, res);
|
||||
return Utf8ToUnicode32(str, res);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ inline void Encode(Unicode::const_iterator begin, Unicode::const_iterator end, s
|
||||
#ifdef CPPJIEBA_GBK
|
||||
gbkTrans(begin, end, res);
|
||||
#else
|
||||
UnicodeToUtf8(begin, end, res);
|
||||
Unicode32ToUtf8(begin, end, res);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <vector>
|
||||
#include <queue>
|
||||
#include "limonp/StdExtension.hpp"
|
||||
#include "Trie.hpp"
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
@ -25,7 +26,7 @@ inline ostream & operator << (ostream& os, const DictUnit& unit) {
|
||||
}
|
||||
|
||||
struct Dag {
|
||||
uint16_t rune;
|
||||
Rune rune;
|
||||
LocalVector<pair<size_t, const DictUnit*> > nexts;
|
||||
const DictUnit * pInfo;
|
||||
double weight;
|
||||
|
Loading…
x
Reference in New Issue
Block a user