mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
new feature: support loading multiple user dictionaries; the paths are separated by a colon (:)
This commit is contained in:
parent
e55d0bf95c
commit
9de513f1d5
@ -1,5 +1,9 @@
|
|||||||
# CppJieba ChangeLog
|
# CppJieba ChangeLog
|
||||||
|
|
||||||
|
## next version
|
||||||
|
|
||||||
|
1. 用户词典的载入支持多词典载入,多词典路径用英文冒号(:)作为分隔符,就当作是向环境变量PATH致敬,哈哈。
|
||||||
|
|
||||||
## v3.2.1
|
## v3.2.1
|
||||||
|
|
||||||
1. 修复 Jieba.hpp 头文件保护写错导致的 bug。
|
1. 修复 Jieba.hpp 头文件保护写错导致的 bug。
|
||||||
|
@ -27,15 +27,15 @@ class DictTrie {
|
|||||||
trie_ = NULL;
|
trie_ = NULL;
|
||||||
minWeight_ = MAX_DOUBLE;
|
minWeight_ = MAX_DOUBLE;
|
||||||
}
|
}
|
||||||
DictTrie(const string& dictPath, const string& userDictPath = "") {
|
DictTrie(const string& dictPath, const string& userDictPaths = "") {
|
||||||
new (this) DictTrie();
|
new (this) DictTrie();
|
||||||
init(dictPath, userDictPath);
|
init(dictPath, userDictPaths);
|
||||||
}
|
}
|
||||||
~DictTrie() {
|
~DictTrie() {
|
||||||
delete trie_;
|
delete trie_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void init(const string& dictPath, const string& userDictPath = "") {
|
void init(const string& dictPath, const string& userDictPaths = "") {
|
||||||
if(trie_ != NULL) {
|
if(trie_ != NULL) {
|
||||||
LogFatal("trie already initted");
|
LogFatal("trie already initted");
|
||||||
}
|
}
|
||||||
@ -44,8 +44,8 @@ class DictTrie {
|
|||||||
minWeight_ = FindMinWeight(staticNodeInfos_);
|
minWeight_ = FindMinWeight(staticNodeInfos_);
|
||||||
maxWeight_ = FindMaxWeight(staticNodeInfos_);
|
maxWeight_ = FindMaxWeight(staticNodeInfos_);
|
||||||
|
|
||||||
if(userDictPath.size()) {
|
if(userDictPaths.size()) {
|
||||||
LoadUserDict(userDictPath);
|
LoadUserDict(userDictPaths);
|
||||||
}
|
}
|
||||||
Shrink(staticNodeInfos_);
|
Shrink(staticNodeInfos_);
|
||||||
CreateTrie(staticNodeInfos_);
|
CreateTrie(staticNodeInfos_);
|
||||||
@ -92,27 +92,30 @@ class DictTrie {
|
|||||||
|
|
||||||
trie_ = new Trie(words, valuePointers);
|
trie_ = new Trie(words, valuePointers);
|
||||||
}
|
}
|
||||||
void LoadUserDict(const string& filePath) {
|
void LoadUserDict(const string& filePaths) {
|
||||||
ifstream ifs(filePath.c_str());
|
vector<string> files = limonp::split(filePaths, ":");
|
||||||
if(!ifs.is_open()) {
|
size_t lineno = 0;
|
||||||
LogFatal("file %s open failed.", filePath.c_str());
|
for (size_t i = 0; i < files.size(); i++) {
|
||||||
}
|
ifstream ifs(files[i].c_str());
|
||||||
string line;
|
if(!ifs.is_open()) {
|
||||||
DictUnit nodeInfo;
|
LogFatal("file %s open failed.", files[i].c_str());
|
||||||
vector<string> buf;
|
|
||||||
size_t lineno;
|
|
||||||
for(lineno = 0; getline(ifs, line); lineno++) {
|
|
||||||
buf.clear();
|
|
||||||
split(line, buf, " ");
|
|
||||||
if(buf.size() < 1) {
|
|
||||||
LogFatal("split [%s] result illegal", line.c_str());
|
|
||||||
}
|
}
|
||||||
|
string line;
|
||||||
DictUnit nodeInfo;
|
DictUnit nodeInfo;
|
||||||
MakeUserNodeInfo(nodeInfo, buf[0],
|
vector<string> buf;
|
||||||
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
|
for(; getline(ifs, line); lineno++) {
|
||||||
staticNodeInfos_.push_back(nodeInfo);
|
buf.clear();
|
||||||
|
split(line, buf, " ");
|
||||||
|
if(buf.size() < 1) {
|
||||||
|
LogFatal("split [%s] result illegal", line.c_str());
|
||||||
|
}
|
||||||
|
DictUnit nodeInfo;
|
||||||
|
MakeUserNodeInfo(nodeInfo, buf[0],
|
||||||
|
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
|
||||||
|
staticNodeInfos_.push_back(nodeInfo);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno);
|
LogInfo("load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
|
||||||
}
|
}
|
||||||
bool MakeNodeInfo(DictUnit& nodeInfo,
|
bool MakeNodeInfo(DictUnit& nodeInfo,
|
||||||
const string& word,
|
const string& word,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user