New feature: load multiple user dictionaries; paths are separated by ':'.

This commit is contained in:
yanyiwu 2015-09-25 16:20:06 +08:00
parent e55d0bf95c
commit 9de513f1d5
2 changed files with 30 additions and 23 deletions

View File

@ -1,5 +1,9 @@
# CppJieba ChangeLog
## next version
1. 用户词典的载入支持多词典载入,多词典路径用英文冒号(:)作为分隔符,就当作是向环境变量 PATH 致敬,哈哈。
## v3.2.1
1. 修复 Jieba.hpp 头文件保护写错导致的 bug。

View File

@ -27,15 +27,15 @@ class DictTrie {
trie_ = NULL;
minWeight_ = MAX_DOUBLE;
}
// Two-argument constructor, shown as a diff hunk without +/- markers: each
// changed line appears twice, presumably the removed version first and the
// added version second. The only visible change is renaming userDictPath to
// userDictPaths.
DictTrie(const string& dictPath, const string& userDictPath = "") {
DictTrie(const string& dictPath, const string& userDictPaths = "") {
// Placement-new runs DictTrie's default constructor on this same object
// before init() is called.
new (this) DictTrie();
init(dictPath, userDictPath);
// Forwards both arguments unchanged to init(); the user-dictionary argument
// defaults to an empty string.
init(dictPath, userDictPaths);
}
// Destructor: frees the object pointed to by trie_ (assigned from `new Trie`
// elsewhere in this class; deleting a NULL pointer is a no-op).
~DictTrie() {
delete trie_;
}
void init(const string& dictPath, const string& userDictPath = "") {
void init(const string& dictPath, const string& userDictPaths = "") {
if(trie_ != NULL) {
LogFatal("trie already initted");
}
@ -44,8 +44,8 @@ class DictTrie {
minWeight_ = FindMinWeight(staticNodeInfos_);
maxWeight_ = FindMaxWeight(staticNodeInfos_);
if(userDictPath.size()) {
LoadUserDict(userDictPath);
if(userDictPaths.size()) {
LoadUserDict(userDictPaths);
}
Shrink(staticNodeInfos_);
CreateTrie(staticNodeInfos_);
@ -92,27 +92,30 @@ class DictTrie {
trie_ = new Trie(words, valuePointers);
}
// LoadUserDict, shown as a diff hunk without +/- markers: the removed
// single-file version (parameter filePath) and the added multi-file version
// (parameter filePaths) are interleaved below, so this span is not compilable
// as written.
//
// Both versions read a user dictionary line by line, split each line on a
// space, build a DictUnit via MakeUserNodeInfo and append it to
// staticNodeInfos_.
void LoadUserDict(const string& filePath) {
ifstream ifs(filePath.c_str());
if(!ifs.is_open()) {
LogFatal("file %s open failed.", filePath.c_str());
}
string line;
DictUnit nodeInfo;
vector<string> buf;
size_t lineno;
for(lineno = 0; getline(ifs, line); lineno++) {
buf.clear();
split(line, buf, " ");
if(buf.size() < 1) {
LogFatal("split [%s] result illegal", line.c_str());
// Multi-file version: filePaths holds one or more paths separated by ':'.
void LoadUserDict(const string& filePaths) {
vector<string> files = limonp::split(filePaths, ":");
// Declared outside the per-file loop and never reset, so it counts lines
// across all files.
size_t lineno = 0;
for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str());
// Reports a file that cannot be opened through LogFatal.
// NOTE(review): whether LogFatal aborts or returns is not visible here.
if(!ifs.is_open()) {
LogFatal("file %s open failed.", files[i].c_str());
}
string line;
DictUnit nodeInfo;
MakeUserNodeInfo(nodeInfo, buf[0],
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
staticNodeInfos_.push_back(nodeInfo);
vector<string> buf;
for(; getline(ifs, line); lineno++) {
buf.clear();
// Fields are space-separated: buf[0] is the word, buf[1] the optional tag.
split(line, buf, " ");
if(buf.size() < 1) {
LogFatal("split [%s] result illegal", line.c_str());
}
// NOTE(review): this looks like a second declaration of nodeInfo that shadows
// the one declared before the inner loop -- confirm against the real file.
DictUnit nodeInfo;
// The tag is taken from buf[1] only when the line has exactly two fields;
// any other field count falls back to UNKNOWN_TAG.
MakeUserNodeInfo(nodeInfo, buf[0],
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
staticNodeInfos_.push_back(nodeInfo);
}
}
LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno);
// NOTE(review): lineno is a size_t but is formatted with %u; if LogInfo uses
// printf-style formatting this is a type mismatch on platforms where size_t
// is wider than unsigned int.
LogInfo("load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
}
bool MakeNodeInfo(DictUnit& nodeInfo,
const string& word,