New feature: load multiple user dictionaries; the paths are separated by a colon (:)

This commit is contained in:
yanyiwu 2015-09-25 16:20:06 +08:00
parent e55d0bf95c
commit 9de513f1d5
2 changed files with 30 additions and 23 deletions

View File

@ -1,5 +1,9 @@
# CppJieba ChangeLog # CppJieba ChangeLog
## next version
1. 用户词典的载入支持多词典载入,多词典路径用英文冒号(:)作为分隔符,就当作是向环境变量 PATH 致敬,哈哈。
## v3.2.1 ## v3.2.1
1. 修复 Jieba.hpp 头文件保护写错导致的 bug。 1. 修复 Jieba.hpp 头文件保护写错导致的 bug。

View File

@ -27,15 +27,15 @@ class DictTrie {
trie_ = NULL; trie_ = NULL;
minWeight_ = MAX_DOUBLE; minWeight_ = MAX_DOUBLE;
} }
DictTrie(const string& dictPath, const string& userDictPath = "") { DictTrie(const string& dictPath, const string& userDictPaths = "") {
new (this) DictTrie(); new (this) DictTrie();
init(dictPath, userDictPath); init(dictPath, userDictPaths);
} }
~DictTrie() { ~DictTrie() {
delete trie_; delete trie_;
} }
void init(const string& dictPath, const string& userDictPath = "") { void init(const string& dictPath, const string& userDictPaths = "") {
if(trie_ != NULL) { if(trie_ != NULL) {
LogFatal("trie already initted"); LogFatal("trie already initted");
} }
@ -44,8 +44,8 @@ class DictTrie {
minWeight_ = FindMinWeight(staticNodeInfos_); minWeight_ = FindMinWeight(staticNodeInfos_);
maxWeight_ = FindMaxWeight(staticNodeInfos_); maxWeight_ = FindMaxWeight(staticNodeInfos_);
if(userDictPath.size()) { if(userDictPaths.size()) {
LoadUserDict(userDictPath); LoadUserDict(userDictPaths);
} }
Shrink(staticNodeInfos_); Shrink(staticNodeInfos_);
CreateTrie(staticNodeInfos_); CreateTrie(staticNodeInfos_);
@ -92,27 +92,30 @@ class DictTrie {
trie_ = new Trie(words, valuePointers); trie_ = new Trie(words, valuePointers);
} }
void LoadUserDict(const string& filePath) { void LoadUserDict(const string& filePaths) {
ifstream ifs(filePath.c_str()); vector<string> files = limonp::split(filePaths, ":");
if(!ifs.is_open()) { size_t lineno = 0;
LogFatal("file %s open failed.", filePath.c_str()); for (size_t i = 0; i < files.size(); i++) {
} ifstream ifs(files[i].c_str());
string line; if(!ifs.is_open()) {
DictUnit nodeInfo; LogFatal("file %s open failed.", files[i].c_str());
vector<string> buf;
size_t lineno;
for(lineno = 0; getline(ifs, line); lineno++) {
buf.clear();
split(line, buf, " ");
if(buf.size() < 1) {
LogFatal("split [%s] result illegal", line.c_str());
} }
string line;
DictUnit nodeInfo; DictUnit nodeInfo;
MakeUserNodeInfo(nodeInfo, buf[0], vector<string> buf;
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG)); for(; getline(ifs, line); lineno++) {
staticNodeInfos_.push_back(nodeInfo); buf.clear();
split(line, buf, " ");
if(buf.size() < 1) {
LogFatal("split [%s] result illegal", line.c_str());
}
DictUnit nodeInfo;
MakeUserNodeInfo(nodeInfo, buf[0],
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
staticNodeInfos_.push_back(nodeInfo);
}
} }
LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno); LogInfo("load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
} }
bool MakeNodeInfo(DictUnit& nodeInfo, bool MakeNodeInfo(DictUnit& nodeInfo,
const string& word, const string& word,