New feature: support loading multiple user dictionaries; the paths are separated by a colon (:)

This commit is contained in:
yanyiwu 2015-09-25 16:20:06 +08:00
parent e55d0bf95c
commit 9de513f1d5
2 changed files with 30 additions and 23 deletions

View File

@ -1,5 +1,9 @@
# CppJieba ChangeLog # CppJieba ChangeLog
## next version
1. 用户词典的载入支持多词典载入,多词典路径用英文冒号(:)作为分隔符,就当作是向环境变量PATH致敬,哈哈。
## v3.2.1 ## v3.2.1
1. 修复 Jieba.hpp 头文件保护写错导致的 bug。 1. 修复 Jieba.hpp 头文件保护写错导致的 bug。

View File

@ -27,15 +27,15 @@ class DictTrie {
trie_ = NULL; trie_ = NULL;
minWeight_ = MAX_DOUBLE; minWeight_ = MAX_DOUBLE;
} }
DictTrie(const string& dictPath, const string& userDictPath = "") { DictTrie(const string& dictPath, const string& userDictPaths = "") {
new (this) DictTrie(); new (this) DictTrie();
init(dictPath, userDictPath); init(dictPath, userDictPaths);
} }
~DictTrie() { ~DictTrie() {
delete trie_; delete trie_;
} }
void init(const string& dictPath, const string& userDictPath = "") { void init(const string& dictPath, const string& userDictPaths = "") {
if(trie_ != NULL) { if(trie_ != NULL) {
LogFatal("trie already initted"); LogFatal("trie already initted");
} }
@ -44,8 +44,8 @@ class DictTrie {
minWeight_ = FindMinWeight(staticNodeInfos_); minWeight_ = FindMinWeight(staticNodeInfos_);
maxWeight_ = FindMaxWeight(staticNodeInfos_); maxWeight_ = FindMaxWeight(staticNodeInfos_);
if(userDictPath.size()) { if(userDictPaths.size()) {
LoadUserDict(userDictPath); LoadUserDict(userDictPaths);
} }
Shrink(staticNodeInfos_); Shrink(staticNodeInfos_);
CreateTrie(staticNodeInfos_); CreateTrie(staticNodeInfos_);
@ -92,16 +92,18 @@ class DictTrie {
trie_ = new Trie(words, valuePointers); trie_ = new Trie(words, valuePointers);
} }
void LoadUserDict(const string& filePath) { void LoadUserDict(const string& filePaths) {
ifstream ifs(filePath.c_str()); vector<string> files = limonp::split(filePaths, ":");
size_t lineno = 0;
for (size_t i = 0; i < files.size(); i++) {
ifstream ifs(files[i].c_str());
if(!ifs.is_open()) { if(!ifs.is_open()) {
LogFatal("file %s open failed.", filePath.c_str()); LogFatal("file %s open failed.", files[i].c_str());
} }
string line; string line;
DictUnit nodeInfo; DictUnit nodeInfo;
vector<string> buf; vector<string> buf;
size_t lineno; for(; getline(ifs, line); lineno++) {
for(lineno = 0; getline(ifs, line); lineno++) {
buf.clear(); buf.clear();
split(line, buf, " "); split(line, buf, " ");
if(buf.size() < 1) { if(buf.size() < 1) {
@ -112,7 +114,8 @@ class DictTrie {
(buf.size() == 2 ? buf[1] : UNKNOWN_TAG)); (buf.size() == 2 ? buf[1] : UNKNOWN_TAG));
staticNodeInfos_.push_back(nodeInfo); staticNodeInfos_.push_back(nodeInfo);
} }
LogInfo("load userdict[%s] ok. lines[%u]", filePath.c_str(), lineno); }
LogInfo("load userdicts[%s] ok. lines[%u]", filePaths.c_str(), lineno);
} }
bool MakeNodeInfo(DictUnit& nodeInfo, bool MakeNodeInfo(DictUnit& nodeInfo,
const string& word, const string& word,