From 1e1e585194d0816b369464890db7779436d5da21 Mon Sep 17 00:00:00 2001 From: zhoupeng Date: Fri, 8 Jun 2018 14:23:01 +0800 Subject: [PATCH] LoadUserDict by set,vector --- include/cppjieba/DictTrie.hpp | 45 +++++++++++++++++++++-------------- include/cppjieba/Jieba.hpp | 11 ++++++++- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/include/cppjieba/DictTrie.hpp b/include/cppjieba/DictTrie.hpp index d4d2043..25aa5cf 100644 --- a/include/cppjieba/DictTrie.hpp +++ b/include/cppjieba/DictTrie.hpp @@ -80,7 +80,7 @@ class DictTrie { return min_weight_; } - void InserUserDictNode(const string& line){ + void InserUserDictNode(const string& line) { vector<string> buf; DictUnit node_info; Split(line, buf, " "); @@ -106,12 +106,37 @@ class DictTrie { } } - void LoadUserDict(vector<string>& buf){ + void LoadUserDict(const vector<string>& buf) { for (size_t i = 0; i < buf.size(); i++) { InserUserDictNode(buf[i]); } } + void LoadUserDict(const set<string>& buf) { + std::set<string>::const_iterator iter; + for (iter = buf.begin(); iter != buf.end(); iter++){ + InserUserDictNode(*iter); + } + } + + void LoadUserDict(const string& filePaths) { + vector<string> files = limonp::Split(filePaths, "|;"); + size_t lineno = 0; + for (size_t i = 0; i < files.size(); i++) { + ifstream ifs(files[i].c_str()); + XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; + string line; + + for (; getline(ifs, line); lineno++) { + if (line.size() == 0) { + continue; + } + InserUserDictNode(line); + } + } + } + + + private: void Init(const string& dict_path, const string& user_dict_paths, UserWordWeightOption user_word_weight_opt) { LoadDict(dict_path); @@ -140,22 +165,6 @@ class DictTrie { - void LoadUserDict(const string& filePaths) { - vector<string> files = limonp::Split(filePaths, "|;"); - size_t lineno = 0; - for (size_t i = 0; i < files.size(); i++) { - ifstream ifs(files[i].c_str()); - XCHECK(ifs.is_open()) << "open " << files[i] << " failed"; - string line; - - for (; getline(ifs, line); lineno++) { - if (line.size() == 0) { -
continue; - } - InserUserDictNode(line); - } - } - } bool MakeNodeInfo(DictUnit& node_info, const string& word, diff --git a/include/cppjieba/Jieba.hpp b/include/cppjieba/Jieba.hpp index 2062a75..a8f6751 100644 --- a/include/cppjieba/Jieba.hpp +++ b/include/cppjieba/Jieba.hpp @@ -84,14 +84,23 @@ class Jieba { const DictTrie* GetDictTrie() const { return &dict_trie_; } + const HMMModel* GetHMMModel() const { return &model_; } - void LoadUserDict(vector<string>& buf) { + void LoadUserDict(const vector<string>& buf) { dict_trie_.LoadUserDict(buf); } + void LoadUserDict(const set<string>& buf) { + dict_trie_.LoadUserDict(buf); + } + + void LoadUserDict(const string& path) { + dict_trie_.LoadUserDict(path); + } + private: DictTrie dict_trie_; HMMModel model_;