Note (review): the line structure of this patch was rebuilt from a copy whose newlines had been collapsed.
Blank context lines and leading indentation are inferred from the hunk line counts; confirm against the target tree before applying.
The template arguments on `unordered_set` appear to have been stripped by the same mangling and are left as found.

diff --git a/include/cppjieba/Jieba.hpp b/include/cppjieba/Jieba.hpp
index 16e63dc..6cd1d95 100644
--- a/include/cppjieba/Jieba.hpp
+++ b/include/cppjieba/Jieba.hpp
@@ -67,6 +67,15 @@ class Jieba {
     return dict_trie_.InsertUserWord(word, tag);
   }
 
+  void ResetSeparators(const string& s) {
+    //TODO
+    mp_seg_.ResetSeparators(s);
+    hmm_seg_.ResetSeparators(s);
+    mix_seg_.ResetSeparators(s);
+    full_seg_.ResetSeparators(s);
+    query_seg_.ResetSeparators(s);
+  }
+
   const DictTrie* GetDictTrie() const {
     return &dict_trie_;
   }
diff --git a/include/cppjieba/SegmentBase.hpp b/include/cppjieba/SegmentBase.hpp
index 9ce21c4..3f81404 100644
--- a/include/cppjieba/SegmentBase.hpp
+++ b/include/cppjieba/SegmentBase.hpp
@@ -8,20 +8,20 @@
 
 namespace cppjieba {
 
-const char* const SPECIAL_RUNES = " \t\n,。";
+const char* const SPECIAL_SEPARATORS = " \t\n,。";
 
 using namespace limonp;
 
 class SegmentBase {
  public:
   SegmentBase() {
-    XCHECK(Insert(SPECIAL_RUNES));
+    XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
   }
   ~SegmentBase() {
   }
 
- protected:
-  bool Insert(const string& s) {
+  bool ResetSeparators(const string& s) {
+    symbols_.clear();
     RuneStrArray runes;
     if (!DecodeRunesInString(s, runes)) {
       XLOG(ERROR) << "decode " << s << " failed";
@@ -35,23 +35,7 @@ class SegmentBase {
     }
     return true;
   }
-  //bool Remove(const string& s) {
-  //  RuneStrArray runes;
-  //  if (!DecodeRunesInString(s, runes)) {
-  //    XLOG(ERROR) << "decode " << s << " failed";
-  //    return false;
-  //  }
-  //  for (size_t i = 0; i < runes.size(); i++) {
-  //    unordered_set::iterator iter = symbols_.find(runes[i].rune);
-  //    if (iter == symbols_.end()) {
-  //      XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " not found";
-  //      return false;
-  //    }
-  //    symbols_.erase(iter);
-  //  }
-  //  return true;
-  //}
-
+ protected:
   unordered_set symbols_;
 }; // class SegmentBase
 
diff --git a/test/unittest/jieba_test.cpp b/test/unittest/jieba_test.cpp
index 0081774..bffedc9 100644
--- a/test/unittest/jieba_test.cpp
+++ b/test/unittest/jieba_test.cpp
@@ -103,4 +103,15 @@ TEST(JiebaTest, InsertUserWord) {
     result << words;
     ASSERT_EQ(result, StringFormat("[\"%s\"]", newWord.c_str()));
   }
+
+  ASSERT_TRUE(jieba.InsertUserWord("同一个世界,同一个梦想"));
+  jieba.Cut("同一个世界,同一个梦想", words);
+  result = Join(words.begin(), words.end(), "/");
+  ASSERT_EQ(result, "同一个/世界/,/同一个/梦想");
+
+  jieba.ResetSeparators("");
+
+  jieba.Cut("同一个世界,同一个梦想", words);
+  result = Join(words.begin(), words.end(), "/");
+  ASSERT_EQ(result, "同一个世界,同一个梦想");
 }