mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
add Jieba::ResetSeparators api and unittest
This commit is contained in:
parent
6e3ecec599
commit
c425bcc49f
@ -67,6 +67,15 @@ class Jieba {
|
||||
return dict_trie_.InsertUserWord(word, tag);
|
||||
}
|
||||
|
||||
// Passes the separator string `s` unchanged to ResetSeparators on each of the
// five members mp_seg_, hmm_seg_, mix_seg_, full_seg_ and query_seg_, in that
// order. Returns nothing.
void ResetSeparators(const string& s) {
|
||||
// TODO (left unspecified by the author).
// NOTE(review): the results of the five calls below are discarded. If these
// members use a bool-returning ResetSeparators, a failed reset would go
// unnoticed by the caller -- confirm whether that is what this TODO tracks.
|
||||
mp_seg_.ResetSeparators(s);
|
||||
hmm_seg_.ResetSeparators(s);
|
||||
mix_seg_.ResetSeparators(s);
|
||||
full_seg_.ResetSeparators(s);
|
||||
query_seg_.ResetSeparators(s);
|
||||
}
|
||||
|
||||
// Returns the address of dict_trie_ as a pointer-to-const, so callers can
// inspect it but not modify it through this pointer. No copy is made.
// NOTE(review): presumably dict_trie_ is a data member, in which case the
// pointer is only usable while this object is alive -- confirm.
const DictTrie* GetDictTrie() const {
|
||||
return &dict_trie_;
|
||||
}
|
||||
|
@ -8,20 +8,20 @@
|
||||
|
||||
namespace cppjieba {
|
||||
|
||||
const char* const SPECIAL_RUNES = " \t\n,。";
|
||||
// Separator characters that the SegmentBase constructor passes to
// ResetSeparators: space, tab, newline, and the full-width ',' and '。'.
const char* const SPECIAL_SEPARATORS = " \t\n,。";
|
||||
|
||||
using namespace limonp;
|
||||
|
||||
class SegmentBase {
|
||||
public:
|
||||
SegmentBase() {
|
||||
XCHECK(Insert(SPECIAL_RUNES));
|
||||
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
|
||||
}
|
||||
~SegmentBase() {
|
||||
}
|
||||
|
||||
protected:
|
||||
bool Insert(const string& s) {
|
||||
bool ResetSeparators(const string& s) {
|
||||
symbols_.clear();
|
||||
RuneStrArray runes;
|
||||
if (!DecodeRunesInString(s, runes)) {
|
||||
XLOG(ERROR) << "decode " << s << " failed";
|
||||
@ -35,23 +35,7 @@ class SegmentBase {
|
||||
}
|
||||
return true;
|
||||
}
|
||||
//bool Remove(const string& s) {
|
||||
// RuneStrArray runes;
|
||||
// if (!DecodeRunesInString(s, runes)) {
|
||||
// XLOG(ERROR) << "decode " << s << " failed";
|
||||
// return false;
|
||||
// }
|
||||
// for (size_t i = 0; i < runes.size(); i++) {
|
||||
// unordered_set<Rune>::iterator iter = symbols_.find(runes[i].rune);
|
||||
// if (iter == symbols_.end()) {
|
||||
// XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " not found";
|
||||
// return false;
|
||||
// }
|
||||
// symbols_.erase(iter);
|
||||
// }
|
||||
// return true;
|
||||
//}
|
||||
|
||||
protected:
|
||||
unordered_set<Rune> symbols_;
|
||||
}; // class SegmentBase
|
||||
|
||||
|
@ -103,4 +103,15 @@ TEST(JiebaTest, InsertUserWord) {
|
||||
result << words;
|
||||
ASSERT_EQ(result, StringFormat("[\"%s\"]", newWord.c_str()));
|
||||
}
|
||||
|
||||
ASSERT_TRUE(jieba.InsertUserWord("同一个世界,同一个梦想"));
|
||||
jieba.Cut("同一个世界,同一个梦想", words);
|
||||
result = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(result, "同一个/世界/,/同一个/梦想");
|
||||
|
||||
jieba.ResetSeparators("");
|
||||
|
||||
jieba.Cut("同一个世界,同一个梦想", words);
|
||||
result = Join(words.begin(), words.end(), "/");
|
||||
ASSERT_EQ(result, "同一个世界,同一个梦想");
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user