Add Jieba::ResetSeparators API and unit test

This commit is contained in:
yanyiwu 2016-05-09 22:49:51 +08:00
parent 6e3ecec599
commit c425bcc49f
3 changed files with 25 additions and 21 deletions

View File

@ -67,6 +67,15 @@ class Jieba {
return dict_trie_.InsertUserWord(word, tag);
}
// Forwards the separator string `s` to ResetSeparators on each of the five
// segmenter members (mp, hmm, mix, full, query), in that order, so that they
// are all given the same argument.
// NOTE(review): if these members use a ResetSeparators that returns bool,
// the results are discarded here, so a failure in any one of them would go
// unreported to the caller -- confirm whether that is intended.
void ResetSeparators(const string& s) {
// TODO: left open by the original author; no further detail was recorded.
mp_seg_.ResetSeparators(s);
hmm_seg_.ResetSeparators(s);
mix_seg_.ResetSeparators(s);
full_seg_.ResetSeparators(s);
query_seg_.ResetSeparators(s);
}
// Returns a read-only pointer to this object's dict_trie_ member.
// The pointer is the address of a member, so it is only valid while this
// object is alive.
const DictTrie* GetDictTrie() const {
return &dict_trie_;
}

View File

@ -8,20 +8,20 @@
namespace cppjieba {
const char* const SPECIAL_RUNES = " \t\n,。";
const char* const SPECIAL_SEPARATORS = " \t\n,。";
using namespace limonp;
class SegmentBase {
public:
SegmentBase() {
XCHECK(Insert(SPECIAL_RUNES));
XCHECK(ResetSeparators(SPECIAL_SEPARATORS));
}
~SegmentBase() {
}
protected:
bool Insert(const string& s) {
bool ResetSeparators(const string& s) {
symbols_.clear();
RuneStrArray runes;
if (!DecodeRunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
@ -35,23 +35,7 @@ class SegmentBase {
}
return true;
}
//bool Remove(const string& s) {
// RuneStrArray runes;
// if (!DecodeRunesInString(s, runes)) {
// XLOG(ERROR) << "decode " << s << " failed";
// return false;
// }
// for (size_t i = 0; i < runes.size(); i++) {
// unordered_set<Rune>::iterator iter = symbols_.find(runes[i].rune);
// if (iter == symbols_.end()) {
// XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " not found";
// return false;
// }
// symbols_.erase(iter);
// }
// return true;
//}
protected:
unordered_set<Rune> symbols_;
}; // class SegmentBase

View File

@ -103,4 +103,15 @@ TEST(JiebaTest, InsertUserWord) {
result << words;
ASSERT_EQ(result, StringFormat("[\"%s\"]", newWord.c_str()));
}
ASSERT_TRUE(jieba.InsertUserWord("同一个世界,同一个梦想"));
jieba.Cut("同一个世界,同一个梦想", words);
result = Join(words.begin(), words.end(), "/");
ASSERT_EQ(result, "同一个/世界//同一个/梦想");
jieba.ResetSeparators("");
jieba.Cut("同一个世界,同一个梦想", words);
result = Join(words.begin(), words.end(), "/");
ASSERT_EQ(result, "同一个世界,同一个梦想");
}