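Improve error logging for UTF-8 decoding failures across cppjieba components. Update the error messages in DictTrie, PosTagger, PreFilter, and SegmentBase so that each one states that UTF-8 decoding failed and names the kind of input involved (dictionary word, word being tagged, input sentence, or separators). The DictTrie, PosTagger, and SegmentBase messages also include the offending string; the PreFilter message identifies the input only as the input sentence and does not print it. This makes decoding failures easier to trace when debugging the handling of UTF-8 encoded strings.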

This commit is contained in:
yanyiwu 2024-12-08 17:26:28 +08:00
parent 39fc58f081
commit 016fc17575
4 changed files with 4 additions and 4 deletions

View File

@ -198,7 +198,7 @@ class DictTrie {
double weight,
const string& tag) {
if (!DecodeUTF8RunesInString(word, node_info.word)) {
XLOG(ERROR) << "Decode " << word << " failed.";
XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word;
return false;
}
node_info.weight = weight;

View File

@ -35,7 +35,7 @@ class PosTagger {
const DictTrie * dict = segment.GetDictTrie();
assert(dict != NULL);
if (!DecodeUTF8RunesInString(str, runes)) {
XLOG(ERROR) << "Decode failed.";
XLOG(ERROR) << "UTF-8 decode failed for word: " << str;
return POS_X;
}
tmp = dict->Find(runes.begin(), runes.end());

View File

@ -18,7 +18,7 @@ class PreFilter {
const string& sentence)
: symbols_(symbols) {
if (!DecodeUTF8RunesInString(sentence, sentence_)) {
XLOG(ERROR) << "decode failed. ";
XLOG(ERROR) << "UTF-8 decode failed for input sentence";
}
cursor_ = sentence_.begin();
}

View File

@ -26,7 +26,7 @@ class SegmentBase {
symbols_.clear();
RuneStrArray runes;
if (!DecodeUTF8RunesInString(s, runes)) {
XLOG(ERROR) << "decode " << s << " failed";
XLOG(ERROR) << "UTF-8 decode failed for separators: " << s;
return false;
}
for (size_t i = 0; i < runes.size(); i++) {