mirror of
https://github.com/yanyiwu/cppjieba.git
synced 2025-07-18 00:00:12 +08:00
Improve error logging for UTF-8 decoding failures across cppjieba components. Update the error messages in DictTrie, PosTagger, PreFilter, and SegmentBase to state clearly what kind of input failed to decode (dictionary word, word, input sentence, or separators) and, where the message includes it, the offending string itself. This change makes failures easier to debug when handling UTF-8 encoded strings.
This commit is contained in:
parent
39fc58f081
commit
016fc17575
@ -198,7 +198,7 @@ class DictTrie {
|
||||
double weight,
|
||||
const string& tag) {
|
||||
if (!DecodeUTF8RunesInString(word, node_info.word)) {
|
||||
XLOG(ERROR) << "Decode " << word << " failed.";
|
||||
XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word;
|
||||
return false;
|
||||
}
|
||||
node_info.weight = weight;
|
||||
|
@ -35,7 +35,7 @@ class PosTagger {
|
||||
const DictTrie * dict = segment.GetDictTrie();
|
||||
assert(dict != NULL);
|
||||
if (!DecodeUTF8RunesInString(str, runes)) {
|
||||
XLOG(ERROR) << "Decode failed.";
|
||||
XLOG(ERROR) << "UTF-8 decode failed for word: " << str;
|
||||
return POS_X;
|
||||
}
|
||||
tmp = dict->Find(runes.begin(), runes.end());
|
||||
|
@ -18,7 +18,7 @@ class PreFilter {
|
||||
const string& sentence)
|
||||
: symbols_(symbols) {
|
||||
if (!DecodeUTF8RunesInString(sentence, sentence_)) {
|
||||
XLOG(ERROR) << "decode failed. ";
|
||||
XLOG(ERROR) << "UTF-8 decode failed for input sentence";
|
||||
}
|
||||
cursor_ = sentence_.begin();
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ class SegmentBase {
|
||||
symbols_.clear();
|
||||
RuneStrArray runes;
|
||||
if (!DecodeUTF8RunesInString(s, runes)) {
|
||||
XLOG(ERROR) << "decode " << s << " failed";
|
||||
XLOG(ERROR) << "UTF-8 decode failed for separators: " << s;
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < runes.size(); i++) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user