Mirror of https://github.com/yanyiwu/cppjieba.git (synced 2025-07-18 00:00:12 +08:00)
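Improve error logging for UTF-8 decoding failures across cppjieba components. The error messages in DictTrie, PosTagger, PreFilter, and SegmentBase now state what kind of input failed to decode (dictionary word, word being tagged, input sentence, or separator string). The DictTrie, PosTagger, and SegmentBase messages also log the offending string itself; the PreFilter message names only the input kind. This makes UTF-8 decoding failures easier to debug.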
This commit is contained in:
parent
39fc58f081
commit
016fc17575
@ -198,7 +198,7 @@ class DictTrie {
|
|||||||
double weight,
|
double weight,
|
||||||
const string& tag) {
|
const string& tag) {
|
||||||
if (!DecodeUTF8RunesInString(word, node_info.word)) {
|
if (!DecodeUTF8RunesInString(word, node_info.word)) {
|
||||||
XLOG(ERROR) << "Decode " << word << " failed.";
|
XLOG(ERROR) << "UTF-8 decode failed for dict word: " << word;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
node_info.weight = weight;
|
node_info.weight = weight;
|
||||||
|
@ -35,7 +35,7 @@ class PosTagger {
|
|||||||
const DictTrie * dict = segment.GetDictTrie();
|
const DictTrie * dict = segment.GetDictTrie();
|
||||||
assert(dict != NULL);
|
assert(dict != NULL);
|
||||||
if (!DecodeUTF8RunesInString(str, runes)) {
|
if (!DecodeUTF8RunesInString(str, runes)) {
|
||||||
XLOG(ERROR) << "Decode failed.";
|
XLOG(ERROR) << "UTF-8 decode failed for word: " << str;
|
||||||
return POS_X;
|
return POS_X;
|
||||||
}
|
}
|
||||||
tmp = dict->Find(runes.begin(), runes.end());
|
tmp = dict->Find(runes.begin(), runes.end());
|
||||||
|
@ -18,7 +18,7 @@ class PreFilter {
|
|||||||
const string& sentence)
|
const string& sentence)
|
||||||
: symbols_(symbols) {
|
: symbols_(symbols) {
|
||||||
if (!DecodeUTF8RunesInString(sentence, sentence_)) {
|
if (!DecodeUTF8RunesInString(sentence, sentence_)) {
|
||||||
XLOG(ERROR) << "decode failed. ";
|
XLOG(ERROR) << "UTF-8 decode failed for input sentence";
|
||||||
}
|
}
|
||||||
cursor_ = sentence_.begin();
|
cursor_ = sentence_.begin();
|
||||||
}
|
}
|
||||||
|
@ -26,7 +26,7 @@ class SegmentBase {
|
|||||||
symbols_.clear();
|
symbols_.clear();
|
||||||
RuneStrArray runes;
|
RuneStrArray runes;
|
||||||
if (!DecodeUTF8RunesInString(s, runes)) {
|
if (!DecodeUTF8RunesInString(s, runes)) {
|
||||||
XLOG(ERROR) << "decode " << s << " failed";
|
XLOG(ERROR) << "UTF-8 decode failed for separators: " << s;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < runes.size(); i++) {
|
for (size_t i = 0; i < runes.size(); i++) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user