mirror of
https://github.com/panda3d/panda3d.git
synced 2025-10-04 10:54:24 -04:00
display warnings for unexpected characters in utf-8 string
This commit is contained in:
parent
2bc10310d0
commit
50682a4409
@ -17,6 +17,7 @@
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#include "stringDecoder.h"
|
#include "stringDecoder.h"
|
||||||
|
#include "config_text.h"
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
// Function: StringDecoder::Destructor
|
// Function: StringDecoder::Destructor
|
||||||
@ -71,33 +72,51 @@ The value of each individual byte indicates its UTF-8 function, as follows:
|
|||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
int StringUtf8Decoder::
|
int StringUtf8Decoder::
|
||||||
get_next_character() {
|
get_next_character() {
|
||||||
if (test_eof()) {
|
unsigned int result;
|
||||||
return -1;
|
while (!test_eof()) {
|
||||||
|
result = (unsigned char)_input[_p++];
|
||||||
|
if ((result & 0x80) == 0) {
|
||||||
|
// A 7-bit ascii value in one byte.
|
||||||
|
return result;
|
||||||
|
|
||||||
|
} if ((result & 0xe0) == 0xc0) {
|
||||||
|
// First byte of two.
|
||||||
|
unsigned int two = 0;
|
||||||
|
if (test_eof()) {
|
||||||
|
text_cat.warning()
|
||||||
|
<< "utf-8 encoded string ends abruptly.\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
two = (unsigned char)_input[_p++];
|
||||||
|
result = ((result & 0x1f) << 6) | (two & 0x3f);
|
||||||
|
return result;
|
||||||
|
|
||||||
|
} else if ((result & 0xf0) == 0xe0) {
|
||||||
|
// First byte of three.
|
||||||
|
if (test_eof()) {
|
||||||
|
text_cat.warning()
|
||||||
|
<< "utf-8 encoded string ends abruptly.\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
unsigned int two = (unsigned char)_input[_p++];
|
||||||
|
if (test_eof()) {
|
||||||
|
text_cat.warning()
|
||||||
|
<< "utf-8 encoded string ends abruptly.\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
unsigned int three = (unsigned char)_input[_p++];
|
||||||
|
result = ((result & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise--the high bit is set but it is not one of the
|
||||||
|
// introductory utf-8 bytes--we have an error.
|
||||||
|
text_cat.warning()
|
||||||
|
<< "Non utf-8 byte in string: 0x" << hex << result << dec << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned int result = (unsigned char)_input[_p++];
|
// End of string reached.
|
||||||
if ((result & 0xe0) == 0xc0) {
|
return -1;
|
||||||
// First byte of two.
|
|
||||||
unsigned int two = 0;
|
|
||||||
if (!test_eof()) {
|
|
||||||
two = (unsigned char)_input[_p++];
|
|
||||||
}
|
|
||||||
result = ((result & 0x1f) << 6) | (two & 0x3f);
|
|
||||||
|
|
||||||
} else if ((result & 0xf0) == 0xe0) {
|
|
||||||
// First byte of three.
|
|
||||||
unsigned int two = 0;
|
|
||||||
unsigned int three = 0;
|
|
||||||
if (!test_eof()) {
|
|
||||||
two = (unsigned char)_input[_p++];
|
|
||||||
}
|
|
||||||
if (!test_eof()) {
|
|
||||||
three = (unsigned char)_input[_p++];
|
|
||||||
}
|
|
||||||
result = ((result & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////
|
||||||
@ -112,9 +131,11 @@ get_next_character() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned int high = (unsigned char)_input[_p++];
|
unsigned int high = (unsigned char)_input[_p++];
|
||||||
unsigned int low = 0;
|
if (test_eof()) {
|
||||||
if (!test_eof()) {
|
text_cat.warning()
|
||||||
low = (unsigned char)_input[_p++];
|
<< "Unicode-encoded string has odd number of bytes.\n";
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
unsigned int low = (unsigned char)_input[_p++];
|
||||||
return ((high << 8) | low);
|
return ((high << 8) | low);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user