mirror of
https://github.com/AngelAuraMC/angelauramc-openjdk-build.git
synced 2025-09-13 06:06:38 -04:00
445 lines
12 KiB
Diff
445 lines
12 KiB
Diff
diff --git a/src/java.base/share/native/libtinyiconv/iconv.cpp b/src/java.base/share/native/libtinyiconv/iconv.cpp
|
|
new file mode 100644
|
|
index 000000000..7018b6ce2
|
|
--- /dev/null
|
|
+++ b/src/java.base/share/native/libtinyiconv/iconv.cpp
|
|
@@ -0,0 +1,438 @@
|
|
+/*
|
|
+ * Copyright (C) 2017 The Android Open Source Project
|
|
+ * All rights reserved.
|
|
+ *
|
|
+ * Redistribution and use in source and binary forms, with or without
|
|
+ * modification, are permitted provided that the following conditions
|
|
+ * are met:
|
|
+ * * Redistributions of source code must retain the above copyright
|
|
+ * notice, this list of conditions and the following disclaimer.
|
|
+ * * Redistributions in binary form must reproduce the above copyright
|
|
+ * notice, this list of conditions and the following disclaimer in
|
|
+ * the documentation and/or other materials provided with the
|
|
+ * distribution.
|
|
+ *
|
|
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
|
|
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
|
|
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
|
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
+ * SUCH DAMAGE.
|
|
+ */
|
|
+
|
|
+#ifdef __ANDROID__
|
|
+
|
|
+#include <ctype.h>
|
|
+#include <endian.h>
|
|
+#include <errno.h>
|
|
+#include <iconv.h>
|
|
+#include <stdbool.h>
|
|
+#include <stdint.h>
|
|
+#include <stdlib.h>
|
|
+#include <string.h>
|
|
+#include <uchar.h>
|
|
+#include <wchar.h>
|
|
+
|
|
+__BEGIN_DECLS
|
|
+
|
|
+/*
|
|
+ * These return values are specified by POSIX for multibyte conversion
|
|
+ * functions.
|
|
+ */
|
|
+
|
|
+#ifdef __cplusplus
+#define __MB_ERR_ILLEGAL_SEQUENCE static_cast<size_t>(-1)
+#define __MB_ERR_INCOMPLETE_SEQUENCE static_cast<size_t>(-2)
+#else
+#define __MB_ERR_ILLEGAL_SEQUENCE ((size_t)(-1))
+#define __MB_ERR_INCOMPLETE_SEQUENCE ((size_t)(-2))
+#endif // __cplusplus
+// True when rv equals one of the two sentinels above. The argument is parenthesized so
+// that any expression can be passed; note that it is evaluated twice.
+#define __MB_IS_ERR(rv) ((rv) == __MB_ERR_ILLEGAL_SEQUENCE || \
+                         (rv) == __MB_ERR_INCOMPLETE_SEQUENCE)
|
|
+// Returns the 1-based position of the highest nonzero byte among the first three slots
+// of ps->__seq, or 0 when all three are zero.
+static inline __wur size_t mbstate_bytes_so_far(const mbstate_t* ps) {
+  for (size_t count = 3; count > 0; --count) {
+    if (ps->__seq[count - 1] != 0) return count;
+  }
+  return 0;
+}
|
|
+// Stores `byte`, reinterpreted as an unsigned 8-bit value, into slot `i` of ps->__seq.
+// No bounds check is performed on `i`.
+static inline void mbstate_set_byte(mbstate_t* ps, int i, char byte) {
+  const uint8_t raw = (uint8_t)byte;
+  ps->__seq[i] = raw;
+}
|
|
+// Reads back slot `n` of ps->__seq. No bounds check is performed on `n`.
+static inline __wur uint8_t mbstate_get_byte(const mbstate_t* ps, int n) {
+  const uint8_t stored = ps->__seq[n];
+  return stored;
+}
|
|
+// Sets errno to `_errno`, zeroes the first four bytes of ps->__seq, and returns the
+// illegal-sequence sentinel.
+static inline __wur size_t mbstate_reset_and_return_illegal(int _errno, mbstate_t* ps) {
+  errno = _errno;
+  // memset instead of a store through a uint32_t* cast: the cast store violated strict
+  // aliasing and assumed the byte array is 4-byte aligned. The same four bytes are cleared.
+  memset(ps->__seq, 0, sizeof(uint32_t));
+  return __MB_ERR_ILLEGAL_SEQUENCE;
+}
|
|
+// Zeroes the first four bytes of ps->__seq and returns `_return` converted to size_t.
+static inline __wur size_t mbstate_reset_and_return(int _return, mbstate_t* ps) {
+  // memset instead of a store through a uint32_t* cast: the cast store violated strict
+  // aliasing and assumed the byte array is 4-byte aligned. The same four bytes are cleared.
+  memset(ps->__seq, 0, sizeof(uint32_t));
+  return _return;
+}
|
|
+
|
|
+// The value iconv_open() returns on failure. The C expansion is wrapped in parentheses so
+// it behaves as a single operand in any expression (e.g. under sizeof).
+#ifdef __cplusplus
+# define INVALID_ICONV_T reinterpret_cast<iconv_t>(-1)
+#else // !__cplusplus
+# define INVALID_ICONV_T ((iconv_t)(-1))
+#endif // __cplusplus
|
|
+
|
|
+// Ideally we'd use icu4c but the API mismatch seems too great. So we just offer something
|
|
+// equivalent to (but slightly easier to use for runs of text than) <uchar.h>. If you're
|
|
+// here to add more encodings, consider working on finishing the icu4c NDK wrappers instead.
|
|
+
|
|
+// Encodings the converter can read and write. A tagged typedef is accepted by both C and
+// C++ compilers, so no per-language spelling is needed.
+typedef enum Encoding {
+  US_ASCII,
+  UTF_8,
+  UTF_16_LE,
+  UTF_16_BE,
+  UTF_32_LE,
+  UTF_32_BE,
+  WCHAR_T,
+} Encoding;
|
|
+
|
|
+// What to do with a character that cannot be converted: fail, drop it, or substitute a
+// replacement. A tagged typedef is accepted by both C and C++ compilers.
+typedef enum Mode {
+  ERROR,
+  IGNORE,
+  TRANSLIT,
+} Mode;
|
|
+
|
|
+// This matching is strange but true.
|
|
+// See http://www.unicode.org/reports/tr22/#Charset_Alias_Matching.
|
|
+// Returns true if the caller-supplied charset name `lhs` matches the canonical name `rhs`.
+// `rhs` is compared as-is apart from case; in `lhs`, non-alphanumeric characters and any
+// '0' that is immediately followed by another digit are skipped. A trailing GNU-style
+// "//..." suffix on `lhs` is treated as end of string.
+static bool __match_encoding(const char* lhs, const char* rhs) {
+  while (*lhs && *rhs) {
+    // Skip non-alnum in lhs; "UTF-8", "UTF_8", "UTF8", "UTF 8" are all equivalent.
+    // Bytes go through unsigned char because the <ctype.h> functions are undefined for
+    // negative arguments, which plain char can produce for non-ASCII input.
+    for (; *lhs; ++lhs) {
+      const unsigned char cur = (unsigned char)lhs[0];
+      const unsigned char next = (unsigned char)lhs[1];  // At worst the terminator.
+      if (isalnum(cur) && (cur != '0' || !isdigit(next))) break;
+    }
+    // Case doesn't matter either.
+    if (tolower((unsigned char)*lhs) != tolower((unsigned char)*rhs)) break;
+    ++lhs;
+    ++rhs;
+  }
+  // As a special case we treat the GNU "//" extensions as end of string.
+  return (*lhs == '\0' || strncmp(lhs, "//", 2) == 0) && *rhs == '\0';
+}
|
|
+
|
|
+// Parses an iconv_open()-style encoding name. An optional "//IGNORE" or "//TRANSLIT"
+// suffix is stored through `mode`; any other "//" suffix, or a suffix when `mode` is
+// null, is rejected. On a recognized name the encoding is stored through `encoding` and
+// true is returned; otherwise false is returned (a valid suffix may already have been
+// written to *mode).
+static bool __parse_encoding(const char* s, Encoding* encoding, Mode* mode) {
+  // Checked in order; the first matching alias wins.
+  static const struct {
+    const char* alias;
+    Encoding value;
+  } known[] = {
+    {"utf8", UTF_8},
+    {"ascii", US_ASCII},
+    {"usascii", US_ASCII},
+    {"utf16le", UTF_16_LE},
+    {"utf16be", UTF_16_BE},
+    {"utf32le", UTF_32_LE},
+    {"utf32be", UTF_32_BE},
+    {"wchart", WCHAR_T},
+  };
+
+  const char* flags = strstr(s, "//");
+  if (flags) {
+    if (!mode) return false;
+    if (strcmp(flags, "//IGNORE") == 0) {
+      *mode = IGNORE;
+    } else if (strcmp(flags, "//TRANSLIT") == 0) {
+      *mode = TRANSLIT;
+    } else {
+      return false;
+    }
+  }
+
+  for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
+    if (__match_encoding(s, known[i].alias)) {
+      *encoding = known[i].value;
+      return true;
+    }
+  }
+  return false;
+}
|
|
+
|
|
+// Converter state behind an iconv_t handle. The encodings and mode are public so that
+// iconv_open() can fill them in; the remaining members are scratch state that Convert()
+// resets at the start of every call.
+struct __iconv_t {
+  Encoding src_encoding;
+  Encoding dst_encoding;
+  Mode mode;
+
+  // Converts characters from *src_buf0 into *dst_buf0 until the input is exhausted or an
+  // error occurs, advancing both pointers and decrementing both byte counts as it goes.
+  // All four arguments must be non-null.
+  // Returns -1 with errno set on failure: EILSEQ for an unconvertible character, EINVAL
+  // for truncated input, E2BIG when the output is full, and EILSEQ at the end of an
+  // IGNORE-mode run that dropped something. Otherwise returns the number of '?'
+  // replacements in TRANSLIT mode, or 0.
+  int Convert(char** src_buf0, size_t* src_bytes_left0, char** dst_buf0, size_t* dst_bytes_left0) {
+    // Reset state.
+    wc = 0;
+    memset(&ps, 0, sizeof(ps));
+    replacement_count = 0;
+    ignored = false;
+    src_buf = src_buf0;
+    src_bytes_left = src_bytes_left0;
+    dst_buf = dst_buf0;
+    dst_bytes_left = dst_bytes_left0;
+
+    while (*src_bytes_left > 0) {
+      if (!GetNext()) return -1;
+      // GetNext() only consumes input while skipping illegal bytes in IGNORE mode. If that
+      // drained the input there is no decoded character left to convert.
+      if (*src_bytes_left == 0) break;
+      if (!Convert()) return -1;
+    }
+    return Done();
+  }
+
+ private:
+  char32_t wc;
+  char buf[16];
+  size_t src_bytes_used;
+  size_t dst_bytes_used;
+  mbstate_t ps;
+  size_t replacement_count;
+  bool ignored;
+  char** src_buf;
+  size_t* src_bytes_left;
+  char** dst_buf;
+  size_t* dst_bytes_left;
+
+  // Produces the next source character in `wc`, applying `mode` to illegal input.
+  // Returns false (errno set) when conversion must stop. Returns true either with a
+  // character to convert or, in IGNORE mode, with *src_bytes_left == 0 after skipping
+  // trailing illegal bytes.
+  bool GetNext() {
+    // A loop rather than recursion: a long run of illegal bytes in IGNORE mode must not
+    // grow the stack, and must not decode past the end of the input.
+    for (;;) {
+      errno = 0;
+      if (!DecodeOne()) return false;
+      if (errno != EILSEQ) return true;
+
+      if (mode == TRANSLIT) {
+        wc = '?';
+        ++replacement_count;
+        return true;
+      }
+      if (mode != IGNORE) return false;
+
+      *src_buf += src_bytes_used;
+      *src_bytes_left -= src_bytes_used;
+      ignored = true;
+      if (*src_bytes_left == 0) return true;
+    }
+  }
+
+  // Decodes one character at *src_buf into `wc` and records its encoded length in
+  // src_bytes_used, without consuming it. Returns false with errno set for errors that
+  // always stop conversion (truncated input, bad UTF-16 surrogates). Returns true with
+  // errno == EILSEQ for an illegal sequence that `mode` may recover from.
+  bool DecodeOne() {
+    switch (src_encoding) {
+      case US_ASCII:
+        wc = static_cast<uint8_t>(**src_buf);
+        src_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+
+      case UTF_8:
+        src_bytes_used = mbrtoc32(&wc, *src_buf, *src_bytes_left, &ps);
+        if (src_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          // The sentinel is not a length: skip exactly one byte and restart from a clean
+          // shift state so that IGNORE/TRANSLIT make forward progress.
+          errno = EILSEQ;
+          memset(&ps, 0, sizeof(ps));
+          src_bytes_used = 1;
+        } else if (src_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        } else if (src_bytes_used == 0) {
+          // mbrtoc32() returns 0 for a decoded NUL, which still occupies one input byte.
+          src_bytes_used = 1;
+        }
+        break;
+
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        if (*src_bytes_left < 2) {
+          errno = EINVAL;
+          return false;
+        }
+        bool swap = (src_encoding == UTF_16_BE);
+        wc = In16(*src_buf, swap);
+        // 0xd800-0xdbff: high surrogates
+        // 0xdc00-0xdfff: low surrogates
+        if (wc >= 0xd800 && wc <= 0xdfff) {
+          if (wc >= 0xdc00) {  // Low surrogate before high surrogate.
+            errno = EILSEQ;
+            return false;
+          }
+          if (*src_bytes_left < 4) {
+            errno = EINVAL;
+            return false;
+          }
+          uint16_t hi = wc;
+          uint16_t lo = In16(*src_buf + 2, swap);
+          if (lo < 0xdc00 || lo > 0xdfff) {  // High surrogate without its partner.
+            errno = EILSEQ;
+            return false;
+          }
+          wc = 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00);
+          src_bytes_used = 4;
+        }
+        break;
+      }
+
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        if (*src_bytes_left < 4) {
+          errno = EINVAL;
+          return false;
+        }
+        wc = In32(*src_buf, (src_encoding == UTF_32_BE));
+        break;
+    }
+    return true;
+  }
+
+  // Encodes `wc` into `buf` for the destination encoding and emits it. Unencodable
+  // characters are dropped (IGNORE), replaced by '?' (TRANSLIT), or fail with EILSEQ.
+  bool Convert() {
+    errno = 0;
+    switch (dst_encoding) {
+      case US_ASCII:
+        buf[0] = wc;
+        dst_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+
+      case UTF_8:
+        dst_bytes_used = c32rtomb(buf, wc, &ps);
+        if (dst_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          errno = EILSEQ;
+        } else if (dst_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        }
+        break;
+
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        bool swap = (dst_encoding == UTF_16_BE);
+        if (wc < 0x10000) {  // BMP.
+          Out16(buf, wc, swap);
+        } else if (wc > 0x10ffff) {  // Not representable as a surrogate pair.
+          errno = EILSEQ;
+        } else {  // Supplementary plane; output surrogate pair.
+          char32_t offset = wc - 0x10000;
+          Out16(buf + 0, 0xd800 | (offset >> 10), swap);
+          Out16(buf + 2, 0xdc00 | (offset & 0x3ff), swap);
+          dst_bytes_used = 4;
+        }
+        break;
+      }
+
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        Out32(wc, (dst_encoding == UTF_32_BE));
+        break;
+    }
+
+    if (errno == EILSEQ) {
+      if (mode == IGNORE) {
+        *src_buf += src_bytes_used;
+        *src_bytes_left -= src_bytes_used;
+        ignored = true;
+        return true;
+      } else if (mode == TRANSLIT) {
+        wc = '?';
+        ++replacement_count;
+        return Convert();
+      }
+      return false;
+    }
+    return Emit();
+  }
+
+  // Reads a 16-bit unit (little-endian, or big-endian when `swap`) and sets
+  // src_bytes_used to 2.
+  uint16_t In16(const char* in, bool swap) {
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(in);
+    uint16_t value = (src[0]) | (src[1] << 8);
+    if (swap) value = __swap16(value);
+    src_bytes_used = 2;
+    return value;
+  }
+
+  // Reads a 32-bit unit (little-endian, or big-endian when `swap`) and sets
+  // src_bytes_used to 4.
+  uint32_t In32(const char* in, bool swap) {
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(in);
+    // Widen before shifting: a promoted int shifted left by 24 overflows for bytes >= 0x80.
+    uint32_t value = static_cast<uint32_t>(src[0]) |
+                     (static_cast<uint32_t>(src[1]) << 8) |
+                     (static_cast<uint32_t>(src[2]) << 16) |
+                     (static_cast<uint32_t>(src[3]) << 24);
+    if (swap) value = __swap32(value);
+    src_bytes_used = 4;
+    return value;
+  }
+
+  // Writes a 16-bit unit to `dst` (byte-swapped when `swap`) and sets dst_bytes_used to 2.
+  void Out16(char* dst, char16_t ch, bool swap) {
+    if (swap) ch = __swap16(ch);
+    dst[0] = ch;
+    dst[1] = ch >> 8;
+    dst_bytes_used = 2;
+  }
+
+  // Writes a 32-bit unit to `buf` (byte-swapped when `swap`) and sets dst_bytes_used to 4.
+  void Out32(char32_t ch, bool swap) {
+    if (swap) ch = __swap32(ch);
+    buf[0] = ch;
+    buf[1] = ch >> 8;
+    buf[2] = ch >> 16;
+    buf[3] = ch >> 24;
+    dst_bytes_used = 4;
+  }
+
+  // Copies the encoded character to the output and consumes the source bytes it came
+  // from. Fails with E2BIG, consuming nothing, when the output has too little room.
+  bool Emit() {
+    if (dst_bytes_used > *dst_bytes_left) {
+      errno = E2BIG;
+      return false;
+    }
+    memcpy(*dst_buf, buf, dst_bytes_used);
+    *src_buf += src_bytes_used;
+    *src_bytes_left -= src_bytes_used;
+    *dst_buf += dst_bytes_used;
+    *dst_bytes_left -= dst_bytes_used;
+    return true;
+  }
+
+  // Computes the result once all input has been consumed.
+  int Done() {
+    if (mode == TRANSLIT) return replacement_count;
+    if (ignored) {
+      errno = EILSEQ;
+      return -1;
+    }
+    return 0;
+  }
+};
|
|
+
|
|
+// Creates a converter from `__src_encoding` to `__dst_encoding`. Only the destination
+// name may carry a "//IGNORE" or "//TRANSLIT" suffix. Returns INVALID_ICONV_T with errno
+// set to EINVAL for a null or unsupported name, or ENOMEM if allocation fails. The
+// handle is released with iconv_close().
+iconv_t iconv_open(const char* __dst_encoding, const char* __src_encoding) {
+  if (!__dst_encoding || !__src_encoding) {
+    errno = EINVAL;
+    return INVALID_ICONV_T;
+  }
+  // The handle is released with free(), so it must come from the malloc family. A
+  // value-initialized iconv_t is just a null pointer and must not be dereferenced.
+  iconv_t result = static_cast<iconv_t>(calloc(1, sizeof(*result)));
+  if (!result) {
+    errno = ENOMEM;
+    return INVALID_ICONV_T;
+  }
+  result->mode = ERROR;
+  if (!__parse_encoding(__src_encoding, &result->src_encoding, 0 /* nullptr */) ||
+      !__parse_encoding(__dst_encoding, &result->dst_encoding, &result->mode)) {
+    free(result);
+    errno = EINVAL;
+    return INVALID_ICONV_T;
+  }
+  return result;
+}
|
|
+
|
|
+// Converts from *__src_buf into *__dst_buf, updating both pointers and byte counts.
+// Returns (size_t)-1 with errno set on failure (EBADF for an invalid converter;
+// otherwise whatever the conversion reports); on success returns the value the
+// converter reports.
+size_t iconv(iconv_t __converter,
+             char** __src_buf, size_t* __src_bytes_left,
+             char** __dst_buf, size_t* __dst_bytes_left) {
+  if (__converter == INVALID_ICONV_T) {
+    errno = EBADF;
+    return static_cast<size_t>(-1);
+  }
+  // POSIX: a null inbuf (or one pointing to a null pointer) asks for the converter to be
+  // put back in its initial shift state. Every call already starts from a freshly reset
+  // state, so there is nothing to do, and the other pointers must not be dereferenced.
+  if (!__src_buf || !*__src_buf) return 0;
+  return __converter->Convert(__src_buf, __src_bytes_left, __dst_buf, __dst_bytes_left);
+}
|
|
+
|
|
+// Releases a converter returned by iconv_open(). Returns 0 on success; for the invalid
+// handle value it sets errno to EBADF and returns -1.
+int iconv_close(iconv_t __converter) {
+  if (__converter != INVALID_ICONV_T) {
+    free(__converter);
+    return 0;
+  }
+  errno = EBADF;
+  return -1;
+}
|
|
+
|
|
+__END_DECLS
|
|
+
|
|
+#endif // __ANDROID__
|