angelauramc-openjdk-build/patches/jre_17/android/13_add_libtinyconv.diff

diff --git a/src/java.base/share/native/libtinyiconv/iconv.cpp b/src/java.base/share/native/libtinyiconv/iconv.cpp
new file mode 100644
index 000000000..7018b6ce2
--- /dev/null
+++ b/src/java.base/share/native/libtinyiconv/iconv.cpp
@@ -0,0 +1,438 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifdef __ANDROID__
+
+#include <ctype.h>
+#include <endian.h>
+#include <errno.h>
+#include <iconv.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <uchar.h>
+#include <wchar.h>
+
+__BEGIN_DECLS
+
+/*
+ * These return values are specified by POSIX for multibyte conversion
+ * functions.
+ */
+
+#ifdef __cplusplus
+#define __MB_ERR_ILLEGAL_SEQUENCE static_cast<size_t>(-1)
+#define __MB_ERR_INCOMPLETE_SEQUENCE static_cast<size_t>(-2)
+#else
+#define __MB_ERR_ILLEGAL_SEQUENCE ((size_t)(-1))
+#define __MB_ERR_INCOMPLETE_SEQUENCE ((size_t)(-2))
+#endif // __cplusplus
+// True if rv is either error result; rv is parenthesized so any expression is safe.
+#define __MB_IS_ERR(rv) ((rv) == __MB_ERR_ILLEGAL_SEQUENCE || (rv) == __MB_ERR_INCOMPLETE_SEQUENCE)
+// Returns the 1-based position of the highest non-zero byte in ps->__seq[0..2], or 0.
+static inline __wur size_t mbstate_bytes_so_far(const mbstate_t* ps) {
+  size_t count = 3;
+  while (count > 0 && ps->__seq[count - 1] == 0) --count;
+  return count;
+}
+static inline void mbstate_set_byte(mbstate_t* ps, int i, char byte) {  // Stores byte into ps->__seq[i].
+  ps->__seq[i] = (uint8_t)(byte);  // i is used as given; no bounds check is made here.
+}
+static inline __wur uint8_t mbstate_get_byte(const mbstate_t* ps, int n) {  // Reads ps->__seq[n].
+  return ps->__seq[n];  // n is used as given; no bounds check is made here.
+}
+// Sets errno to _errno, zeroes the first four bytes at ps->__seq and returns the
+// illegal-sequence result, (size_t)-1.
+static inline __wur size_t mbstate_reset_and_return_illegal(int _errno, mbstate_t* ps) {
+  errno = _errno;
+  // memset() rather than a store through a casted uint32_t*: that store assumed
+  // alignment and aliasing that __seq, accessed bytewise elsewhere, need not provide.
+  memset(ps->__seq, 0, sizeof(uint32_t));
+  return __MB_ERR_ILLEGAL_SEQUENCE;
+}
+// Zeroes the first four bytes at ps->__seq and returns _return, converted to the
+// size_t result type.
+static inline __wur size_t mbstate_reset_and_return(int _return, mbstate_t* ps) {
+  // memset() rather than a store through a casted uint32_t*: that store assumed
+  // alignment and aliasing that __seq, accessed bytewise elsewhere, need not provide.
+  memset(ps->__seq, 0, sizeof(uint32_t));
+  return _return;
+}
+
+#ifdef __cplusplus  // INVALID_ICONV_T: the value -1 converted to iconv_t, used as the "no converter" handle.
+# define INVALID_ICONV_T reinterpret_cast<iconv_t>(-1)  // Returned by iconv_open() on failure.
+#else // !__cplusplus
+# define INVALID_ICONV_T (iconv_t)(-1)  // iconv() and iconv_close() reject it with EBADF.
+#endif // __cplusplus
+
+// Ideally we'd use icu4c but the API mismatch seems too great. So we just offer something
+// equivalent to (but slightly easier to use for runs of text than) <uchar.h>. If you're
+// here to add more encodings, consider working on finishing the icu4c NDK wrappers instead.
+
+#ifdef __cplusplus
+ enum Encoding
+#else
+ typedef enum
+#endif // __cplusplus
+{  // Encodings the converter can use as source or destination.
+  US_ASCII,   // One byte per character; values above 0x7f are flagged with EILSEQ.
+  UTF_8,      // Decoded with mbrtoc32() and encoded with c32rtomb().
+  UTF_16_LE,  // 16-bit units, low byte first, with surrogate pairs above 0xffff.
+  UTF_16_BE,  // As UTF_16_LE, but every 16-bit unit is byte-swapped.
+  UTF_32_LE,  // One 32-bit unit per character, low byte first.
+  UTF_32_BE,  // As UTF_32_LE, but byte-swapped.
+  WCHAR_T,    // Shares the UTF_32_LE code path (four bytes, no byte swap).
+#ifdef __cplusplus
+ };
+#else
+ } Encoding;
+#endif // __cplusplus
+
+#ifdef __cplusplus
+ enum Mode
+#else
+ typedef enum
+#endif // __cplusplus
+{  // What the converter does with a character it cannot decode or encode.
+  ERROR,     // Default set by iconv_open(): the conversion call fails.
+  IGNORE,    // "//IGNORE" suffix: the character is skipped; the call still ends with -1 and EILSEQ.
+  TRANSLIT,  // "//TRANSLIT" suffix: '?' is substituted and the substitutions are counted.
+#ifdef __cplusplus
+ };
+#else
+ } Mode;
+#endif // __cplusplus
+
+// This matching is strange but true.
+// See http://www.unicode.org/reports/tr22/#Charset_Alias_Matching.
+static bool __match_encoding(const char* lhs, const char* rhs) {
+  while (*lhs && *rhs) {
+    // Skip non-alnum in lhs; "UTF-8", "UTF_8", "UTF8", "UTF 8" are all equivalent.
+    // Also skip any '0' that is directly followed by a digit, so "UTF-08" is "UTF8".
+    // <ctype.h> functions need unsigned char values; a negative plain char is undefined.
+    for (; *lhs; ++lhs) {
+      if (isalnum((unsigned char)*lhs) && (*lhs != '0' || !isdigit((unsigned char)lhs[1]))) break;
+    }
+    // Case doesn't matter either.
+    if (tolower((unsigned char)*lhs) != tolower((unsigned char)*rhs)) break;
+    ++lhs;
+    ++rhs;
+  }
+  // As a special case we treat the GNU "//" extensions as end of string.
+  return (*lhs == '\0' || strncmp(lhs, "//", 2) == 0) && *rhs == '\0';
+}
+
+// Parses an encoding name with an optional GNU "//IGNORE" or "//TRANSLIT" suffix.
+// On success stores the encoding, plus the mode when a suffix is present, and returns
+// true. Unknown names, unknown suffixes, and any suffix given while `mode` is null
+// yield false; *mode may already have been written when the name itself is unknown.
+static bool __parse_encoding(const char* s, Encoding* encoding, Mode* mode) {
+  // Canonical spellings understood by __match_encoding().
+  static const struct {
+    const char* name;
+    Encoding value;
+  } kKnown[] = {
+      {"utf8", UTF_8},        {"ascii", US_ASCII},    {"usascii", US_ASCII},
+      {"utf16le", UTF_16_LE}, {"utf16be", UTF_16_BE}, {"utf32le", UTF_32_LE},
+      {"utf32be", UTF_32_BE}, {"wchart", WCHAR_T},
+  };
+
+  const char* tail = strstr(s, "//");
+  if (tail != NULL) {
+    if (mode == NULL) return false;
+    const bool ignore = (strcmp(tail, "//IGNORE") == 0);
+    if (!ignore && strcmp(tail, "//TRANSLIT") != 0) return false;
+    *mode = ignore ? IGNORE : TRANSLIT;
+  }
+
+  for (size_t i = 0; i < sizeof(kKnown) / sizeof(kKnown[0]); ++i) {
+    if (__match_encoding(s, kKnown[i].name)) {
+      *encoding = kKnown[i].value;
+      return true;
+    }
+  }
+  return false;
+}
+
+struct __iconv_t {  // Converter behind the opaque iconv_t handle.
+  Encoding src_encoding;
+  Encoding dst_encoding;
+  Mode mode;  // No constructor: iconv_open() stores ERROR here before parsing the names.
+
+  // Converts input to output until the input is used up or an error occurs, advancing
+  // both buffers and decrementing both counts. Returns 0, or the number of '?'
+  // substitutions in TRANSLIT mode. Returns -1 with errno set to EILSEQ (unconvertible
+  // or skipped input), EINVAL (input ends mid-character) or E2BIG (output is full).
+  int Convert(char** src_buf0, size_t* src_bytes_left0, char** dst_buf0, size_t* dst_bytes_left0) {
+    // Reset state.
+    wc = 0;
+    memset(&ps, 0, sizeof(ps));
+    replacement_count = 0;
+    ignored = false;
+    src_buf = src_buf0;
+    src_bytes_left = src_bytes_left0;
+    dst_buf = dst_buf0;
+    dst_bytes_left = dst_bytes_left0;
+    while (*src_bytes_left > 0) {
+      if (!GetNext()) return -1;
+      if (*src_bytes_left > 0 && !Convert()) return -1;  // IGNORE may have emptied the input.
+    }
+    return Done();
+  }
+ private:
+  char32_t wc;               // Code point currently being converted.
+  char buf[16];              // `wc` encoded for the destination.
+  size_t src_bytes_used;     // Input bytes occupied by `wc`.
+  size_t dst_bytes_used;     // Number of valid bytes in `buf`.
+  mbstate_t ps;              // State passed to mbrtoc32() and c32rtomb().
+  size_t replacement_count;  // '?' substitutions made during this call.
+  bool ignored;              // Set once IGNORE mode has skipped something.
+  char** src_buf;            // The caller's four in/out arguments, saved by Convert().
+  size_t* src_bytes_left;
+  char** dst_buf;
+  size_t* dst_bytes_left;
+  // Decodes the next character into `wc` without consuming it; bad input is handled per `mode`.
+  bool GetNext() {
+    errno = 0;
+    switch (src_encoding) {
+      case US_ASCII:
+        wc = **src_buf;
+        src_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+      case UTF_8:
+        src_bytes_used = mbrtoc32(&wc, *src_buf, *src_bytes_left, &ps);
+        if (src_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          // EILSEQ already set. (size_t)-1 is not a length: step over one bad byte instead.
+          src_bytes_used = 1;
+          memset(&ps, 0, sizeof(ps));  // Let the decoder start afresh after the bad byte.
+        } else if (src_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        } else if (src_bytes_used == 0) {
+          src_bytes_used = 1;  // mbrtoc32() returns 0 for NUL, which is still one byte long.
+        }
+        break;
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        if (*src_bytes_left < 2) {
+          errno = EINVAL;
+          return false;
+        }
+        bool swap = (src_encoding == UTF_16_BE);
+        wc = In16(*src_buf, swap);
+        // 0xd800-0xdbff: high surrogates
+        // 0xdc00-0xdfff: low surrogates
+        if (wc >= 0xd800 && wc <= 0xdfff) {
+          if (wc >= 0xdc00) {  // Low surrogate before high surrogate.
+            errno = EILSEQ;
+            return false;
+          }
+          if (*src_bytes_left < 4) {
+            errno = EINVAL;
+            return false;
+          }
+          uint16_t hi = wc;
+          uint16_t lo = In16(*src_buf + 2, swap);
+          wc = 0x10000 + ((hi - 0xd800) << 10) + (lo - 0xdc00);
+          src_bytes_used = 4;
+        }
+        break;
+      }
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        if (*src_bytes_left < 4) {
+          errno = EINVAL;
+          return false;
+        }
+        wc = In32(*src_buf, (src_encoding == UTF_32_BE));
+        break;
+    }
+    if (errno == EILSEQ) {
+      switch (mode) {
+        case ERROR:
+          return false;
+        case IGNORE:
+          *src_buf += src_bytes_used;
+          *src_bytes_left -= src_bytes_used;
+          ignored = true;
+          return (*src_bytes_left == 0) ? true : GetNext();  // Never decode an empty buffer.
+        case TRANSLIT:
+          wc = '?';
+          ++replacement_count;
+          return true;
+      }
+    }
+    return true;
+  }
+
+  bool Convert() {  // Encodes `wc` into `buf` and emits it; unencodable characters follow `mode`.
+    errno = 0;
+    switch (dst_encoding) {
+      case US_ASCII:
+        buf[0] = wc;
+        dst_bytes_used = 1;
+        if (wc > 0x7f) errno = EILSEQ;
+        break;
+      case UTF_8:
+        dst_bytes_used = c32rtomb(buf, wc, &ps);
+        if (dst_bytes_used == __MB_ERR_ILLEGAL_SEQUENCE) {
+          break;  // EILSEQ already set.
+        } else if (dst_bytes_used == __MB_ERR_INCOMPLETE_SEQUENCE) {
+          errno = EINVAL;
+          return false;
+        }
+        break;
+      case UTF_16_BE:
+      case UTF_16_LE: {
+        bool swap = (dst_encoding == UTF_16_BE);
+        if (wc < 0x10000) {  // BMP.
+          Out16(buf, wc, swap);
+        } else {  // Supplementary plane; output surrogate pair.
+          wc -= 0x10000;
+          char16_t hi = 0xd800 | (wc >> 10);
+          char16_t lo = 0xdc00 | (wc & 0x3ff);
+          Out16(buf + 0, hi, swap);
+          Out16(buf + 2, lo, swap);
+          dst_bytes_used = 4;
+        }
+      } break;
+      case UTF_32_BE:
+      case UTF_32_LE:
+      case WCHAR_T:
+        Out32(wc, (dst_encoding == UTF_32_BE));
+        break;
+    }
+    if (errno == EILSEQ) {
+      if (mode == IGNORE) {
+        *src_buf += src_bytes_used;
+        *src_bytes_left -= src_bytes_used;
+        ignored = true;
+        return true;
+      } else if (mode == TRANSLIT) {
+        wc = '?';
+        ++replacement_count;
+        return Convert();
+      }
+      return false;
+    }
+    return Emit();
+  }
+
+  uint16_t In16(const char* in, bool swap) {  // Reads 2 bytes, low byte first; swapped if `swap`.
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(in);
+    uint16_t unit = (src[0]) | (src[1] << 8);
+    if (swap) unit = __swap16(unit);
+    src_bytes_used = 2;
+    return unit;
+  }
+
+  uint32_t In32(const char* in, bool swap) {  // Reads 4 bytes, low byte first; swapped if `swap`.
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(in);
+    // src[3] is widened before shifting: as a promoted int, `<< 24` could overflow.
+    uint32_t value = (src[0]) | (src[1] << 8) | (src[2] << 16) | (static_cast<uint32_t>(src[3]) << 24);
+    if (swap) value = __swap32(value);
+    src_bytes_used = 4;
+    return value;
+  }
+
+  void Out16(char* dst, char16_t ch, bool swap) {  // Writes `ch` to dst[0..1], low byte first.
+    if (swap) ch = __swap16(ch);
+    dst[0] = ch;
+    dst[1] = ch >> 8;
+    dst_bytes_used = 2;
+  }
+
+  void Out32(char32_t ch, bool swap) {  // Writes `ch` to buf[0..3], low byte first.
+    if (swap) ch = __swap32(ch);
+    buf[0] = ch;
+    buf[1] = ch >> 8;
+    buf[2] = ch >> 16;
+    buf[3] = ch >> 24;
+    dst_bytes_used = 4;
+  }
+
+  bool Emit() {  // Copies `buf` to the output and consumes the character from the input.
+    if (dst_bytes_used > *dst_bytes_left) {
+      errno = E2BIG;
+      return false;
+    }
+    memcpy(*dst_buf, buf, dst_bytes_used);
+    *src_buf += src_bytes_used;
+    *src_bytes_left -= src_bytes_used;
+    *dst_buf += dst_bytes_used;
+    *dst_bytes_left -= dst_bytes_used;
+    return true;
+  }
+
+  int Done() {  // Result for a call that reached the end of its input.
+    if (mode == TRANSLIT) return replacement_count;
+    if (ignored) {
+      errno = EILSEQ;
+      return -1;
+    }
+    return 0;
+  }
+};
+
+iconv_t iconv_open(const char* __dst_encoding, const char* __src_encoding) {
+  // Allocate with malloc() because iconv_close() uses free(); iconv_t() is only a null pointer.
+  iconv_t result = static_cast<iconv_t>(malloc(sizeof(struct __iconv_t)));
+  if (result == NULL) return INVALID_ICONV_T;  // Out of memory; malloc() has set errno.
+  result->mode = ERROR;
+  if (__parse_encoding(__src_encoding, &result->src_encoding, 0 /* nullptr */) &&
+      __parse_encoding(__dst_encoding, &result->dst_encoding, &result->mode)) return result;
+  free(result);  // Unknown encoding name, or a "//" suffix that is not accepted.
+  errno = EINVAL;
+  return INVALID_ICONV_T;
+}
+
+size_t iconv(iconv_t __converter,
+             char** __src_buf, size_t* __src_bytes_left, char** __dst_buf, size_t* __dst_bytes_left) {
+  if (__converter == INVALID_ICONV_T) {
+    errno = EBADF;
+    return -1;
+  }
+  if (__src_buf == NULL || *__src_buf == NULL) return 0;  // POSIX state reset; nothing is kept.
+  return __converter->Convert(__src_buf, __src_bytes_left, __dst_buf, __dst_bytes_left);
+}
+
+// Releases a handle obtained from iconv_open(). Returns 0, or -1 with errno == EBADF
+// when given the invalid-handle sentinel, which is never passed to free().
+int iconv_close(iconv_t __converter) {
+  const bool valid = (__converter != INVALID_ICONV_T);
+  if (valid) free(__converter);
+  if (!valid) errno = EBADF;
+  return valid ? 0 : -1;
+}
+
+__END_DECLS
+
+#endif // __ANDROID__