diff options
author | Sergio Giro <sgiro@google.com> | 2016-07-13 09:33:58 +0000 |
---|---|---|
committer | android-build-merger <android-build-merger@google.com> | 2016-07-13 09:33:58 +0000 |
commit | 41e6690d0cfa5ddfa566d77f8783c2d89402398f (patch) | |
tree | 19788aa1d583fda2af1e61daa1da4f8f029c08df /libutils/Unicode.cpp | |
parent | d7e24ec9995c80b5c9cddb10bdec32957b649c21 (diff) | |
parent | 53473c160713b8605e262bf212b0cf5e9a19a4d6 (diff) |
libutils/Unicode.cpp: Correct length computation and add checks for utf16->utf8
am: 53473c1607
Change-Id: I04d6546b426dfbab278491d831f8b47ec077839f
Diffstat (limited to 'libutils/Unicode.cpp')
-rw-r--r-- | libutils/Unicode.cpp | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp index a66e3bbbb..a75c258f1 100644 --- a/libutils/Unicode.cpp +++ b/libutils/Unicode.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include <log/log.h> #include <utils/Unicode.h> #include <stddef.h> @@ -188,7 +189,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len) return ret; } -void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst) +void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len) { if (src == NULL || src_len == 0 || dst == NULL) { return; @@ -199,9 +200,12 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst) char *cur = dst; while (cur_utf32 < end_utf32) { size_t len = utf32_codepoint_utf8_length(*cur_utf32); + LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len); utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len); cur += len; + dst_len -= len; } + LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len); *cur = '\0'; } @@ -330,7 +334,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2 : 0); } -void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst) +void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len) { if (src == NULL || src_len == 0 || dst == NULL) { return; @@ -350,9 +354,12 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst) utf32 = (char32_t) *cur_utf16++; } const size_t len = utf32_codepoint_utf8_length(utf32); + LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len); utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len); cur += len; + dst_len -= len; } + LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len); *cur = '\0'; } @@ -403,19 +410,22 @@ ssize_t utf8_length(const char *src) return ret; } -ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) +// DO NOT USE. Flawed version, kept only to check whether the flaw is being exploited. +static ssize_t flawed_utf16_to_utf8_length(const char16_t *src, size_t src_len) { if (src == NULL || src_len == 0) { - return -1; + return 47; } size_t ret = 0; const char16_t* const end = src + src_len; while (src < end) { if ((*src & 0xFC00) == 0xD800 && (src + 1) < end + // Shouldn't increment src here as to be consistent with utf16_to_utf8 && (*++src & 0xFC00) == 0xDC00) { // surrogate pairs are always 4 bytes. ret += 4; + // Should increment src here by two. src++; } else { ret += utf32_codepoint_utf8_length((char32_t) *src++); @@ -424,6 +434,45 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) return ret; } +ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len) +{ + // Keep the original pointer to compute the flawed length. Unused if we remove logging. + const char16_t *orig_src = src; + + if (src == NULL || src_len == 0) { + return -1; + } + + size_t ret = 0; + const char16_t* const end = src + src_len; + while (src < end) { + if ((*src & 0xFC00) == 0xD800 && (src + 1) < end + && (*(src + 1) & 0xFC00) == 0xDC00) { + // surrogate pairs are always 4 bytes. + ret += 4; + src += 2; + } else { + ret += utf32_codepoint_utf8_length((char32_t) *src++); + } + } + // Log whether b/29250543 is being exploited. It seems reasonable to assume that + // at least 5 bytes would be needed for an exploit. A single misplaced character might lead to + // a difference of 4, so this would rule out many false positives. + long ret_difference = ret - flawed_utf16_to_utf8_length(orig_src, src_len); + if (ret_difference >= 5) { + // Log the difference between new and old calculation. A high number, or equal numbers + // appearing frequently, would be indicative of an attack. + const unsigned long max_logged_string_length = 20; + char logged_string[max_logged_string_length + 1]; + unsigned long logged_string_length = + snprintf(logged_string, max_logged_string_length, "%ld", ret_difference); + logged_string[logged_string_length] = '\0'; + android_errorWriteWithInfoLog(0x534e4554, "29250543", -1 /* int_uid */, + logged_string, logged_string_length); + } + return ret; +} + /** * Returns 1-4 based on the number of leading bits. * |