From 1cfa56d46c7c31300c25dafd722ff60a5294c3b3 Mon Sep 17 00:00:00 2001
From: Sergio Giro <sgiro@google.com>
Date: Tue, 28 Jun 2016 18:02:29 +0100
Subject: libutils/Unicode.cpp: Correct length computation and add checks for
 utf16->utf8

Inconsistent behaviour between utf16_to_utf8 and utf16_to_utf8_length
is causing a heap overflow.

Correcting the length computation and adding bound checks to the
conversion functions.

Test: ran libutils_tests
Bug: 29250543
Change-Id: I6115e3357141ed245c63c6eb25fc0fd0a9a7a2bb
(cherry picked from commit c4966a363e46d2e1074d1a365e232af0dcedd6a1)
---
 libutils/Unicode.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'libutils/Unicode.cpp')

diff --git a/libutils/Unicode.cpp b/libutils/Unicode.cpp
index 1aca8e782..5f96efa79 100644
--- a/libutils/Unicode.cpp
+++ b/libutils/Unicode.cpp
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <log/log.h>
 #include <utils/Unicode.h>
 
 #include <limits.h>
@@ -183,7 +184,7 @@ ssize_t utf32_to_utf8_length(const char32_t *src, size_t src_len)
     return ret;
 }
 
-void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
+void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL) {
         return;
@@ -194,9 +195,12 @@ void utf32_to_utf8(const char32_t* src, size_t src_len, char* dst)
     char *cur = dst;
     while (cur_utf32 < end_utf32) {
         size_t len = utf32_codepoint_utf8_length(*cur_utf32);
+        LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
         utf32_codepoint_to_utf8((uint8_t *)cur, *cur_utf32++, len);
         cur += len;
+        dst_len -= len;
     }
+    LOG_ALWAYS_FATAL_IF(dst_len < 1, "dst_len < 1: %zu < 1", dst_len);
     *cur = '\0';
 }
 
@@ -349,7 +353,7 @@ int strzcmp16_h_n(const char16_t *s1H, size_t n1, const char16_t *s2N, size_t n2
            : 0);
 }
 
-void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
+void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst, size_t dst_len)
 {
     if (src == NULL || src_len == 0 || dst == NULL) {
         return;
@@ -370,9 +374,12 @@ void utf16_to_utf8(const char16_t* src, size_t src_len, char* dst)
             utf32 = (char32_t) *cur_utf16++;
         }
         const size_t len = utf32_codepoint_utf8_length(utf32);
+        LOG_ALWAYS_FATAL_IF(dst_len < len, "%zu < %zu", dst_len, len);
         utf32_codepoint_to_utf8((uint8_t*)cur, utf32, len);
         cur += len;
+        dst_len -= len;
     }
+    LOG_ALWAYS_FATAL_IF(dst_len < 1, "%zu < 1", dst_len);
     *cur = '\0';
 }
 
@@ -433,10 +440,10 @@ ssize_t utf16_to_utf8_length(const char16_t *src, size_t src_len)
     const char16_t* const end = src + src_len;
     while (src < end) {
         if ((*src & 0xFC00) == 0xD800 && (src + 1) < end
-                && (*++src & 0xFC00) == 0xDC00) {
+                && (*(src + 1) & 0xFC00) == 0xDC00) {
             // surrogate pairs are always 4 bytes.
             ret += 4;
-            src++;
+            src += 2;
         } else {
             ret += utf32_codepoint_utf8_length((char32_t) *src++);
         }
-- 
cgit v1.2.3