From 4353d61b8b90a01fc20264612aa8e5bb72cf1cdd Mon Sep 17 00:00:00 2001 From: Ryan Mitchell Date: Mon, 10 Sep 2018 17:09:12 -0700 Subject: AAPT2: Convert from Modified UTF-8 ResStringPool Since ResStringPools are encoded using Modified UTF-8, retrieving strings from the string pool now converts them to standard UTF-8 before returning them. Bug: 114734350 Test: m aapt2_tests Change-Id: Ib459018186f4c5b40f3f3786425a335ecfb9ed02 --- tools/aapt2/StringPool_test.cpp | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'tools/aapt2/StringPool_test.cpp') diff --git a/tools/aapt2/StringPool_test.cpp b/tools/aapt2/StringPool_test.cpp index 0778564ee079..9a7238b584ba 100644 --- a/tools/aapt2/StringPool_test.cpp +++ b/tools/aapt2/StringPool_test.cpp @@ -303,7 +303,7 @@ TEST(StringPoolTest, Flatten) { } } -TEST(StringPoolTest, FlattenModifiedUTF8) { +TEST(StringPoolTest, ModifiedUTF8) { using namespace android; // For NO_ERROR on Windows. StdErrDiagnostics diag; StringPool pool; @@ -315,12 +315,24 @@ TEST(StringPoolTest, FlattenModifiedUTF8) { StringPool::FlattenUtf8(&buffer, pool, &diag); std::unique_ptr data = util::Copy(buffer); - // Check that the 4 byte utf-8 codepoint is encoded using 2 3 byte surrogate pairs + // Check that the codepoints are encoded using two three-byte surrogate pairs ResStringPool test; ASSERT_EQ(test.setTo(data.get(), buffer.size()), NO_ERROR); - EXPECT_THAT(util::GetString(test, 0), Eq("\xED\xA0\x81\xED\xB0\x80")); - EXPECT_THAT(util::GetString(test, 1), Eq("foo \xED\xA0\x81\xED\xB0\xB7 bar")); - EXPECT_THAT(util::GetString(test, 2), Eq("\xED\xA0\x81\xED\xB0\x80\xED\xA0\x81\xED\xB0\xB7")); + size_t len; + const char* str = test.string8At(0, &len); + ASSERT_THAT(str, NotNull()); + EXPECT_THAT(std::string(str, len), Eq("\xED\xA0\x81\xED\xB0\x80")); + str = test.string8At(1, &len); + ASSERT_THAT(str, NotNull()); + EXPECT_THAT(std::string(str, len), Eq("foo \xED\xA0\x81\xED\xB0\xB7 bar")); + str = test.string8At(2, &len); + 
ASSERT_THAT(str, NotNull()); + EXPECT_THAT(std::string(str, len), Eq("\xED\xA0\x81\xED\xB0\x80\xED\xA0\x81\xED\xB0\xB7")); + + // Check that retrieving the strings returns the original UTF-8 character bytes + EXPECT_THAT(util::GetString(test, 0), Eq("\xF0\x90\x90\x80")); + EXPECT_THAT(util::GetString(test, 1), Eq("foo \xF0\x90\x90\xB7 bar")); + EXPECT_THAT(util::GetString(test, 2), Eq("\xF0\x90\x90\x80\xF0\x90\x90\xB7")); } TEST(StringPoolTest, MaxEncodingLength) { -- cgit v1.2.3