diff options
author | Adam Lesinski <adamlesinski@google.com> | 2018-02-21 15:55:58 -0800 |
---|---|---|
committer | Adam Lesinski <adamlesinski@google.com> | 2018-02-27 11:39:10 -0800 |
commit | 2eed52ecc0c2fa3e96530e4b5556eaa82f7c2dfc (patch) | |
tree | 4e0a49770f684a2ca823d958c0f1a2b3adabcab9 | |
parent | e1094a2e232277a719025aa5c97c492502c34f5b (diff) |
AAPT2: Fix styled string whitespace processing
Change styled string whitespace processing to be like AAPT's was.
Main changes:
- whitespace around tags is preserved.
- tags start exactly where they are supposed to, not off by one.
Bug: 72406283
Test: make aapt2_tests
Change-Id: I4d12728c493efd8c978e2e3d2718b56534ff52ef
-rw-r--r-- | core/res/res/values/strings.xml | 8 | ||||
-rw-r--r-- | tools/aapt2/ResourceParser.cpp | 267 | ||||
-rw-r--r-- | tools/aapt2/ResourceParser_test.cpp | 62 | ||||
-rw-r--r-- | tools/aapt2/ResourceUtils.cpp | 196 | ||||
-rw-r--r-- | tools/aapt2/ResourceUtils.h | 89 | ||||
-rw-r--r-- | tools/aapt2/ResourceUtils_test.cpp | 44 | ||||
-rw-r--r-- | tools/aapt2/format/binary/XmlFlattener.cpp | 11 | ||||
-rw-r--r-- | tools/aapt2/link/ReferenceLinker.cpp | 8 | ||||
-rw-r--r-- | tools/aapt2/util/Util.cpp | 184 | ||||
-rw-r--r-- | tools/aapt2/util/Util.h | 16 | ||||
-rw-r--r-- | tools/aapt2/util/Util_test.cpp | 39 |
11 files changed, 615 insertions, 309 deletions
diff --git a/core/res/res/values/strings.xml b/core/res/res/values/strings.xml index cadc3ffba41c..aa90b8749a28 100644 --- a/core/res/res/values/strings.xml +++ b/core/res/res/values/strings.xml @@ -379,7 +379,7 @@ <!-- Text message in the factory reset warning dialog. This says that the the device admin app is missing or corrupted. As a result the device will be erased. [CHAR LIMIT=NONE]--> <string name="factory_reset_message">The admin app can\'t be used. Your device will now be - erased.\n\nIf you have questions, contact your organization's admin.</string> + erased.\n\nIf you have questions, contact your organization\'s admin.</string> <!-- A toast message displayed when printing is attempted but disabled by policy. --> <string name="printing_disabled_by">Printing disabled by <xliff:g id="owner_app">%s</xliff:g>.</string> @@ -764,7 +764,7 @@ <string name="capability_title_canCaptureFingerprintGestures">Fingerprint gestures</string> <!-- Description for the capability of an accessibility service to perform gestures. --> <string name="capability_desc_canCaptureFingerprintGestures">Can capture gestures performed on - the device's fingerprint sensor.</string> + the device\'s fingerprint sensor.</string> <!-- Permissions --> @@ -3774,7 +3774,7 @@ <!-- Notification title when data usage has exceeded warning threshold. [CHAR LIMIT=50] --> <string name="data_usage_warning_title">Data warning</string> <!-- Notification body when data usage has exceeded warning threshold. [CHAR LIMIT=32] --> - <string name="data_usage_warning_body">You've used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string> + <string name="data_usage_warning_body">You\'ve used <xliff:g id="app" example="3.8GB">%s</xliff:g> of data</string> <!-- Notification title when mobile data usage has exceeded limit threshold, and has been disabled. [CHAR LIMIT=50] --> <string name="data_usage_mobile_limit_title">Mobile data limit reached</string> @@ -3788,7 +3788,7 @@ <!-- Notification title when Wi-Fi data usage has exceeded limit threshold. [CHAR LIMIT=32] --> <string name="data_usage_wifi_limit_snoozed_title">Over your Wi-Fi data limit</string> <!-- Notification body when data usage has exceeded limit threshold. --> - <string name="data_usage_limit_snoozed_body">You've gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string> + <string name="data_usage_limit_snoozed_body">You\'ve gone <xliff:g id="size" example="3.8GB">%s</xliff:g> over your set limit</string> <!-- Notification title when background data usage is limited. [CHAR LIMIT=32] --> <string name="data_usage_restricted_title">Background data restricted</string> diff --git a/tools/aapt2/ResourceParser.cpp b/tools/aapt2/ResourceParser.cpp index 7cffeea6fe2c..1b6f8827291b 100644 --- a/tools/aapt2/ResourceParser.cpp +++ b/tools/aapt2/ResourceParser.cpp @@ -26,11 +26,14 @@ #include "ResourceUtils.h" #include "ResourceValues.h" #include "ValueVisitor.h" +#include "text/Utf8Iterator.h" #include "util/ImmutableMap.h" #include "util/Maybe.h" #include "util/Util.h" #include "xml/XmlPullParser.h" +using ::aapt::ResourceUtils::StringBuilder; +using ::aapt::text::Utf8Iterator; using ::android::StringPiece; namespace aapt { @@ -169,114 +172,212 @@ ResourceParser::ResourceParser(IDiagnostics* diag, ResourceTable* table, config_(config), options_(options) {} -/** - * Build a string from XML that converts nested elements into Span objects. - */ +// Base class Node for representing the various Spans and UntranslatableSections of an XML string. +// This will be used to traverse and flatten the XML string into a single std::string, with all +// Span and Untranslatable data maintained in parallel, as indices into the string. +class Node { + public: + virtual ~Node() = default; + + // Adds the given child node to this parent node's set of child nodes, moving ownership to the + // parent node as well. + // Returns a pointer to the child node that was added as a convenience. + template <typename T> + T* AddChild(std::unique_ptr<T> node) { + T* raw_ptr = node.get(); + children.push_back(std::move(node)); + return raw_ptr; + } + + virtual void Build(StringBuilder* builder) const { + for (const auto& child : children) { + child->Build(builder); + } + } + + std::vector<std::unique_ptr<Node>> children; +}; + +// A chunk of text in the XML string. This lives between other tags, such as XLIFF tags and Spans. +class SegmentNode : public Node { + public: + std::string data; + + void Build(StringBuilder* builder) const override { + builder->AppendText(data); + } +}; + +// A tag that will be encoded into the final flattened string. Tags like <b> or <i>. +class SpanNode : public Node { + public: + std::string name; + + void Build(StringBuilder* builder) const override { + StringBuilder::SpanHandle span_handle = builder->StartSpan(name); + Node::Build(builder); + builder->EndSpan(span_handle); + } +}; + +// An XLIFF 'g' tag, which marks a section of the string as untranslatable. +class UntranslatableNode : public Node { + public: + void Build(StringBuilder* builder) const override { + StringBuilder::UntranslatableHandle handle = builder->StartUntranslatable(); + Node::Build(builder); + builder->EndUntranslatable(handle); + } +}; + +// Build a string from XML that converts nested elements into Span objects. bool ResourceParser::FlattenXmlSubtree( xml::XmlPullParser* parser, std::string* out_raw_string, StyleString* out_style_string, std::vector<UntranslatableSection>* out_untranslatable_sections) { - // Keeps track of formatting tags (<b>, <i>) and the range of characters for which they apply. - // The stack elements refer to the indices in out_style_string->spans. - // By first adding to the out_style_string->spans vector, and then using the stack to refer - // to this vector, the original order of tags is preserved in cases such as <b><i>hello</b></i>. - std::vector<size_t> span_stack; - - // Clear the output variables. - out_raw_string->clear(); - out_style_string->spans.clear(); - out_untranslatable_sections->clear(); - - // The StringBuilder will concatenate the various segments of text which are initially - // separated by tags. It also handles unicode escape codes and quotations. - util::StringBuilder builder; + std::string raw_string; + std::string current_text; // The first occurrence of a <xliff:g> tag. Nested <xliff:g> tags are illegal. Maybe<size_t> untranslatable_start_depth; + Node root; + std::vector<Node*> node_stack; + node_stack.push_back(&root); + + bool saw_span_node = false; + SegmentNode* first_segment = nullptr; + SegmentNode* last_segment = nullptr; + size_t depth = 1; - while (xml::XmlPullParser::IsGoodEvent(parser->Next())) { + while (depth > 0 && xml::XmlPullParser::IsGoodEvent(parser->Next())) { const xml::XmlPullParser::Event event = parser->event(); - if (event == xml::XmlPullParser::Event::kStartElement) { - if (parser->element_namespace().empty()) { - // This is an HTML tag which we encode as a span. Add it to the span stack. - std::string span_name = parser->element_name(); - const auto end_attr_iter = parser->end_attributes(); - for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter; ++attr_iter) { - span_name += ";"; - span_name += attr_iter->name; - span_name += "="; - span_name += attr_iter->value; + // First take care of any SegmentNodes that should be created. + if (event == xml::XmlPullParser::Event::kStartElement || + event == xml::XmlPullParser::Event::kEndElement) { + if (!current_text.empty()) { + std::unique_ptr<SegmentNode> segment_node = util::make_unique<SegmentNode>(); + segment_node->data = std::move(current_text); + last_segment = node_stack.back()->AddChild(std::move(segment_node)); + if (first_segment == nullptr) { + first_segment = last_segment; } + current_text = {}; + } + } - // Make sure the string is representable in our binary format. - if (builder.Utf16Len() > std::numeric_limits<uint32_t>::max()) { - diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) - << "style string '" << builder.ToString() << "' is too long"); - return false; - } + switch (event) { + case xml::XmlPullParser::Event::kText: { + current_text += parser->text(); + raw_string += parser->text(); + } break; + + case xml::XmlPullParser::Event::kStartElement: { + if (parser->element_namespace().empty()) { + // This is an HTML tag which we encode as a span. Add it to the span stack. + std::unique_ptr<SpanNode> span_node = util::make_unique<SpanNode>(); + span_node->name = parser->element_name(); + const auto end_attr_iter = parser->end_attributes(); + for (auto attr_iter = parser->begin_attributes(); attr_iter != end_attr_iter; + ++attr_iter) { + span_node->name += ";"; + span_node->name += attr_iter->name; + span_node->name += "="; + span_node->name += attr_iter->value; + } - out_style_string->spans.push_back( - Span{std::move(span_name), static_cast<uint32_t>(builder.Utf16Len())}); - span_stack.push_back(out_style_string->spans.size() - 1); - } else if (parser->element_namespace() == sXliffNamespaceUri) { - if (parser->element_name() == "g") { - if (untranslatable_start_depth) { - // We've already encountered an <xliff:g> tag, and nested <xliff:g> tags are illegal. - diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) - << "illegal nested XLIFF 'g' tag"); - return false; + node_stack.push_back(node_stack.back()->AddChild(std::move(span_node))); + saw_span_node = true; + } else if (parser->element_namespace() == sXliffNamespaceUri) { + // This is an XLIFF tag, which is not encoded as a span. + if (parser->element_name() == "g") { + // Check that an 'untranslatable' tag is not already being processed. Nested + // <xliff:g> tags are illegal. + if (untranslatable_start_depth) { + diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) + << "illegal nested XLIFF 'g' tag"); + return false; + } else { + // Mark the beginning of an 'untranslatable' section. + untranslatable_start_depth = depth; + node_stack.push_back( + node_stack.back()->AddChild(util::make_unique<UntranslatableNode>())); + } } else { - // Mark the start of an untranslatable section. Use UTF8 indices/lengths. - untranslatable_start_depth = depth; - const size_t current_idx = builder.ToString().size(); - out_untranslatable_sections->push_back(UntranslatableSection{current_idx, current_idx}); + // Ignore unknown XLIFF tags, but don't warn. + node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>())); } + } else { + // Besides XLIFF, any other namespaced tag is unsupported and ignored. + diag_->Warn(DiagMessage(source_.WithLine(parser->line_number())) + << "ignoring element '" << parser->element_name() + << "' with unknown namespace '" << parser->element_namespace() << "'"); + node_stack.push_back(node_stack.back()->AddChild(util::make_unique<Node>())); } - // Ignore other xliff tags, they get handled by other tools. - } else { - // Besides XLIFF, any other namespaced tag is unsupported and ignored. - diag_->Warn(DiagMessage(source_.WithLine(parser->line_number())) - << "ignoring element '" << parser->element_name() - << "' with unknown namespace '" << parser->element_namespace() << "'"); - } + // Enter one level inside the element. + depth++; + } break; - // Enter one level inside the element. - depth++; - } else if (event == xml::XmlPullParser::Event::kText) { - // Record both the raw text and append to the builder to deal with escape sequences - // and quotations. - out_raw_string->append(parser->text()); - builder.Append(parser->text()); - } else if (event == xml::XmlPullParser::Event::kEndElement) { - // Return one level from within the element. - depth--; - if (depth == 0) { + case xml::XmlPullParser::Event::kEndElement: { + // Return one level from within the element. + depth--; + if (depth == 0) { + break; + } + + node_stack.pop_back(); + if (untranslatable_start_depth == make_value(depth)) { + // This is the end of an untranslatable section. + untranslatable_start_depth = {}; + } + } break; + + default: + // ignore. break; + } + } + + // Sanity check to make sure we processed all the nodes. + CHECK(node_stack.size() == 1u); + CHECK(node_stack.back() == &root); + + if (!saw_span_node) { + // If there were no spans, we must treat this string a little differently (according to AAPT). + // Find and strip the leading whitespace from the first segment, and the trailing whitespace + // from the last segment. + if (first_segment != nullptr) { + // Trim leading whitespace. + StringPiece trimmed = util::TrimLeadingWhitespace(first_segment->data); + if (trimmed.size() != first_segment->data.size()) { + first_segment->data = trimmed.to_string(); } + } - if (parser->element_namespace().empty()) { - // This is an HTML tag which we encode as a span. Update the span - // stack and pop the top entry. - Span& top_span = out_style_string->spans[span_stack.back()]; - top_span.last_char = builder.Utf16Len() - 1; - span_stack.pop_back(); - } else if (untranslatable_start_depth == make_value(depth)) { - // This is the end of an untranslatable section. Use UTF8 indices/lengths. - UntranslatableSection& untranslatable_section = out_untranslatable_sections->back(); - untranslatable_section.end = builder.ToString().size(); - untranslatable_start_depth = {}; + if (last_segment != nullptr) { + // Trim trailing whitespace. + StringPiece trimmed = util::TrimTrailingWhitespace(last_segment->data); + if (trimmed.size() != last_segment->data.size()) { + last_segment->data = trimmed.to_string(); } - } else if (event == xml::XmlPullParser::Event::kComment) { - // Ignore. - } else { - LOG(FATAL) << "unhandled XML event"; } } - CHECK(span_stack.empty()) << "spans haven't been fully processed"; - out_style_string->str = builder.ToString(); + // Have the XML structure flatten itself into the StringBuilder. The StringBuilder will take + // care of recording the correctly adjusted Spans and UntranslatableSections. + StringBuilder builder; + root.Build(&builder); + if (!builder) { + diag_->Error(DiagMessage(source_.WithLine(parser->line_number())) << builder.GetError()); + return false; + } + + ResourceUtils::FlattenedXmlString flattened_string = builder.GetFlattenedString(); + *out_raw_string = std::move(raw_string); + *out_untranslatable_sections = std::move(flattened_string.untranslatable_sections); + out_style_string->str = std::move(flattened_string.text); + out_style_string->spans = std::move(flattened_string.spans); return true; } diff --git a/tools/aapt2/ResourceParser_test.cpp b/tools/aapt2/ResourceParser_test.cpp index 618c8ed4afd1..c98c0b95b69b 100644 --- a/tools/aapt2/ResourceParser_test.cpp +++ b/tools/aapt2/ResourceParser_test.cpp @@ -95,6 +95,16 @@ TEST_F(ResourceParserTest, ParseQuotedString) { ASSERT_THAT(str, NotNull()); EXPECT_THAT(*str, StrValueEq(" hey there ")); EXPECT_THAT(str->untranslatable_sections, IsEmpty()); + + ASSERT_TRUE(TestParse(R"(<string name="bar">Isn\'t it cool?</string>)")); + str = test::GetValue<String>(&table_, "string/bar"); + ASSERT_THAT(str, NotNull()); + EXPECT_THAT(*str, StrValueEq("Isn't it cool?")); + + ASSERT_TRUE(TestParse(R"(<string name="baz">"Isn't it cool?"</string>)")); + str = test::GetValue<String>(&table_, "string/baz"); + ASSERT_THAT(str, NotNull()); + EXPECT_THAT(*str, StrValueEq("Isn't it cool?")); } TEST_F(ResourceParserTest, ParseEscapedString) { @@ -126,16 +136,16 @@ TEST_F(ResourceParserTest, ParseStyledString) { StyledString* str = test::GetValue<StyledString>(&table_, "string/foo"); ASSERT_THAT(str, NotNull()); - EXPECT_THAT(str->value->value, Eq("This is my aunt\u2019s fickle string")); + EXPECT_THAT(str->value->value, StrEq("This is my aunt\u2019s fickle string")); EXPECT_THAT(str->value->spans, SizeIs(2)); EXPECT_THAT(str->untranslatable_sections, IsEmpty()); - EXPECT_THAT(*str->value->spans[0].name, Eq("b")); - EXPECT_THAT(str->value->spans[0].first_char, Eq(17u)); + EXPECT_THAT(*str->value->spans[0].name, StrEq("b")); + EXPECT_THAT(str->value->spans[0].first_char, Eq(18u)); EXPECT_THAT(str->value->spans[0].last_char, Eq(30u)); - EXPECT_THAT(*str->value->spans[1].name, Eq("small")); - EXPECT_THAT(str->value->spans[1].first_char, Eq(24u)); + EXPECT_THAT(*str->value->spans[1].name, StrEq("small")); + EXPECT_THAT(str->value->spans[1].first_char, Eq(25u)); EXPECT_THAT(str->value->spans[1].last_char, Eq(30u)); } @@ -144,7 +154,7 @@ TEST_F(ResourceParserTest, ParseStringWithWhitespace) { String* str = test::GetValue<String>(&table_, "string/foo"); ASSERT_THAT(str, NotNull()); - EXPECT_THAT(*str->value, Eq("This is what I think")); + EXPECT_THAT(*str->value, StrEq("This is what I think")); EXPECT_THAT(str->untranslatable_sections, IsEmpty()); ASSERT_TRUE(TestParse(R"(<string name="foo2">" This is what I think "</string>)")); @@ -154,6 +164,25 @@ TEST_F(ResourceParserTest, ParseStringWithWhitespace) { EXPECT_THAT(*str, StrValueEq(" This is what I think ")); } +TEST_F(ResourceParserTest, ParseStyledStringWithWhitespace) { + std::string input = R"(<string name="foo"> <b> My <i> favorite</i> string </b> </string>)"; + ASSERT_TRUE(TestParse(input)); + + StyledString* str = test::GetValue<StyledString>(&table_, "string/foo"); + ASSERT_THAT(str, NotNull()); + EXPECT_THAT(str->value->value, StrEq(" My favorite string ")); + EXPECT_THAT(str->untranslatable_sections, IsEmpty()); + + ASSERT_THAT(str->value->spans, SizeIs(2u)); + EXPECT_THAT(*str->value->spans[0].name, StrEq("b")); + EXPECT_THAT(str->value->spans[0].first_char, Eq(1u)); + EXPECT_THAT(str->value->spans[0].last_char, Eq(21u)); + + EXPECT_THAT(*str->value->spans[1].name, StrEq("i")); + EXPECT_THAT(str->value->spans[1].first_char, Eq(5u)); + EXPECT_THAT(str->value->spans[1].last_char, Eq(13u)); +} + TEST_F(ResourceParserTest, IgnoreXliffTagsOtherThanG) { std::string input = R"( <string name="foo" xmlns:xliff="urn:oasis:names:tc:xliff:document:1.2"> @@ -182,12 +211,9 @@ TEST_F(ResourceParserTest, RecordUntranslateableXliffSectionsInString) { String* str = test::GetValue<String>(&table_, "string/foo"); ASSERT_THAT(str, NotNull()); EXPECT_THAT(*str, StrValueEq("There are %1$d apples")); - ASSERT_THAT(str->untranslatable_sections, SizeIs(1)); - // We expect indices and lengths that span to include the whitespace - // before %1$d. This is due to how the StringBuilder withholds whitespace unless - // needed (to deal with line breaks, etc.). - EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u)); + ASSERT_THAT(str->untranslatable_sections, SizeIs(1)); + EXPECT_THAT(str->untranslatable_sections[0].start, Eq(10u)); EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u)); } @@ -199,14 +225,16 @@ TEST_F(ResourceParserTest, RecordUntranslateableXliffSectionsInStyledString) { StyledString* str = test::GetValue<StyledString>(&table_, "string/foo"); ASSERT_THAT(str, NotNull()); - EXPECT_THAT(str->value->value, Eq("There are %1$d apples")); + EXPECT_THAT(str->value->value, Eq(" There are %1$d apples")); + ASSERT_THAT(str->untranslatable_sections, SizeIs(1)); + EXPECT_THAT(str->untranslatable_sections[0].start, Eq(11u)); + EXPECT_THAT(str->untranslatable_sections[0].end, Eq(15u)); - // We expect indices and lengths that span to include the whitespace - // before %1$d. This is due to how the StringBuilder withholds whitespace unless - // needed (to deal with line breaks, etc.). - EXPECT_THAT(str->untranslatable_sections[0].start, Eq(9u)); - EXPECT_THAT(str->untranslatable_sections[0].end, Eq(14u)); + ASSERT_THAT(str->value->spans, SizeIs(1u)); + EXPECT_THAT(*str->value->spans[0].name, StrEq("b")); + EXPECT_THAT(str->value->spans[0].first_char, Eq(11u)); + EXPECT_THAT(str->value->spans[0].last_char, Eq(14u)); } TEST_F(ResourceParserTest, ParseNull) { diff --git a/tools/aapt2/ResourceUtils.cpp b/tools/aapt2/ResourceUtils.cpp index 628466d0a281..8fc3d6580165 100644 --- a/tools/aapt2/ResourceUtils.cpp +++ b/tools/aapt2/ResourceUtils.cpp @@ -18,17 +18,23 @@ #include <sstream> +#include "android-base/stringprintf.h" #include "androidfw/ResourceTypes.h" #include "androidfw/ResourceUtils.h" #include "NameMangler.h" #include "SdkConstants.h" #include "format/binary/ResourceTypeExtensions.h" +#include "text/Unicode.h" +#include "text/Utf8Iterator.h" #include "util/Files.h" #include "util/Util.h" +using ::aapt::text::IsWhitespace; +using ::aapt::text::Utf8Iterator; using ::android::StringPiece; using ::android::StringPiece16; +using ::android::base::StringPrintf; namespace aapt { namespace ResourceUtils { @@ -750,5 +756,195 @@ std::unique_ptr<Item> ParseBinaryResValue(const ResourceType& type, const Config return util::make_unique<BinaryPrimitive>(res_value); } +// Converts the codepoint to UTF-8 and appends it to the string. +static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) { + ssize_t len = utf32_to_utf8_length(&codepoint, 1); + if (len < 0) { + return false; + } + + const size_t start_append_pos = output->size(); + + // Make room for the next character. + output->resize(output->size() + len); + + char* dst = &*(output->begin() + start_append_pos); + utf32_to_utf8(&codepoint, 1, dst, len + 1); + return true; +} + +// Reads up to 4 UTF-8 characters that represent a Unicode escape sequence, and appends the +// Unicode codepoint represented by the escape sequence to the string. +static bool AppendUnicodeEscapeSequence(Utf8Iterator* iter, std::string* output) { + char32_t code = 0; + for (size_t i = 0; i < 4 && iter->HasNext(); i++) { + char32_t codepoint = iter->Next(); + char32_t a; + if (codepoint >= U'0' && codepoint <= U'9') { + a = codepoint - U'0'; + } else if (codepoint >= U'a' && codepoint <= U'f') { + a = codepoint - U'a' + 10; + } else if (codepoint >= U'A' && codepoint <= U'F') { + a = codepoint - U'A' + 10; + } else { + return {}; + } + code = (code << 4) | a; + } + return AppendCodepointToUtf8String(code, output); +} + +StringBuilder::StringBuilder(bool preserve_spaces) + : preserve_spaces_(preserve_spaces), quote_(preserve_spaces) { +} + +StringBuilder& StringBuilder::AppendText(const std::string& text) { + if (!error_.empty()) { + return *this; + } + + const size_t previous_len = xml_string_.text.size(); + Utf8Iterator iter(text); + while (iter.HasNext()) { + char32_t codepoint = iter.Next(); + if (!quote_ && text::IsWhitespace(codepoint)) { + if (!last_codepoint_was_space_) { + // Emit a space if it's the first. + xml_string_.text += ' '; + last_codepoint_was_space_ = true; + } + + // Keep eating spaces. + continue; + } + + // This is not a space. + last_codepoint_was_space_ = false; + + if (codepoint == U'\\') { + if (iter.HasNext()) { + codepoint = iter.Next(); + switch (codepoint) { + case U't': + xml_string_.text += '\t'; + break; + + case U'n': + xml_string_.text += '\n'; + break; + + case U'#': + case U'@': + case U'?': + case U'"': + case U'\'': + case U'\\': + xml_string_.text += static_cast<char>(codepoint); + break; + + case U'u': + if (!AppendUnicodeEscapeSequence(&iter, &xml_string_.text)) { + error_ = + StringPrintf("invalid unicode escape sequence in string\n\"%s\"", text.c_str()); + return *this; + } + break; + + default: + // Ignore the escape character and just include the codepoint. + AppendCodepointToUtf8String(codepoint, &xml_string_.text); + break; + } + } + } else if (!preserve_spaces_ && codepoint == U'"') { + // Only toggle the quote state when we are not preserving spaces. + quote_ = !quote_; + + } else if (!quote_ && codepoint == U'\'') { + // This should be escaped. + error_ = StringPrintf("unescaped apostrophe in string\n\"%s\"", text.c_str()); + return *this; + + } else { + AppendCodepointToUtf8String(codepoint, &xml_string_.text); + } + } + + // Accumulate the added string's UTF-16 length. + const uint8_t* utf8_data = reinterpret_cast<const uint8_t*>(xml_string_.text.c_str()); + const size_t utf8_length = xml_string_.text.size(); + ssize_t len = utf8_to_utf16_length(utf8_data + previous_len, utf8_length - previous_len); + if (len < 0) { + error_ = StringPrintf("invalid unicode code point in string\n\"%s\"", utf8_data + previous_len); + return *this; + } + + utf16_len_ += static_cast<uint32_t>(len); + return *this; +} + +StringBuilder::SpanHandle StringBuilder::StartSpan(const std::string& name) { + if (!error_.empty()) { + return 0u; + } + + // When we start a span, all state associated with whitespace truncation and quotation is ended. + ResetTextState(); + Span span; + span.name = name; + span.first_char = span.last_char = utf16_len_; + xml_string_.spans.push_back(std::move(span)); + return xml_string_.spans.size() - 1; +} + +void StringBuilder::EndSpan(SpanHandle handle) { + if (!error_.empty()) { + return; + } + + // When we end a span, all state associated with whitespace truncation and quotation is ended. + ResetTextState(); + xml_string_.spans[handle].last_char = utf16_len_ - 1u; +} + +StringBuilder::UntranslatableHandle StringBuilder::StartUntranslatable() { + if (!error_.empty()) { + return 0u; + } + + UntranslatableSection section; + section.start = section.end = xml_string_.text.size(); + xml_string_.untranslatable_sections.push_back(section); + return xml_string_.untranslatable_sections.size() - 1; +} + +void StringBuilder::EndUntranslatable(UntranslatableHandle handle) { + if (!error_.empty()) { + return; + } + xml_string_.untranslatable_sections[handle].end = xml_string_.text.size(); +} + +FlattenedXmlString StringBuilder::GetFlattenedString() const { + return xml_string_; +} + +std::string StringBuilder::to_string() const { + return xml_string_.text; +} + +StringBuilder::operator bool() const { + return error_.empty(); +} + +std::string StringBuilder::GetError() const { + return error_; +} + +void StringBuilder::ResetTextState() { + quote_ = preserve_spaces_; + last_codepoint_was_space_ = false; +} + } // namespace ResourceUtils } // namespace aapt diff --git a/tools/aapt2/ResourceUtils.h b/tools/aapt2/ResourceUtils.h index f83d49ee5591..7af2fe06b908 100644 --- a/tools/aapt2/ResourceUtils.h +++ b/tools/aapt2/ResourceUtils.h @@ -224,6 +224,95 @@ std::unique_ptr<Item> ParseBinaryResValue(const ResourceType& type, const Config const android::Res_value& res_value, StringPool* dst_pool); +// A string flattened from an XML hierarchy, which maintains tags and untranslatable sections +// in parallel data structures. +struct FlattenedXmlString { + std::string text; + std::vector<UntranslatableSection> untranslatable_sections; + std::vector<Span> spans; +}; + +// Flattens an XML hierarchy into a FlattenedXmlString, formatting the text, escaping characters, +// and removing whitespace, all while keeping the untranslatable sections and spans in sync with the +// transformations. +// +// Specifically, the StringBuilder will handle escaped characters like \t, \n, \\, \', etc. +// Single quotes *must* be escaped, unless within a pair of double-quotes. +// Pairs of double-quotes disable whitespace stripping of the enclosed text. +// Unicode escape codes (\u0049) are interpreted and the represented Unicode character is inserted. +// +// A NOTE ON WHITESPACE: +// +// When preserve_spaces is false, and when text is not enclosed within double-quotes, +// StringBuilder replaces a series of whitespace with a single space character. This happens at the +// start and end of the string as well, so leading and trailing whitespace is possible. +// +// When a Span is started or stopped, the whitespace counter is reset, meaning if whitespace +// is encountered directly after the span, it will be emitted. This leads to situations like the +// following: "This <b> is </b> spaced" -> "This is spaced". Without spans, this would be properly +// compressed: "This is spaced" -> "This is spaced". +// +// Untranslatable sections do not have the same problem: +// "This <xliff:g> is </xliff:g> not spaced" -> "This is not spaced". +// +// NOTE: This is all the way it is because AAPT1 did it this way. Maintaining backwards +// compatibility is important. +// +class StringBuilder { + public: + using SpanHandle = size_t; + using UntranslatableHandle = size_t; + + // Creates a StringBuilder. If preserve_spaces is true, whitespace removal is not performed, and + // single quotations can be used without escaping them. + explicit StringBuilder(bool preserve_spaces = false); + + // Appends a chunk of text. + StringBuilder& AppendText(const std::string& text); + + // Starts a Span (tag) with the given name. The name is expected to be of the form: + // "tag_name;attr1=value;attr2=value;" + // Which is how Spans are encoded in the ResStringPool. + // To end the span, pass back the SpanHandle received from this method to the EndSpan() method. + SpanHandle StartSpan(const std::string& name); + + // Ends a Span (tag). Pass in the matching SpanHandle previously obtained from StartSpan(). + void EndSpan(SpanHandle handle); + + // Starts an Untranslatable section. + // To end the section, pass back the UntranslatableHandle received from this method to + // the EndUntranslatable() method. + UntranslatableHandle StartUntranslatable(); + + // Ends an Untranslatable section. Pass in the matching UntranslatableHandle previously obtained + // from StartUntranslatable(). + void EndUntranslatable(UntranslatableHandle handle); + + // Returns the flattened XML string, with all spans and untranslatable sections encoded as + // parallel data structures. + FlattenedXmlString GetFlattenedString() const; + + // Returns just the flattened XML text, with no spans or untranslatable sections. + std::string to_string() const; + + // Returns true if there was no error. + explicit operator bool() const; + + std::string GetError() const; + + private: + DISALLOW_COPY_AND_ASSIGN(StringBuilder); + + void ResetTextState(); + + std::string error_; + FlattenedXmlString xml_string_; + uint32_t utf16_len_ = 0u; + bool preserve_spaces_; + bool quote_; + bool last_codepoint_was_space_ = false; +}; + } // namespace ResourceUtils } // namespace aapt diff --git a/tools/aapt2/ResourceUtils_test.cpp b/tools/aapt2/ResourceUtils_test.cpp index cb786d3794c2..11f3fa3bc6cd 100644 --- a/tools/aapt2/ResourceUtils_test.cpp +++ b/tools/aapt2/ResourceUtils_test.cpp @@ -212,4 +212,48 @@ TEST(ResourceUtilsTest, ItemsWithWhitespaceAreParsedCorrectly) { Pointee(ValueEq(BinaryPrimitive(Res_value::TYPE_FLOAT, expected_float_flattened)))); } +TEST(ResourceUtilsTest, StringBuilderWhitespaceRemoval) { + EXPECT_THAT(ResourceUtils::StringBuilder() + .AppendText(" hey guys ") + .AppendText(" this is so cool ") + .to_string(), + Eq(" hey guys this is so cool ")); + EXPECT_THAT(ResourceUtils::StringBuilder() + .AppendText(" \" wow, so many \t ") + .AppendText("spaces. \"what? ") + .to_string(), + Eq(" wow, so many \t spaces. what? ")); + EXPECT_THAT(ResourceUtils::StringBuilder() + .AppendText(" where \t ") + .AppendText(" \nis the pie?") + .to_string(), + Eq(" where is the pie?")); +} + +TEST(ResourceUtilsTest, StringBuilderEscaping) { + EXPECT_THAT(ResourceUtils::StringBuilder() + .AppendText("hey guys\\n ") + .AppendText(" this \\t is so\\\\ cool") + .to_string(), + Eq("hey guys\n this \t is so\\ cool")); + EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\@\\?\\#\\\\\\'").to_string(), + Eq("@?#\\\'")); +} + +TEST(ResourceUtilsTest, StringBuilderMisplacedQuote) { + ResourceUtils::StringBuilder builder; + EXPECT_FALSE(builder.AppendText("they're coming!")); +} + +TEST(ResourceUtilsTest, StringBuilderUnicodeCodes) { + EXPECT_THAT(ResourceUtils::StringBuilder().AppendText("\\u00AF\\u0AF0 woah").to_string(), + Eq("\u00AF\u0AF0 woah")); + EXPECT_FALSE(ResourceUtils::StringBuilder().AppendText("\\u00 yo")); +} + +TEST(ResourceUtilsTest, StringBuilderPreserveSpaces) { + EXPECT_THAT(ResourceUtils::StringBuilder(true /*preserve_spaces*/).AppendText("\"").to_string(), + Eq("\"")); +} + } // namespace aapt diff --git a/tools/aapt2/format/binary/XmlFlattener.cpp b/tools/aapt2/format/binary/XmlFlattener.cpp index 067372b99b5c..781b9fe8bc29 100644 --- a/tools/aapt2/format/binary/XmlFlattener.cpp +++ b/tools/aapt2/format/binary/XmlFlattener.cpp @@ -25,6 +25,7 @@ #include "androidfw/ResourceTypes.h" #include "utils/misc.h" +#include "ResourceUtils.h" #include "SdkConstants.h" #include "ValueVisitor.h" #include "format/binary/ChunkWriter.h" @@ -33,6 +34,8 @@ using namespace android; +using ::aapt::ResourceUtils::StringBuilder; + namespace aapt { namespace { @@ -89,9 +92,9 @@ class XmlFlattenerVisitor : public xml::ConstVisitor { ResXMLTree_cdataExt* flat_text = writer.NextBlock<ResXMLTree_cdataExt>(); // Process plain strings to make sure they get properly escaped. - util::StringBuilder builder; - builder.Append(node->text); - AddString(builder.ToString(), kLowPriority, &flat_text->data); + StringBuilder builder; + builder.AppendText(node->text); + AddString(builder.to_string(), kLowPriority, &flat_text->data); writer.Finish(); } @@ -272,7 +275,7 @@ class XmlFlattenerVisitor : public xml::ConstVisitor { // There is no compiled value, so treat the raw string as compiled, once it is processed to // make sure escape sequences are properly interpreted. processed_str = - util::StringBuilder(true /*preserve_spaces*/).Append(xml_attr->value).ToString(); + StringBuilder(true /*preserve_spaces*/).AppendText(xml_attr->value).to_string(); compiled_text = StringPiece(processed_str); } diff --git a/tools/aapt2/link/ReferenceLinker.cpp b/tools/aapt2/link/ReferenceLinker.cpp index b8f880427c71..9aaaa69f8994 100644 --- a/tools/aapt2/link/ReferenceLinker.cpp +++ b/tools/aapt2/link/ReferenceLinker.cpp @@ -30,6 +30,7 @@ #include "util/Util.h" #include "xml/XmlUtil.h" +using ::aapt::ResourceUtils::StringBuilder; using ::android::StringPiece; namespace aapt { @@ -133,10 +134,11 @@ class ReferenceLinkerVisitor : public DescendingValueVisitor { // If we could not parse as any specific type, try a basic STRING. if (!transformed && (attr->type_mask & android::ResTable_map::TYPE_STRING)) { - util::StringBuilder string_builder; - string_builder.Append(*raw_string->value); + StringBuilder string_builder; + string_builder.AppendText(*raw_string->value); if (string_builder) { - transformed = util::make_unique<String>(string_pool_->MakeRef(string_builder.ToString())); + transformed = + util::make_unique<String>(string_pool_->MakeRef(string_builder.to_string())); } } diff --git a/tools/aapt2/util/Util.cpp b/tools/aapt2/util/Util.cpp index e42145dff47e..d1c9ca1644d9 100644 --- a/tools/aapt2/util/Util.cpp +++ b/tools/aapt2/util/Util.cpp @@ -76,6 +76,34 @@ bool EndsWith(const StringPiece& str, const StringPiece& suffix) { return str.substr(str.size() - suffix.size(), suffix.size()) == suffix; } +StringPiece TrimLeadingWhitespace(const StringPiece& str) { + if (str.size() == 0 || str.data() == nullptr) { + return str; + } + + const char* start = str.data(); + const char* end = start + str.length(); + + while (start != end && isspace(*start)) { + start++; + } + return StringPiece(start, end - start); +} + +StringPiece TrimTrailingWhitespace(const StringPiece& str) { + if (str.size() == 0 || str.data() == nullptr) { + return str; + } + + const char* start = str.data(); + const char* end = start + str.length(); + + while (end != start && isspace(*(end - 1))) { + end--; + } + return StringPiece(start, end - start); +} + StringPiece TrimWhitespace(const StringPiece& str) { if (str.size() == 0 || str.data() == nullptr) { return str; @@ -269,162 +297,6 @@ bool VerifyJavaStringFormat(const StringPiece& str) { return true; } -static bool AppendCodepointToUtf8String(char32_t codepoint, std::string* output) { - ssize_t len = utf32_to_utf8_length(&codepoint, 1); - if (len < 0) { - return false; - } - - const size_t start_append_pos = output->size(); - - // Make room for the next character. - output->resize(output->size() + len); - - char* dst = &*(output->begin() + start_append_pos); - utf32_to_utf8(&codepoint, 1, dst, len + 1); - return true; -} - -static bool AppendUnicodeCodepoint(Utf8Iterator* iter, std::string* output) { - char32_t code = 0; - for (size_t i = 0; i < 4 && iter->HasNext(); i++) { - char32_t codepoint = iter->Next(); - char32_t a; - if (codepoint >= U'0' && codepoint <= U'9') { - a = codepoint - U'0'; - } else if (codepoint >= U'a' && codepoint <= U'f') { - a = codepoint - U'a' + 10; - } else if (codepoint >= U'A' && codepoint <= U'F') { - a = codepoint - U'A' + 10; - } else { - return {}; - } - code = (code << 4) | a; - } - return AppendCodepointToUtf8String(code, output); -} - -static bool IsCodepointSpace(char32_t codepoint) { - if (static_cast<uint32_t>(codepoint) & 0xffffff00u) { - return false; - } - return isspace(static_cast<char>(codepoint)); -} - -StringBuilder::StringBuilder(bool preserve_spaces) : preserve_spaces_(preserve_spaces) { -} - -StringBuilder& StringBuilder::Append(const StringPiece& str) { - if (!error_.empty()) { - return *this; - } - - // Where the new data will be appended to. - const size_t new_data_index = str_.size(); - - Utf8Iterator iter(str); - while (iter.HasNext()) { - const char32_t codepoint = iter.Next(); - - if (last_char_was_escape_) { - switch (codepoint) { - case U't': - str_ += '\t'; - break; - - case U'n': - str_ += '\n'; - break; - - case U'#': - case U'@': - case U'?': - case U'"': - case U'\'': - case U'\\': - str_ += static_cast<char>(codepoint); - break; - - case U'u': - if (!AppendUnicodeCodepoint(&iter, &str_)) { - error_ = "invalid unicode escape sequence"; - return *this; - } - break; - - default: - // Ignore the escape character and just include the codepoint. - AppendCodepointToUtf8String(codepoint, &str_); - break; - } - last_char_was_escape_ = false; - - } else if (!preserve_spaces_ && codepoint == U'"') { - if (!quote_ && trailing_space_) { - // We found an opening quote, and we have trailing space, so we should append that - // space now. - if (trailing_space_) { - // We had trailing whitespace, so replace with a single space. - if (!str_.empty()) { - str_ += ' '; - } - trailing_space_ = false; - } - } - quote_ = !quote_; - - } else if (!preserve_spaces_ && codepoint == U'\'' && !quote_) { - // This should be escaped. - error_ = "unescaped apostrophe"; - return *this; - - } else if (codepoint == U'\\') { - // This is an escape sequence, convert to the real value. - if (!quote_ && trailing_space_) { - // We had trailing whitespace, so - // replace with a single space. - if (!str_.empty()) { - str_ += ' '; - } - trailing_space_ = false; - } - last_char_was_escape_ = true; - } else { - if (preserve_spaces_ || quote_) { - // Quotes mean everything is taken, including whitespace. - AppendCodepointToUtf8String(codepoint, &str_); - } else { - // This is not quoted text, so we will accumulate whitespace and only emit a single - // character of whitespace if it is followed by a non-whitespace character. - if (IsCodepointSpace(codepoint)) { - // We found whitespace. - trailing_space_ = true; - } else { - if (trailing_space_) { - // We saw trailing space before, so replace all - // that trailing space with one space. - if (!str_.empty()) { - str_ += ' '; - } - trailing_space_ = false; - } - AppendCodepointToUtf8String(codepoint, &str_); - } - } - } - } - - // Accumulate the added string's UTF-16 length. - ssize_t len = utf8_to_utf16_length(reinterpret_cast<const uint8_t*>(str_.data()) + new_data_index, - str_.size() - new_data_index); - if (len < 0) { - error_ = "invalid unicode code point"; - return *this; - } - utf16_len_ += len; - return *this; -} - std::u16string Utf8ToUtf16(const StringPiece& utf8) { ssize_t utf16_length = utf8_to_utf16_length( reinterpret_cast<const uint8_t*>(utf8.data()), utf8.length()); diff --git a/tools/aapt2/util/Util.h b/tools/aapt2/util/Util.h index 7c949b90c10a..0eb35d18c06e 100644 --- a/tools/aapt2/util/Util.h +++ b/tools/aapt2/util/Util.h @@ -59,7 +59,15 @@ bool StartsWith(const android::StringPiece& str, const android::StringPiece& pre // Returns true if the string ends with suffix. bool EndsWith(const android::StringPiece& str, const android::StringPiece& suffix); -// Creates a new StringPiece16 that points to a substring of the original string without leading or +// Creates a new StringPiece that points to a substring of the original string without leading +// whitespace. +android::StringPiece TrimLeadingWhitespace(const android::StringPiece& str); + +// Creates a new StringPiece that points to a substring of the original string without trailing +// whitespace. +android::StringPiece TrimTrailingWhitespace(const android::StringPiece& str); + +// Creates a new StringPiece that points to a substring of the original string without leading or // trailing whitespace. android::StringPiece TrimWhitespace(const android::StringPiece& str); @@ -141,9 +149,12 @@ std::string GetString(const android::ResStringPool& pool, size_t idx); // break the string interpolation. bool VerifyJavaStringFormat(const android::StringPiece& str); +bool AppendStyledString(const android::StringPiece& input, bool preserve_spaces, + std::string* out_str, std::string* out_error); + class StringBuilder { public: - explicit StringBuilder(bool preserve_spaces = false); + StringBuilder() = default; StringBuilder& Append(const android::StringPiece& str); const std::string& ToString() const; @@ -158,7 +169,6 @@ class StringBuilder { explicit operator bool() const; private: - bool preserve_spaces_; std::string str_; size_t utf16_len_ = 0; bool quote_ = false; diff --git a/tools/aapt2/util/Util_test.cpp b/tools/aapt2/util/Util_test.cpp index 2d1242ada949..d4e3bec24bd1 100644 --- a/tools/aapt2/util/Util_test.cpp +++ b/tools/aapt2/util/Util_test.cpp @@ -41,45 +41,6 @@ TEST(UtilTest, StringStartsWith) { EXPECT_TRUE(util::StartsWith("hello.xml", "he")); } -TEST(UtilTest, StringBuilderSplitEscapeSequence) { - EXPECT_THAT(util::StringBuilder().Append("this is a new\\").Append("nline.").ToString(), - Eq("this is a new\nline.")); -} - -TEST(UtilTest, StringBuilderWhitespaceRemoval) { - EXPECT_THAT(util::StringBuilder().Append(" hey guys ").Append(" this is so cool ").ToString(), - Eq("hey guys this is so cool")); - EXPECT_THAT( - util::StringBuilder().Append(" \" wow, so many \t ").Append("spaces. \"what? ").ToString(), - Eq(" wow, so many \t spaces. what?")); - EXPECT_THAT(util::StringBuilder().Append(" where \t ").Append(" \nis the pie?").ToString(), - Eq("where is the pie?")); -} - -TEST(UtilTest, StringBuilderEscaping) { - EXPECT_THAT(util::StringBuilder() - .Append(" hey guys\\n ") - .Append(" this \\t is so\\\\ cool ") - .ToString(), - Eq("hey guys\n this \t is so\\ cool")); - EXPECT_THAT(util::StringBuilder().Append("\\@\\?\\#\\\\\\'").ToString(), Eq("@?#\\\'")); -} - -TEST(UtilTest, StringBuilderMisplacedQuote) { - util::StringBuilder builder; - EXPECT_FALSE(builder.Append("they're coming!")); -} - -TEST(UtilTest, StringBuilderUnicodeCodes) { - EXPECT_THAT(util::StringBuilder().Append("\\u00AF\\u0AF0 woah").ToString(), - Eq("\u00AF\u0AF0 woah")); - EXPECT_FALSE(util::StringBuilder().Append("\\u00 yo")); -} - -TEST(UtilTest, StringBuilderPreserveSpaces) { - EXPECT_THAT(util::StringBuilder(true /*preserve_spaces*/).Append("\"").ToString(), Eq("\"")); -} - TEST(UtilTest, TokenizeInput) { auto tokenizer = util::Tokenize(StringPiece("this| is|the|end"), '|'); auto iter = tokenizer.begin(); |