summaryrefslogtreecommitdiff
path: root/libs/androidfw/LocaleData.cpp
diff options
context:
space:
mode:
author Roozbeh Pournader <roozbeh@google.com> 2016-01-15 11:23:42 -0800
committer Roozbeh Pournader <roozbeh@google.com> 2016-01-21 13:47:22 -0800
commit b927c559e1ef8530b08712507f320502627db298 (patch)
tree f0d2fd051cb4486239e6f6187e0da53e416fdaeb /libs/androidfw/LocaleData.cpp
parent ac3e599069e1b87ea190f008aef60a506c8561c7 (diff)
Implement smarter locale resource selection
* Add support for determining script from language and region.
* Add support for determining special parents of locales.
* Add support for smart comparison of locales with only a difference in region, using the locale parentage tree.
* Fix LocaleData.matchScore() to not fall back to old locale matching behavior if we can't determine a script.
* Allow four-character variant codes. (Previously, only five- to eight-character variant codes were allowed.)

Bug: 7296673
Bug: 26589793
Change-Id: Ibde0a48c0564ff383b41068095a5cbacfe7b94bc
Diffstat (limited to 'libs/androidfw/LocaleData.cpp')
-rw-r--r-- libs/androidfw/LocaleData.cpp 201
1 files changed, 201 insertions, 0 deletions
diff --git a/libs/androidfw/LocaleData.cpp b/libs/androidfw/LocaleData.cpp
new file mode 100644
index 000000000000..c0c3ab883a42
--- /dev/null
+++ b/libs/androidfw/LocaleData.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include <androidfw/LocaleData.h>
+
+namespace android {
+
+#include "LocaleDataTables.cpp"
+
// Packs the first two bytes of a language code and the first two bytes of a
// region code into a single 32-bit key, laid out from the most significant
// byte down as: language[0], language[1], region[0], region[1].
//
// Exactly two bytes are read from each argument, so both must point to at
// least two readable bytes.
inline uint32_t packLocale(const char* language, const char* region) {
    // Go through uint8_t so each byte is taken as an unsigned value whatever
    // the signedness of plain char, then widen to uint32_t *before* shifting.
    // Shifting the uint8_t directly would promote it to signed int, and a
    // byte >= 0x80 shifted left by 24 would land in the sign bit.
    const uint32_t language_hi = static_cast<uint32_t>(static_cast<uint8_t>(language[0]));
    const uint32_t language_lo = static_cast<uint32_t>(static_cast<uint8_t>(language[1]));
    const uint32_t region_hi = static_cast<uint32_t>(static_cast<uint8_t>(region[0]));
    const uint32_t region_lo = static_cast<uint32_t>(static_cast<uint8_t>(region[1]));
    return (language_hi << 24u) | (language_lo << 16u) | (region_hi << 8u) | region_lo;
}
+
// Returns the packed locale with its region half (the low 16 bits) cleared,
// leaving only the language half (the high 16 bits).
inline uint32_t dropRegion(uint32_t packed_locale) {
    const uint32_t language_only = packed_locale >> 16u;
    return language_only << 16u;
}
+
// Returns true when the region half (the low 16 bits) of the packed locale
// is non-zero.
inline bool hasRegion(uint32_t packed_locale) {
    const uint16_t region_bits = static_cast<uint16_t>(packed_locale);
    return region_bits != 0;
}
+
+const size_t SCRIPT_LENGTH = 4;
+const size_t SCRIPT_PARENTS_COUNT = sizeof(SCRIPT_PARENTS)/sizeof(SCRIPT_PARENTS[0]);
+const uint32_t PACKED_ROOT = 0; // to represent the root locale
+
+uint32_t findParent(uint32_t packed_locale, const char* script) {
+ if (hasRegion(packed_locale)) {
+ for (size_t i = 0; i < SCRIPT_PARENTS_COUNT; i++) {
+ if (memcmp(script, SCRIPT_PARENTS[i].script, SCRIPT_LENGTH) == 0) {
+ auto map = SCRIPT_PARENTS[i].map;
+ auto lookup_result = map->find(packed_locale);
+ if (lookup_result != map->end()) {
+ return lookup_result->second;
+ }
+ break;
+ }
+ }
+ return dropRegion(packed_locale);
+ }
+ return PACKED_ROOT;
+}
+
+// Find the ancestors of a locale, and fill 'out' with it (assumes out has enough
+// space). If any of the members of stop_list was seen, write it in the
+// output but stop afterwards.
+//
+// This also outputs the index of the last written ancestor in the stop_list
+// to stop_list_index, which will be -1 if it is not found in the stop_list.
+//
+// Returns the number of ancestors written in the output, which is always
+// at least one.
+size_t findAncestors(uint32_t* out, ssize_t* stop_list_index,
+ uint32_t packed_locale, const char* script,
+ const uint32_t* stop_list, size_t stop_set_length) {
+ uint32_t ancestor = packed_locale;
+ size_t count = 0;
+ do {
+ out[count++] = ancestor;
+ for (size_t i = 0; i < stop_set_length; i++) {
+ if (stop_list[i] == ancestor) {
+ *stop_list_index = (ssize_t) i;
+ return count;
+ }
+ }
+ ancestor = findParent(ancestor, script);
+ } while (ancestor != PACKED_ROOT);
+ *stop_list_index = (ssize_t) -1;
+ return count;
+}
+
+size_t findDistance(uint32_t supported,
+ const char* script,
+ const uint32_t* request_ancestors,
+ size_t request_ancestors_count) {
+ uint32_t supported_ancestors[MAX_PARENT_DEPTH+1];
+ ssize_t request_ancestors_index;
+ const size_t supported_ancestor_count = findAncestors(
+ supported_ancestors, &request_ancestors_index,
+ supported, script,
+ request_ancestors, request_ancestors_count);
+ // Since both locales share the same root, there will always be a shared
+ // ancestor, so the distance in the parent tree is the sum of the distance
+ // of 'supported' to the lowest common ancestor (number of ancestors
+ // written for 'supported' minus 1) plus the distance of 'request' to the
+ // lowest common ancestor (the index of the ancestor in request_ancestors).
+ return supported_ancestor_count + request_ancestors_index - 1;
+}
+
+inline bool isRepresentative(uint32_t language_and_region, const char* script) {
+ const uint64_t packed_locale = (
+ (((uint64_t) language_and_region) << 32u) |
+ (((uint64_t) script[0]) << 24u) |
+ (((uint64_t) script[1]) << 16u) |
+ (((uint64_t) script[2]) << 8u) |
+ ((uint64_t) script[3]));
+
+ return (REPRESENTATIVE_LOCALES.count(packed_locale) != 0);
+}
+
+int localeDataCompareRegions(
+ const char* left_region, const char* right_region,
+ const char* requested_language, const char* requested_script,
+ const char* requested_region) {
+
+ if (left_region[0] == right_region[0] && left_region[1] == right_region[1]) {
+ return 0;
+ }
+ const uint32_t left = packLocale(requested_language, left_region);
+ const uint32_t right = packLocale(requested_language, right_region);
+ const uint32_t request = packLocale(requested_language, requested_region);
+
+ uint32_t request_ancestors[MAX_PARENT_DEPTH+1];
+ ssize_t left_right_index;
+ // Find the parents of the request, but stop as soon as we saw left or right
+ const uint32_t left_and_right[] = {left, right};
+ const size_t ancestor_count = findAncestors(
+ request_ancestors, &left_right_index,
+ request, requested_script,
+ left_and_right, sizeof(left_and_right)/sizeof(left_and_right[0]));
+ if (left_right_index == 0) { // We saw left earlier
+ return 1;
+ }
+ if (left_right_index == 1) { // We saw right earlier
+ return -1;
+ }
+
+ // If we are here, neither left nor right are an ancestor of the
+ // request. This means that all the ancestors have been computed and
+ // the last ancestor is just the language by itself. We will use the
+ // distance in the parent tree for determining the better match.
+ const size_t left_distance = findDistance(
+ left, requested_script, request_ancestors, ancestor_count);
+ const size_t right_distance = findDistance(
+ right, requested_script, request_ancestors, ancestor_count);
+ if (left_distance != right_distance) {
+ return (int) right_distance - (int) left_distance; // smaller distance is better
+ }
+
+ // If we are here, left and right are equidistant from the request. We will
+ // try and see if any of them is a representative locale.
+ const bool left_is_representative = isRepresentative(left, requested_script);
+ const bool right_is_representative = isRepresentative(right, requested_script);
+ if (left_is_representative != right_is_representative) {
+ return (int) left_is_representative - (int) right_is_representative;
+ }
+
+ // We have no way of figuring out which locale is a better match. For
+ // the sake of stability, we consider the locale with the lower region
+ // code (in dictionary order) better, with two-letter codes before
+ // three-digit codes (since two-letter codes are more specific).
+ return (int64_t) right - (int64_t) left;
+}
+
+void localeDataComputeScript(char out[4], const char* language, const char* region) {
+ if (language[0] == '\0') {
+ memset(out, '\0', SCRIPT_LENGTH);
+ return;
+ }
+ uint32_t lookup_key = packLocale(language, region);
+ auto lookup_result = LIKELY_SCRIPTS.find(lookup_key);
+ if (lookup_result == LIKELY_SCRIPTS.end()) {
+ // We couldn't find the locale. Let's try without the region
+ if (region[0] != '\0') {
+ lookup_key = dropRegion(lookup_key);
+ lookup_result = LIKELY_SCRIPTS.find(lookup_key);
+ if (lookup_result != LIKELY_SCRIPTS.end()) {
+ memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
+ return;
+ }
+ }
+ // We don't know anything about the locale
+ memset(out, '\0', SCRIPT_LENGTH);
+ return;
+ } else {
+ // We found the locale.
+ memcpy(out, SCRIPT_CODES[lookup_result->second], SCRIPT_LENGTH);
+ }
+}
+
+} // namespace android