diff options
author | Jay Shrauner <shrauner@google.com> | 2012-12-17 11:12:30 -0800 |
---|---|---|
committer | Jay Shrauner <shrauner@google.com> | 2013-02-05 16:14:59 -0800 |
commit | db8a386d111b11463c877b3a14ab62aec761a3f5 (patch) | |
tree | 1a76447eebd49c0b3a5e605a64a1ec74ec942dee /android | |
parent | 7dbe9638c4c7cdb01a71528d8cb1f0065120d7b8 (diff) |
Fix contacts index labels for i18n
Switch ContactsProvider to use ICU for generating index labels,
and remove the custom Korean (KO) and Japanese (JA) code. Add i18n test cases.
Bug:7351596
Change-Id: I7ac25add8b29ff2c6c395f04a83b279b541e4125
Diffstat (limited to 'android')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | android/Android.mk | 47 |
| -rw-r--r-- | android/PhonebookIndex.cpp | 317 |
| -rw-r--r-- | android/PhonebookIndexTest.cpp | 243 |
| -rw-r--r-- | android/PhoneticStringUtils.cpp | 319 |
| -rw-r--r-- | android/PhoneticStringUtils.h | 59 |
| -rw-r--r-- | android/PhoneticStringUtilsTest.cpp | 217 |
| -rw-r--r-- | android/sqlite3_android.cpp | 8 |

7 files changed, 430 insertions, 780 deletions
diff --git a/android/Android.mk b/android/Android.mk index 151a5cb..0bb78d3 100644 --- a/android/Android.mk +++ b/android/Android.mk @@ -2,7 +2,6 @@ LOCAL_PATH:= $(call my-dir) libsqlite3_android_local_src_files := \ PhoneNumberUtils.cpp \ - PhoneticStringUtils.cpp \ OldPhoneNumberUtils.cpp \ PhonebookIndex.cpp \ sqlite3_android.cpp @@ -10,7 +9,8 @@ libsqlite3_android_local_src_files := \ libsqlite3_android_c_includes := \ external/sqlite/dist \ external/icu4c/i18n \ - external/icu4c/common + external/icu4c/common \ + frameworks/native/include include $(CLEAR_VARS) LOCAL_SRC_FILES:= $(libsqlite3_android_local_src_files) @@ -26,24 +26,6 @@ ifeq ($(WITH_HOST_DALVIK),true) include $(BUILD_HOST_STATIC_LIBRARY) endif -# Test for PhoneticStringUtils -include $(CLEAR_VARS) - -LOCAL_MODULE:= libsqlite3_phonetic_string_utils_test - -LOCAL_CFLAGS += -Wall -Werror - -LOCAL_SRC_FILES := \ - PhoneticStringUtils.cpp \ - PhoneticStringUtilsTest.cpp - -LOCAL_MODULE_TAGS := optional - -LOCAL_SHARED_LIBRARIES := \ - libutils - -include $(BUILD_EXECUTABLE) - # Test for PhoneNumberUtils # # You can also test this in Unix, like this: @@ -71,3 +53,28 @@ LOCAL_SRC_FILES := \ LOCAL_MODULE_TAGS := optional include $(BUILD_EXECUTABLE) + +ifeq ($(WITH_HOST_DALVIK),true) + include $(CLEAR_VARS) + + LOCAL_MODULE:= libsqlite3_phone_book_index_test + + LOCAL_SRC_FILES := \ + PhonebookIndex.cpp \ + PhonebookIndexTest.cpp + + LOCAL_C_INCLUDES := \ + external/icu4c/i18n \ + external/icu4c/common \ + frameworks/native/include + + LOCAL_MODULE_TAGS := optional + + LOCAL_SHARED_LIBRARIES := \ + libicui18n libicuuc + + LOCAL_STATIC_LIBRARIES := \ + libutils libcutils + + include $(BUILD_HOST_EXECUTABLE) +endif diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp index 5cc26e5..68674f4 100644 --- a/android/PhonebookIndex.cpp +++ b/android/PhonebookIndex.cpp @@ -14,192 +14,193 @@ * limitations under the License. 
*/ +#include <stdlib.h> #include <ctype.h> #include <string.h> +#include <stdio.h> +#include <unicode/alphaindex.h> #include <unicode/ucol.h> #include <unicode/uiter.h> #include <unicode/ustring.h> #include <unicode/utypes.h> +#include <unicode/uloc.h> +#include <utils/Mutex.h> +#include <utils/RefBase.h> #include "PhonebookIndex.h" -#include "PhoneticStringUtils.h" #define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes) namespace android { -// IMPORTANT! Keep the codes below SORTED. We are doing a binary search on the array -static UChar DEFAULT_CHAR_MAP[] = { - 0x00C6, 'A', // AE - 0x00DF, 'S', // Etzett - 0x1100, 0x3131, // HANGUL LETTER KIYEOK - 0x1101, 0x3132, // HANGUL LETTER SSANGKIYEOK - 0x1102, 0x3134, // HANGUL LETTER NIEUN - 0x1103, 0x3137, // HANGUL LETTER TIKEUT - 0x1104, 0x3138, // HANGUL LETTER SSANGTIKEUT - 0x1105, 0x3139, // HANGUL LETTER RIEUL - 0x1106, 0x3141, // HANGUL LETTER MIEUM - 0x1107, 0x3142, // HANGUL LETTER PIEUP - 0x1108, 0x3143, // HANGUL LETTER SSANGPIEUP - 0x1109, 0x3145, // HANGUL LETTER SIOS - 0x110A, 0x3146, // HANGUL LETTER SSANGSIOS - 0x110B, 0x3147, // HANGUL LETTER IEUNG - 0x110C, 0x3148, // HANGUL LETTER CIEUC - 0x110D, 0x3149, // HANGUL LETTER SSANGCIEUC - 0x110E, 0x314A, // HANGUL LETTER CHIEUCH - 0x110F, 0x314B, // HANGUL LETTER KHIEUKH - 0x1110, 0x314C, // HANGUL LETTER THIEUTH - 0x1111, 0x314D, // HANGUL LETTER PHIEUPH - 0x1112, 0x314E, // HANGUL LETTER HIEUH - 0x111A, 0x3140, // HANGUL LETTER RIEUL-HIEUH - 0x1121, 0x3144, // HANGUL LETTER PIEUP-SIOS - 0x1161, 0x314F, // HANGUL LETTER A - 0x1162, 0x3150, // HANGUL LETTER AE - 0x1163, 0x3151, // HANGUL LETTER YA - 0x1164, 0x3152, // HANGUL LETTER YAE - 0x1165, 0x3153, // HANGUL LETTER EO - 0x1166, 0x3154, // HANGUL LETTER E - 0x1167, 0x3155, // HANGUL LETTER YEO - 0x1168, 0x3156, // HANGUL LETTER YE - 0x1169, 0x3157, // HANGUL LETTER O - 0x116A, 0x3158, // HANGUL LETTER WA - 0x116B, 0x3159, // HANGUL LETTER WAE - 0x116C, 0x315A, // HANGUL 
LETTER OE - 0x116D, 0x315B, // HANGUL LETTER YO - 0x116E, 0x315C, // HANGUL LETTER U - 0x116F, 0x315D, // HANGUL LETTER WEO - 0x1170, 0x315E, // HANGUL LETTER WE - 0x1171, 0x315F, // HANGUL LETTER WI - 0x1172, 0x3160, // HANGUL LETTER YU - 0x1173, 0x3161, // HANGUL LETTER EU - 0x1174, 0x3162, // HANGUL LETTER YI - 0x1175, 0x3163, // HANGUL LETTER I - 0x11AA, 0x3133, // HANGUL LETTER KIYEOK-SIOS - 0x11AC, 0x3135, // HANGUL LETTER NIEUN-CIEUC - 0x11AD, 0x3136, // HANGUL LETTER NIEUN-HIEUH - 0x11B0, 0x313A, // HANGUL LETTER RIEUL-KIYEOK - 0x11B1, 0x313B, // HANGUL LETTER RIEUL-MIEUM - 0x11B3, 0x313D, // HANGUL LETTER RIEUL-SIOS - 0x11B4, 0x313E, // HANGUL LETTER RIEUL-THIEUTH - 0x11B5, 0x313F, // HANGUL LETTER RIEUL-PHIEUPH +// Wrapper class to enable using libutil SmartPointers with AlphabeticIndex. +class AlphabeticIndexRef : public RefBase { +public: + AlphabeticIndexRef(const char *locale, UErrorCode &status) : + m_index(locale, status), m_locale(NULL), m_isJapanese(false) { + if (U_FAILURE(status)) { + return; + } + m_locale = strdup(locale); + if (m_locale == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + char language[4]; + uloc_getLanguage(locale, language, sizeof(language), &status); + if (U_FAILURE(status)) { + return; + } + m_isJapanese = (strcmp(language, ULOC_JAPANESE) == 0); + } + virtual ~AlphabeticIndexRef() { free(m_locale); } + + AlphabeticIndex& operator*() { return m_index; } + AlphabeticIndex* operator->() { return &m_index; } + + bool isLocale(const char *locale) const { + return (locale != NULL && m_locale != NULL && + strcmp(m_locale, locale) == 0); + } + bool isJapanese() const { return m_isJapanese; } + int32_t getLabel(int32_t bucketIndex, UChar *labelBuf, int32_t labelBufSize); + +private: + AlphabeticIndex m_index; + char *m_locale; + bool m_isJapanese; }; -/** - * Binary search to map an individual character to the corresponding phone book index. 
- */ -static UChar map_character(UChar c, UChar * char_map, int32_t length) { - int from = 0, to = length; - while (from < to) { - int m = ((to + from) >> 1) & ~0x1; // Only consider even positions - UChar cm = char_map[m]; - if (cm == c) { - return char_map[m + 1]; - } else if (cm < c) { - from = m + 2; +int32_t AlphabeticIndexRef::getLabel(int32_t bucketIndex, UChar *labelBuf, + int32_t labelBufSize) { + UErrorCode status = U_ZERO_ERROR; + m_index.resetBucketIterator(status); + if (U_FAILURE(status)) { + return -1; + } + for(int i = 0; i <= bucketIndex; ++i) { + if (!m_index.nextBucket(status) || U_FAILURE(status)) { + return -1; + } + } + + int32_t len; + if (m_index.getBucketLabelType() == U_ALPHAINDEX_NORMAL) { + len = m_index.getBucketLabel().extract(labelBuf, labelBufSize, status); + if (U_FAILURE(status)) { + return -1; + } } else { - to = m; + // Use no label for underflow/inflow/overflow buckets + labelBuf[0] = '\0'; + len = 0; } - } - return 0; + return len; } +static Mutex gIndexMutex; +static sp<AlphabeticIndexRef> gIndex; + /** * Returns TRUE if the character belongs to a Hanzi unicode block */ -static bool is_CJK(UChar c) { - return - (0x4e00 <= c && c <= 0x9fff) // CJK_UNIFIED_IDEOGRAPHS - || (0x3400 <= c && c <= 0x4dbf) // CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A - || (0x3000 <= c && c <= 0x303f) // CJK_SYMBOLS_AND_PUNCTUATION - || (0x2e80 <= c && c <= 0x2eff) // CJK_RADICALS_SUPPLEMENT - || (0x3300 <= c && c <= 0x33ff) // CJK_COMPATIBILITY - || (0xfe30 <= c && c <= 0xfe4f) // CJK_COMPATIBILITY_FORMS - || (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS +static bool is_CJ(UChar32 c) { + return (uscript_hasScript(c, USCRIPT_HAN) || + uscript_hasScript(c, USCRIPT_HIRAGANA) || + uscript_hasScript(c, USCRIPT_KATAKANA)); +} + +static bool initIndexForLocale(const char *locale) { + if (locale == NULL) { + return false; + } + + if (gIndex != NULL && gIndex->isLocale(locale)) { + return true; + } + + UErrorCode status = U_ZERO_ERROR; + 
sp<AlphabeticIndexRef> newIndex(new AlphabeticIndexRef(locale, status)); + if (newIndex == NULL || U_FAILURE(status)) { + return false; + } + // Always create labels for Latin characters if not present in native set + (*newIndex)->addLabels("en", status); + if (U_FAILURE(status)) { + return false; + } + if ((*newIndex)->getBucketCount(status) <= 0 || U_FAILURE(status)) { + return false; + } + + gIndex = newIndex; + return true; } -int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size, - UBool * isError) +int32_t GetPhonebookIndex(UCharIterator *iter, const char *locale, + UChar *out, int32_t size, UBool *isError) { - if (size < MIN_OUTPUT_SIZE) { - *isError = TRUE; - return 0; - } - - *isError = FALSE; - - // Normalize the first character to remove accents using the NFD normalization - UErrorCode errorCode = U_ZERO_ERROR; - int32_t len = unorm_next(iter, out, size, UNORM_NFD, - 0 /* options */, TRUE /* normalize */, NULL, &errorCode); - if (U_FAILURE(errorCode)) { - *isError = TRUE; - return 0; - } - - if (len == 0) { // Empty input string - return 0; - } - - UChar c = out[0]; - - if (!u_isalpha(c)) { - // Digits go into a # section. Everything else goes into the empty section - // The unicode function u_isdigit would also identify other characters as digits (arabic), - // but if we caught them here we'd risk having the same section before and after alpha-letters - // which might break the assumption that each section exists only once - if (c >= '0' && c <= '9') { - out[0] = '#'; - return 1; - } - return 0; - } - - c = u_toupper(c); - - // Check for explicitly mapped characters - UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar)); - if (c_mapped != 0) { - out[0] = c_mapped; - return 1; - } - - // Convert Kanas to Hiragana - UChar next = len > 2 ? 
out[1] : 0; - c = android::GetNormalizedCodePoint(c, next, NULL); - - // Traditional grouping of Hiragana characters - if (0x3041 <= c && c <= 0x309F) { - if (c < 0x304B) c = 0x3042; // a - else if (c < 0x3055) c = 0x304B; // ka - else if (c < 0x305F) c = 0x3055; // sa - else if (c < 0x306A) c = 0x305F; // ta - else if (c < 0x306F) c = 0x306A; // na - else if (c < 0x307E) c = 0x306F; // ha - else if (c < 0x3083) c = 0x307E; // ma - else if (c < 0x3089) c = 0x3084; // ya - else if (c < 0x308E) c = 0x3089; // ra - else if (c < 0x3094) c = 0x308F; // wa - else return 0; // Others are not readable - out[0] = c; - return 1; - } else if (0x30A0 <= c && c <= 0x30FF) { - // Dot, onbiki, iteration marks are not readable - return 0; - } - - if (is_CJK(c)) { - if (strncmp(locale, "ja", 2) == 0) { - // Japanese word meaning "misc" or "other" - out[0] = 0x4ED6; - return 1; - } else { - return 0; + if (size < MIN_OUTPUT_SIZE) { + *isError = TRUE; + return 0; + } + + *isError = FALSE; + out[0] = '\0'; + iter->move(iter, 0, UITER_ZERO); + if (!iter->hasNext(iter)) { // Empty input string + return 0; + } + UnicodeString ustr; + bool prefixIsNonNumeric = false; + bool prefixIsNumeric = false; + while (iter->hasNext(iter)) { + UChar32 ch = uiter_next32(iter); + // Ignore standard phone number separators and identify any string + // that otherwise starts with a number. + if (!prefixIsNumeric && !prefixIsNonNumeric) { + if (u_isdigit(ch)) { + prefixIsNumeric = true; + } else if (!u_isspace(ch) && ch != '+' && ch != '(' && + ch != ')' && ch != '.' 
&& ch != '-' && ch != '#') { + prefixIsNonNumeric = true; + } + } + ustr.append(ch); + } + if (prefixIsNumeric) { + out[0] = '#'; + return 1; + } + + Mutex::Autolock autolock(gIndexMutex); + if (!initIndexForLocale(locale)) { + *isError = TRUE; + return 0; + } + + UErrorCode status = U_ZERO_ERROR; + int32_t bucketIndex = (*gIndex)->getBucketIndex(ustr, status); + if (U_FAILURE(status)) { + *isError = TRUE; + return 0; + } + + int32_t len = gIndex->getLabel(bucketIndex, out, size); + if (len < 0) { + *isError = TRUE; + return 0; + } + + // For Japanese, label unclassified CJK ideographs with + // Japanese word meaning "misc" or "other" + if (gIndex->isJapanese() && len == 0 && is_CJ(ustr.char32At(0))) { + out[0] = 0x4ED6; + len = 1; } - } - out[0] = c; - return 1; + return len; } } // namespace android diff --git a/android/PhonebookIndexTest.cpp b/android/PhonebookIndexTest.cpp new file mode 100644 index 0000000..2f11dbe --- /dev/null +++ b/android/PhonebookIndexTest.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "PhonebookIndex.h" + +#include <unicode/unistr.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +using namespace android; + +class TestExecutor { +public: + TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {} + bool DoAllTests(); +private: + void DoOneTest(void (TestExecutor::*test)()); + + void testGetIndex(const char *src, const char *locale, + int32_t expected_len, UChar *expected_value); + void testEnglish(); + + // Note: When adding a test, do not forget to add it to DoOneTest(). + + int m_total_count; + int m_success_count; + + bool m_success; +}; + + +bool TestExecutor::DoAllTests() { + DoOneTest(&TestExecutor::testEnglish); + + printf("Test total: %d\nSuccess: %d\nFailure: %d\n", + m_total_count, m_success_count, m_total_count - m_success_count); + + bool success = m_total_count == m_success_count; + printf("\n%s\n", success ? "Success" : "Failure"); + + return success; +} + +void TestExecutor::DoOneTest(void (TestExecutor::*test)()) { + m_success = true; + + (this->*test)(); + + ++m_total_count; + m_success_count += m_success ? 1 : 0; +} + +#define BUFFER_SIZE 10 + +static void printUTF8Str(const char *utf8_str) { + printf("%s (", utf8_str); + for(; *utf8_str != '\0'; ++utf8_str) { + printf("\\x%02hhX", *utf8_str); + } + printf(")"); +} + +static void printUChars(const UChar *uc_str, int32_t len) { + std::string utf8_str; + UnicodeString(uc_str, len).toUTF8String(utf8_str); + printf("%s (", utf8_str.c_str()); + for(int i=0; i<len; ++i) { + printf("0x%02hx%s", uc_str[i], i < (len - 1) ? 
" " : ""); + } + printf(")"); +} + +void TestExecutor::testGetIndex( + const char *src, const char *locale, + int32_t expected_len, UChar *expected_value) { + UBool isError; + + UCharIterator iter; + uiter_setUTF8(&iter, src, -1); + + UChar outBuf[BUFFER_SIZE]; + + int32_t len = GetPhonebookIndex(&iter, locale, outBuf, sizeof(outBuf), &isError); + if (isError) { + printf("GetPhonebookIndex returned error (%s:%s)\n", locale, src); + m_success = false; + } else if (len != expected_len) { + printf("len is unexpected value (src: [%s] %s, ", locale, src); + printf("actual: %u (", len); + printUChars(outBuf, len); + printf("), expected: %u (", expected_len); + printUChars(expected_value, expected_len); + printf("))\n"); + m_success = false; + } else { + printf("[%s] %s: ", locale, src); + printUChars(outBuf, len); + + if (memcmp(outBuf, expected_value, sizeof(UChar)*expected_len) != 0) { + printf(", expected "); + printUChars(expected_value, expected_len); + m_success = false; + } + printf("\n"); + } +} + +#define TEST_GET_UTF8STR_INDEX(src, locale, ...) \ + ({ \ + UChar uc_expected[] = {__VA_ARGS__}; \ + int32_t len = sizeof(uc_expected)/sizeof(UChar); \ + testGetIndex((src), (locale), len, uc_expected); \ + }) + +#define TEST_GET_UCHAR_INDEX(src, locale, ...) 
\ + ({ \ + std::string utf8_str; \ + UnicodeString((UChar) (src)).toUTF8String(utf8_str); \ + TEST_GET_UTF8STR_INDEX(utf8_str.c_str(), (locale), __VA_ARGS__); \ + }) + +void TestExecutor::testEnglish() { + printf("testEnglish()\n"); + + // English [A-Z] + TEST_GET_UTF8STR_INDEX("Allen", "en", 'A'); + TEST_GET_UTF8STR_INDEX("allen", "en", 'A'); + TEST_GET_UTF8STR_INDEX("123456", "en", '#'); + TEST_GET_UTF8STR_INDEX("+1 (123) 456-7890", "en", '#'); + TEST_GET_UTF8STR_INDEX("(33) 44.55.66.08", "en", '#'); + TEST_GET_UTF8STR_INDEX("123 Jump", "en", '#'); + // Arabic numbers + TEST_GET_UTF8STR_INDEX("\u0662\u0663\u0664\u0665\u0666", "en", '#'); + + // Japanese + // sorts hiragana/katakana, Kanji/Chinese, English, other + // …, あ, か, さ, た, な, は, ま, や, ら, わ, … + // hiragana "a" + TEST_GET_UCHAR_INDEX(0x3041, "ja", 0x3042); + // katakana "a" + TEST_GET_UCHAR_INDEX(0x30A1, "ja", 0x3042); + + // Kanji (sorts to inflow section) + TEST_GET_UCHAR_INDEX(0x65E5, "ja", 0x4ed6); + // English + TEST_GET_UTF8STR_INDEX("Smith", "ja", 'S'); + TEST_GET_UTF8STR_INDEX("234567", "ja", '#'); + // Chinese (sorts to inflow section) + TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "ja", 0x4ed6); + // Korean Hangul (sorts to overflow section) + TEST_GET_UCHAR_INDEX(0x1100, "ja", /* null */ ); + + // Korean (sorts Korean, then English) + // …, ᄀ, ᄂ, ᄃ, ᄅ, ᄆ, ᄇ, ᄉ, ᄋ, ᄌ, ᄎ, ᄏ, ᄐ, ᄑ, ᄒ, … + TEST_GET_UCHAR_INDEX(0x1100, "ko", 0x1100); + TEST_GET_UCHAR_INDEX(0x3131, "ko", 0x1100); + TEST_GET_UCHAR_INDEX(0x1101, "ko", 0x1100); + TEST_GET_UCHAR_INDEX(0x1161, "ko", 0x1112); + + // Czech + // …, [A-C], Č,[D-H], CH, [I-R], Ř, S, Š, [T-Z], Ž, … + TEST_GET_UTF8STR_INDEX("Cena", "cs", 'C'); + TEST_GET_UTF8STR_INDEX("Čáp", "cs", 0x010c); + TEST_GET_UTF8STR_INDEX("Ruda", "cs", 'R'); + TEST_GET_UTF8STR_INDEX("Řada", "cs", 0x0158); + TEST_GET_UTF8STR_INDEX("Selka", "cs", 'S'); + TEST_GET_UTF8STR_INDEX("Šála", "cs", 0x0160); + TEST_GET_UTF8STR_INDEX("Zebra", "cs", 'Z'); + TEST_GET_UTF8STR_INDEX("Žába", "cs", 
0x017d); + TEST_GET_UTF8STR_INDEX("Chata", "cs", 'C', 'H'); + + // French: [A-Z] (no accented chars) + TEST_GET_UTF8STR_INDEX("Øfer", "fr", 'O'); + TEST_GET_UTF8STR_INDEX("Œster", "fr", 'O'); + + // Danish: [A-Z], Æ, Ø, Å + TEST_GET_UTF8STR_INDEX("Ænes", "da", 0xc6); + TEST_GET_UTF8STR_INDEX("Øfer", "da", 0xd8); + TEST_GET_UTF8STR_INDEX("Œster", "da", 0xd8); + TEST_GET_UTF8STR_INDEX("Ågård", "da", 0xc5); + + // German: [A-Z] (no ß or umlauted characters in standard alphabet) + TEST_GET_UTF8STR_INDEX("ßind", "de", 'S'); + + // Simplified Chinese (default collator Pinyin): [A-Z] + // Shen/Chen (simplified): should be, usually, 'S' for name collator and 'C' for apps/other + TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "zh_CN", 'C'); + // Shen/Chen (traditional) + TEST_GET_UCHAR_INDEX(0x700b, "zh_CN", 'S'); + // Jia/Gu: should be, usually, 'J' for name collator and 'G' for apps/other + TEST_GET_UCHAR_INDEX(0x8d3e /* Jia/Gu */, "zh_CN", 'J'); + + // Traditional Chinese + // …, 一, 丁, 丈, 不, 且, 丞, 串, 並, 亭, 乘, 乾, 傀, 亂, 僎, 僵, 儐, 償, 叢, 儳, 嚴, 儷, 儻, 囌, 囑, 廳, … + TEST_GET_UCHAR_INDEX(0x6c88 /* Shen/Chen */, "zh_TW", 0x5080); + TEST_GET_UCHAR_INDEX(0x700b /* Shen/Chen */, "zh_TW", 0x53e2); + TEST_GET_UCHAR_INDEX(0x8d3e /* Jia/Gu */, "zh_TW", 0x5080); + + // Thai (sorts English then Thai) + // …, ก, ข, ฃ, ค, ฅ, ฆ, ง, จ, ฉ, ช, ซ, ฌ, ญ, ฎ, ฏ, ฐ, ฑ, ฒ, ณ, ด, ต, ถ, ท, ธ, น, บ, ป, ผ, ฝ, พ, ฟ, ภ, ม, ย, ร, ฤ, ล, ฦ, ว, ศ, ษ, ส, ห, ฬ, อ, ฮ, …, + + TEST_GET_UTF8STR_INDEX("\u0e2d\u0e07\u0e04\u0e4c\u0e40\u0e25\u0e47\u0e01", + "th", 0xe2d); + TEST_GET_UTF8STR_INDEX("\u0e2a\u0e34\u0e07\u0e2b\u0e40\u0e2a\u0e19\u0e35", + "th", 0xe2a); + // Thai numbers ((02) 432-0281) + TEST_GET_UTF8STR_INDEX("(\u0e50\u0e52) \u0e54\u0e53\u0e52-" + "\u0e50\u0e52\u0e58\u0e51", "th", '#'); + + // Arabic (sorts English then Arabic) + // …, ا, ب, ت, ث, ج, ح, خ, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ع, غ, ف, ق, ك, ل, م, ن, ه, و, ي, … + TEST_GET_UTF8STR_INDEX("\u0646\u0648\u0631" /* Noor */, "ar", 0x646); + // Arabic numbers 
(34567) + TEST_GET_UTF8STR_INDEX("\u0662\u0663\u0664\u0665\u0666", "ar", '#'); + + // Hebrew (sorts English then Hebrew) + // …, א, ב, ג, ד, ה, ו, ז, ח, ט, י, כ, ל, מ, נ, ס, ע, פ, צ, ק, ר, ש, ת, … + TEST_GET_UTF8STR_INDEX("\u05e4\u05e8\u05d9\u05d3\u05de\u05df", "he", 0x5e4); +} + +int main() { + TestExecutor executor; + if(executor.DoAllTests()) { + return 0; + } else { + return 1; + } +} diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp deleted file mode 100644 index 796eaa2..0000000 --- a/android/PhoneticStringUtils.cpp +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include <stdio.h> -#include <stdlib.h> - -#include "PhoneticStringUtils.h" -#include <utils/Unicode.h> - -// We'd like 0 length string last of sorted list. So when input string is NULL -// or 0 length string, we use these instead. -#define CODEPOINT_FOR_NULL_STR 0xFFFD -#define STR_FOR_NULL_STR "\xEF\xBF\xBD" - -// We assume that users will not notice strings not sorted properly when the -// first 128 characters are the same. -#define MAX_CODEPOINTS 128 - -namespace android { - -// Get hiragana from halfwidth katakana. 
-static int GetHiraganaFromHalfwidthKatakana(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed) { - if (codepoint < 0xFF66 || 0xFF9F < codepoint) { - return codepoint; - } - - switch (codepoint) { - case 0xFF66: // wo - return 0x3092; - case 0xFF67: // xa - return 0x3041; - case 0xFF68: // xi - return 0x3043; - case 0xFF69: // xu - return 0x3045; - case 0xFF6A: // xe - return 0x3047; - case 0xFF6B: // xo - return 0x3049; - case 0xFF6C: // xya - return 0x3083; - case 0xFF6D: // xyu - return 0x3085; - case 0xFF6E: // xyo - return 0x3087; - case 0xFF6F: // xtsu - return 0x3063; - case 0xFF70: // - - return 0x30FC; - case 0xFF9C: // wa - return 0x308F; - case 0xFF9D: // n - return 0x3093; - break; - default: { - if (0xFF71 <= codepoint && codepoint <= 0xFF75) { - // a, i, u, e, o - if (codepoint == 0xFF73 && next_codepoint == 0xFF9E) { - if (next_is_consumed != NULL) { - *next_is_consumed = true; - } - return 0x3094; // vu - } else { - return 0x3042 + (codepoint - 0xFF71) * 2; - } - } else if (0xFF76 <= codepoint && codepoint <= 0xFF81) { - // ka - chi - if (next_codepoint == 0xFF9E) { - // "dakuten" (voiced mark) - if (next_is_consumed != NULL) { - *next_is_consumed = true; - } - return 0x304B + (codepoint - 0xFF76) * 2 + 1; - } else { - return 0x304B + (codepoint - 0xFF76) * 2; - } - } else if (0xFF82 <= codepoint && codepoint <= 0xFF84) { - // tsu, te, to (skip xtsu) - if (next_codepoint == 0xFF9E) { - // "dakuten" (voiced mark) - if (next_is_consumed != NULL) { - *next_is_consumed = true; - } - return 0x3064 + (codepoint - 0xFF82) * 2 + 1; - } else { - return 0x3064 + (codepoint - 0xFF82) * 2; - } - } else if (0xFF85 <= codepoint && codepoint <= 0xFF89) { - // na, ni, nu, ne, no - return 0x306A + (codepoint - 0xFF85); - } else if (0xFF8A <= codepoint && codepoint <= 0xFF8E) { - // ha, hi, hu, he, ho - if (next_codepoint == 0xFF9E) { - // "dakuten" (voiced mark) - if (next_is_consumed != NULL) { - *next_is_consumed = true; - } - return 0x306F + 
(codepoint - 0xFF8A) * 3 + 1; - } else if (next_codepoint == 0xFF9F) { - // "han-dakuten" (half voiced mark) - if (next_is_consumed != NULL) { - *next_is_consumed = true; - } - return 0x306F + (codepoint - 0xFF8A) * 3 + 2; - } else { - return 0x306F + (codepoint - 0xFF8A) * 3; - } - } else if (0xFF8F <= codepoint && codepoint <= 0xFF93) { - // ma, mi, mu, me, mo - return 0x307E + (codepoint - 0xFF8F); - } else if (0xFF94 <= codepoint && codepoint <= 0xFF96) { - // ya, yu, yo - return 0x3084 + (codepoint - 0xFF94) * 2; - } else if (0xFF97 <= codepoint && codepoint <= 0xFF9B) { - // ra, ri, ru, re, ro - return 0x3089 + (codepoint - 0xFF97); - } - // Note: 0xFF9C, 0xFF9D are handled above - } // end of default - } - - return codepoint; -} - -// Assuming input is hiragana, convert the hiragana to "normalized" hiragana. -static int GetNormalizedHiragana(int codepoint) { - if (codepoint < 0x3040 || 0x309F < codepoint) { - return codepoint; - } - - // TODO: should care (semi-)voiced mark (0x3099, 0x309A). - - // Trivial kana conversions. - // e.g. 
xa => a - switch (codepoint) { - case 0x3041: - case 0x3043: - case 0x3045: - case 0x3047: - case 0x3049: - case 0x3063: - case 0x3083: - case 0x3085: - case 0x3087: - case 0x308E: // xwa - return codepoint + 1; - case 0x3095: // xka - return 0x304B; - case 0x3096: // xke - return 0x3051; - case 0x31F0: // xku - return 0x304F; - case 0x31F1: // xsi - return 0x3057; - case 0x31F2: // xsu - return 0x3059; - case 0x31F3: // xto - return 0x3068; - case 0x31F4: // xnu - return 0x306C; - case 0x31F5: // xha - return 0x306F; - case 0x31F6: // xhi - return 0x3072; - case 0x31F7: // xhu - return 0x3075; - case 0x31F8: // xhe - return 0x3078; - case 0x31F9: // xho - return 0x307B; - case 0x31FA: // xmu - return 0x3080; - case 0x31FB: // xra - case 0x31FC: // xri - case 0x31FD: // xru - case 0x31FE: // xre - case 0x31FF: // xro - // ra: 0x3089 - return 0x3089 + (codepoint - 0x31FB); - default: - return codepoint; - } -} - -static int GetNormalizedKana(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed) { - // First, convert fullwidth katakana and halfwidth katakana to hiragana. - if (0x30A1 <= codepoint && codepoint <= 0x30F6) { - // Make fullwidth katakana same as hiragana. - // 96 == 0x30A1 - 0x3041c - codepoint = codepoint - 96; - } else if (codepoint == 0x309F) { - // Digraph YORI; Yo - codepoint = 0x3088; - } else if (codepoint == 0x30FF) { - // Digraph KOTO; Ko - codepoint = 0x3053; - } else { - codepoint = GetHiraganaFromHalfwidthKatakana( - codepoint, next_codepoint, next_is_consumed); - } - - // Normalize Hiragana. - return GetNormalizedHiragana(codepoint); -} - -int GetNormalizedCodePoint(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed) { - if (next_is_consumed != NULL) { - *next_is_consumed = false; - } - - if (codepoint <= 0x0020 || codepoint == 0x3000) { - // Whitespaces. Keep it as is. 
- return codepoint; - } else if ((0x0021 <= codepoint && codepoint <= 0x007E) || - (0xFF01 <= codepoint && codepoint <= 0xFF5E)) { - // Ascii and fullwidth ascii. Keep it as is - return codepoint; - } else if (codepoint == 0x02DC || codepoint == 0x223C) { - // tilde - return 0xFF5E; - } else if (codepoint <= 0x3040 || - (0x3100 <= codepoint && codepoint < 0xFF00) || - codepoint == CODEPOINT_FOR_NULL_STR) { - // Keep it as is. - return codepoint; - } - - // Below is Kana-related handling. - - return GetNormalizedKana(codepoint, next_codepoint, next_is_consumed); -} - -static bool GetExpectedString( - const char *src, char **dst, size_t *dst_len, - int (*get_codepoint_function)(char32_t, char32_t, bool*)) { - if (dst == NULL || dst_len == NULL) { - return false; - } - - if (src == NULL || *src == '\0') { - src = STR_FOR_NULL_STR; - } - - char32_t codepoints[MAX_CODEPOINTS]; // if array size is changed the for loop needs to be changed - - ssize_t src_len = utf8_length(src); - if (src_len <= 0) { - return false; - } - - bool next_is_consumed; - size_t j = 0; - for (size_t i = 0; i < (size_t)src_len && j < MAX_CODEPOINTS;) { - int32_t ret = utf32_from_utf8_at(src, src_len, i, &i); - if (ret < 0) { - // failed to parse UTF-8 - return false; - } - ret = get_codepoint_function( - static_cast<char32_t>(ret), - i + 1 < (size_t)src_len ? src[i + 1] : 0, - &next_is_consumed); - if (ret > 0) { - codepoints[j] = static_cast<char32_t>(ret); - j++; - } - if (next_is_consumed) { - i++; - } - } - size_t length = j; - - if (length == 0) { - // If all of codepoints are invalid, we place the string at the end of - // the list. 
- codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR; - length = 1; - } - - ssize_t new_len = utf32_to_utf8_length(codepoints, length); - if (new_len < 0) { - return false; - } - - *dst = static_cast<char *>(malloc(new_len + 1)); - if (*dst == NULL) { - return false; - } - - utf32_to_utf8(codepoints, length, *dst); - - *dst_len = new_len; - return true; -} - -bool GetNormalizedString(const char *src, char **dst, size_t *len) { - return GetExpectedString(src, dst, len, GetNormalizedCodePoint); -} - -} // namespace android diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h deleted file mode 100644 index a567a27..0000000 --- a/android/PhoneticStringUtils.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ANDROID_PHONETIC_STRING_UTILS_H -#define _ANDROID_PHONETIC_STRING_UTILS_H - -#include <string.h> // For size_t. -#include <utils/String8.h> - -namespace android { - -// Returns codepoint which is "normalized", whose definition depends on each -// Locale. Note that currently this function normalizes only Japanese; the -// other characters are remained as is. -// The variable "next_is_consumed" is set to true if "next_codepoint" -// is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed -// when previous "codepoint" is appropriate, like half-width "ka"). 
-// -// In Japanese, "normalized" means that half-width and full-width katakana is -// appropriately converted to hiragana. -int GetNormalizedCodePoint(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed); - -// Pushes Utf8 expression of "codepoint" to "dst". Returns true when successful. -// If input is invalid or the length of the destination is not enough, -// returns false. -bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index); - -// Creates a "phonetically sortable" Utf8 string and push it into "dst". -// *dst must be freed after being used outside. -// If "src" is NULL or its length is 0, "dst" is set to \uFFFF. -// -// Note that currently this function considers only Japanese. -bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len); - -// Creates a "normalized" Utf8 string and push it into "dst". *dst must be -// freed after being used outside. -// If "src" is NULL or its length is 0, "dst" is set to \uFFFF. -// -// Note that currently this function considers only Japanese. -bool GetNormalizedString(const char *src, char **dst, size_t *len); - -} // namespace android - -#endif diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp deleted file mode 100644 index 9885823..0000000 --- a/android/PhoneticStringUtilsTest.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (C) 2009 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "PhoneticStringUtils.h" - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <utils/String8.h> - -using namespace android; - -class TestExecutor { - public: - TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {} - bool DoAllTests(); - private: - void DoOneTest(void (TestExecutor::*test)()); - - void testUtf32At(); - void testGetUtf8FromUtf32(); - void testGetNormalizedString(); - void testLongString(); - - // Note: When adding a test, do not forget to add it to DoOneTest(). - - int m_total_count; - int m_success_count; - - bool m_success; -}; - -#define ASSERT_EQ_VALUE(input, expected) \ - ({ \ - if ((expected) != (input)) { \ - printf("0x%X(result) != 0x%X(expected)\n", input, expected); \ - m_success = false; \ - return; \ - } \ - }) - -#define EXPECT_EQ_VALUE(input, expected) \ - ({ \ - if ((expected) != (input)) { \ - printf("0x%X(result) != 0x%X(expected)\n", input, expected); \ - m_success = false; \ - } \ - }) - - -bool TestExecutor::DoAllTests() { - DoOneTest(&TestExecutor::testUtf32At); - DoOneTest(&TestExecutor::testGetUtf8FromUtf32); - DoOneTest(&TestExecutor::testGetNormalizedString); - DoOneTest(&TestExecutor::testLongString); - - printf("Test total: %d\nSuccess: %d\nFailure: %d\n", - m_total_count, m_success_count, m_total_count - m_success_count); - - bool success = m_total_count == m_success_count; - printf("\n%s\n", success ? "Success" : "Failure"); - - return success; -} - -void TestExecutor::DoOneTest(void (TestExecutor::*test)()) { - m_success = true; - - (this->*test)(); - - ++m_total_count; - m_success_count += m_success ? 
1 : 0; -} - -#define TEST_GET_UTF32AT(src, index, expected_next, expected_value) \ - ({ \ - size_t next; \ - int32_t ret = utf32_from_utf8_at(src, strlen(src), index, &next); \ - if (ret < 0) { \ - printf("getUtf32At() returned negative value (src: %s, index: %d)\n", \ - (src), (index)); \ - m_success = false; \ - } else if (next != (expected_next)) { \ - printf("next is unexpected value (src: %s, actual: %u, expected: %u)\n", \ - (src), next, (expected_next)); \ - } else { \ - EXPECT_EQ_VALUE(ret, (expected_value)); \ - } \ - }) - -void TestExecutor::testUtf32At() { - printf("testUtf32At()\n"); - - TEST_GET_UTF32AT("a", 0, 1, 97); - // Japanese hiragana "a" - TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042); - // Japanese fullwidth katakana "a" with ascii a - TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2); - - // 2 PUA - TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000); - TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008); -} - - -#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \ - ({ \ - char32_t codepoints[1] = {codepoint}; \ - status_t ret = string8.setTo(codepoints, 1); \ - if (ret != NO_ERROR) { \ - printf("GetUtf8FromCodePoint() returned false at 0x%04X\n", codepoint); \ - m_success = false; \ - } else { \ - const char* string = string8.string(); \ - if (strcmp(string, expected) != 0) { \ - printf("Failed at codepoint 0x%04X\n", codepoint); \ - for (const char *ch = string; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("!= "); \ - for (const char *ch = expected; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("\n"); \ - m_success = false; \ - } \ - } \ - }) - -void TestExecutor::testGetUtf8FromUtf32() { - printf("testGetUtf8FromUtf32()\n"); - String8 string8; - - EXPECT_EQ_CODEPOINT_UTF8('a', "\x61"); - // Armenian capital letter AYB (2 bytes in UTF8) - EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0"); - // Japanese 'a' (3 bytes in UTF8) - EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82"); 
- // Kanji - EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5"); - // PUA (4 byets in UTF8) - EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96"); - EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2"); -} - -#define EXPECT_EQ_UTF8_UTF8(src, expected) \ - ({ \ - if (!GetNormalizedString(src, &dst, &len)) { \ - printf("GetNormalizedSortableString() returned false.\n"); \ - m_success = false; \ - } else { \ - if (strcmp(dst, expected) != 0) { \ - for (const char *ch = dst; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("!= "); \ - for (const char *ch = expected; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("\n"); \ - m_success = false; \ - } \ - free(dst); \ - } \ - }) - -void TestExecutor::testGetNormalizedString() { - printf("testGetNormalizedString()\n"); - char *dst; - size_t len; - - // halfwidth alphabets/symbols -> keep it as is. - EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()", - "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()"); - EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/", - "abcdefghijklmnopqrstuvwxyz[]{}\\@/"); - - // halfwidth/fullwidth-katakana -> hiragana - EXPECT_EQ_UTF8_UTF8( - "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA", - "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A"); - - // whitespace -> keep it as is. 
- EXPECT_EQ_UTF8_UTF8(" \t", " \t"); -} - -void TestExecutor::testLongString() { - printf("testLongString()\n"); - char * dst; - size_t len; - EXPECT_EQ_UTF8_UTF8("Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttgggggggggggggggggggggggggggggggggggggggbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", - "Qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqtttttttttttttttttttttttttttttttttttttttttttttttttggggggggggggggggggggggggggggggggggg"); -} - - -int main() { - TestExecutor executor; - if(executor.DoAllTests()) { - return 0; - } else { - return 1; - } -} diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp index 5daf15e..fe826fd 100644 --- a/android/sqlite3_android.cpp +++ b/android/sqlite3_android.cpp @@ -509,14 +509,8 @@ extern "C" int register_localized_collators(sqlite3* handle, const char* systemL //// PHONEBOOK_COLLATOR - // The collator may be removed in the near future. Do not depend on it. - // TODO: it might be better to have another function for registering phonebook collator. status = U_ZERO_ERROR; - if (strcmp(systemLocale, "ja") == 0 || strcmp(systemLocale, "ja_JP") == 0) { - collator = ucol_open("ja@collation=phonebook", &status); - } else { - collator = ucol_open(systemLocale, &status); - } + collator = ucol_open(systemLocale, &status); if (U_FAILURE(status)) { return -1; } |