diff options
Diffstat (limited to 'android/PhonebookIndex.cpp')
-rw-r--r-- | android/PhonebookIndex.cpp | 96 |
1 files changed, 65 insertions, 31 deletions
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp index f82c9d2..4d3da61 100644 --- a/android/PhonebookIndex.cpp +++ b/android/PhonebookIndex.cpp @@ -25,7 +25,7 @@ #include "PhonebookIndex.h" #include "PhoneticStringUtils.h" -#define SMALL_BUFFER_SIZE 10 +#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes) namespace android { @@ -118,45 +118,79 @@ static bool is_CJK(UChar c) { || (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS } -UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) { - UChar dest[SMALL_BUFFER_SIZE]; +int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size, + UBool * isError) +{ + if (size < MIN_OUTPUT_SIZE) { + *isError = TRUE; + return 0; + } - // Normalize the first character to remove accents using the NFD normalization - UErrorCode errorCode = U_ZERO_ERROR; - int32_t len = unorm_next(iter, dest, SMALL_BUFFER_SIZE * sizeof(UChar), UNORM_NFD, - 0 /* options */, TRUE /* normalize */, NULL, &errorCode); - if (U_FAILURE(errorCode) || len == 0) { - return 0; - } + *isError = FALSE; - UChar c = dest[0]; + // Normalize the first character to remove accents using the NFD normalization + UErrorCode errorCode = U_ZERO_ERROR; + int32_t len = unorm_next(iter, out, size, UNORM_NFD, + 0 /* options */, TRUE /* normalize */, NULL, &errorCode); + if (U_FAILURE(errorCode)) { + *isError = TRUE; + return 0; + } - // We are only interested in letters - if (!u_isalpha(c)) { - return 0; - } + if (len == 0) { // Empty input string + return 0; + } - c = u_toupper(c); + UChar c = out[0]; - // Check for explicitly mapped characters - UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar)); - if (c_mapped != 0) { - return c_mapped; - } + // We are only interested in letters + if (!u_isalpha(c)) { + return 0; + } + + c = u_toupper(c); + + // Check for explicitly mapped characters + UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar)); + if (c_mapped != 0) { + out[0] = c_mapped; + return 1; + } - // Convert Kanas to Hiragana - UChar next = len > 2 ? dest[1] : 0; - c = android::GetNormalizedCodePoint(c, next, NULL); + // Convert Kanas to Hiragana + UChar next = len > 2 ? out[1] : 0; + c = android::GetNormalizedCodePoint(c, next, NULL); + + // Traditional grouping of Hiragana characters + if (0x3042 <= c && c <= 0x309F) { + if (c < 0x304B) c = 0x3042; // a + else if (c < 0x3055) c = 0x304B; // ka + else if (c < 0x305F) c = 0x3055; // sa + else if (c < 0x306A) c = 0x305F; // ta + else if (c < 0x306F) c = 0x306A; // na + else if (c < 0x307E) c = 0x306F; // ha + else if (c < 0x3084) c = 0x307E; // ma + else if (c < 0x3089) c = 0x3084; // ya + else if (c < 0x308F) c = 0x3089; // ra + else c = 0x308F; // wa + out[0] = c; + return 1; + } - if (is_CJK(c)) { - if (strncmp(locale, "ja", 2) == 0) { - return 0x8A18; // Kanji character used as a heading in letters, notices and other documents - } else { - return 0; - } + if (is_CJK(c)) { + if (strncmp(locale, "ja", 2) == 0) { + // Japanese word meaning "misc" or "other" + out[0] = 0x305D; + out[1] = 0x306E; + out[2] = 0x4ED6; + return 3; + } else { + return 0; } + } - return c; + out[0] = c; + return 1; } } // namespace android |