summaryrefslogtreecommitdiff
path: root/android/PhonebookIndex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'android/PhonebookIndex.cpp')
-rw-r--r--android/PhonebookIndex.cpp96
1 files changed, 65 insertions, 31 deletions
diff --git a/android/PhonebookIndex.cpp b/android/PhonebookIndex.cpp
index f82c9d2..4d3da61 100644
--- a/android/PhonebookIndex.cpp
+++ b/android/PhonebookIndex.cpp
@@ -25,7 +25,7 @@
#include "PhonebookIndex.h"
#include "PhoneticStringUtils.h"
-#define SMALL_BUFFER_SIZE 10
+#define MIN_OUTPUT_SIZE 6 // Minimum required size for the output buffer (in bytes)
namespace android {
@@ -118,45 +118,79 @@ static bool is_CJK(UChar c) {
|| (0xf900 <= c && c <= 0xfaff); // CJK_COMPATIBILITY_IDEOGRAPHS
}
-UChar GetPhonebookIndex(UCharIterator * iter, const char * locale) {
- UChar dest[SMALL_BUFFER_SIZE];
+int32_t GetPhonebookIndex(UCharIterator * iter, const char * locale, UChar * out, int32_t size,
+ UBool * isError)
+{
+ if (size < MIN_OUTPUT_SIZE) {
+ *isError = TRUE;
+ return 0;
+ }
- // Normalize the first character to remove accents using the NFD normalization
- UErrorCode errorCode = U_ZERO_ERROR;
- int32_t len = unorm_next(iter, dest, SMALL_BUFFER_SIZE * sizeof(UChar), UNORM_NFD,
- 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
- if (U_FAILURE(errorCode) || len == 0) {
- return 0;
- }
+ *isError = FALSE;
- UChar c = dest[0];
+ // Normalize the first character to remove accents using the NFD normalization
+ UErrorCode errorCode = U_ZERO_ERROR;
+ int32_t len = unorm_next(iter, out, size, UNORM_NFD,
+ 0 /* options */, TRUE /* normalize */, NULL, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ *isError = TRUE;
+ return 0;
+ }
- // We are only interested in letters
- if (!u_isalpha(c)) {
- return 0;
- }
+ if (len == 0) { // Empty input string
+ return 0;
+ }
- c = u_toupper(c);
+ UChar c = out[0];
- // Check for explicitly mapped characters
- UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
- if (c_mapped != 0) {
- return c_mapped;
- }
+ // We are only interested in letters
+ if (!u_isalpha(c)) {
+ return 0;
+ }
+
+ c = u_toupper(c);
+
+ // Check for explicitly mapped characters
+ UChar c_mapped = map_character(c, DEFAULT_CHAR_MAP, sizeof(DEFAULT_CHAR_MAP) / sizeof(UChar));
+ if (c_mapped != 0) {
+ out[0] = c_mapped;
+ return 1;
+ }
- // Convert Kanas to Hiragana
- UChar next = len > 2 ? dest[1] : 0;
- c = android::GetNormalizedCodePoint(c, next, NULL);
+ // Convert Kanas to Hiragana
+ UChar next = len > 2 ? out[1] : 0;
+ c = android::GetNormalizedCodePoint(c, next, NULL);
+
+ // Traditional grouping of Hiragana characters
+ if (0x3042 <= c && c <= 0x309F) {
+ if (c < 0x304B) c = 0x3042; // a
+ else if (c < 0x3055) c = 0x304B; // ka
+ else if (c < 0x305F) c = 0x3055; // sa
+ else if (c < 0x306A) c = 0x305F; // ta
+ else if (c < 0x306F) c = 0x306A; // na
+ else if (c < 0x307E) c = 0x306F; // ha
+ else if (c < 0x3084) c = 0x307E; // ma
+ else if (c < 0x3089) c = 0x3084; // ya
+ else if (c < 0x308F) c = 0x3089; // ra
+ else c = 0x308F; // wa
+ out[0] = c;
+ return 1;
+ }
- if (is_CJK(c)) {
- if (strncmp(locale, "ja", 2) == 0) {
- return 0x8A18; // Kanji character used as a heading in letters, notices and other documents
- } else {
- return 0;
- }
+ if (is_CJK(c)) {
+ if (strncmp(locale, "ja", 2) == 0) {
+ // Japanese word meaning "misc" or "other"
+ out[0] = 0x305D;
+ out[1] = 0x306E;
+ out[2] = 0x4ED6;
+ return 3;
+ } else {
+ return 0;
}
+ }
- return c;
+ out[0] = c;
+ return 1;
}
} // namespace android