diff options
Diffstat (limited to 'android')
-rw-r--r-- | android/PhoneticStringUtils.cpp | 75 | ||||
-rw-r--r-- | android/PhoneticStringUtils.h | 14 | ||||
-rw-r--r-- | android/PhoneticStringUtilsTest.cpp | 221 | ||||
-rw-r--r-- | android/sqlite3_android.cpp | 60 |
4 files changed, 1 insertions, 369 deletions
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp index cf85cb8..dbf1f4f 100644 --- a/android/PhoneticStringUtils.cpp +++ b/android/PhoneticStringUtils.cpp @@ -181,77 +181,6 @@ static int GetNormalizedKana(char32_t codepoint, return GetNormalizedHiragana(codepoint); } -int GetPhoneticallySortableCodePoint(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed) { - if (next_is_consumed != NULL) { - *next_is_consumed = false; - } - - if (codepoint <= 0x0020 || codepoint == 0x3000) { - // Whitespace should be ignored. - // Note: Formally, more "whitespace" exist. This block only - // handles part of them - return -1; - } else if ((0x0021 <= codepoint && codepoint <= 0x007E) || - (0xFF01 <= codepoint && codepoint <= 0xFF5E)) { - // Ascii and fullwidth ascii - - if (0x0021 <= codepoint && codepoint <= 0x007E) { - // Convert ascii to fullwidth ascii so that they become - // behind hiragana. - // 65248 = 0xFF01 - 0x0021 - codepoint += 65248; - } - - // Now, there is only fullwidth ascii. - if (0xFF10 <= codepoint && codepoint <= 0xFF19) { - // Numbers should be after alphabets but before symbols. - // 86 = 0xFF66 - // (the beginning of halfwidth-katakankana space) - 0xFF10 - return codepoint + 86; - } else if (0xFF41 <= codepoint && codepoint <= 0xFF5A) { - // Make lower alphabets same as capital alphabets. - // 32 = 0xFF41 - 0xFF21 - return codepoint - 32; - } else if (0xFF01 <= codepoint && codepoint <= 0xFF0F) { - // Symbols (Ascii except alphabet nor number) - // These should be at the end of sorting, just after numebers - // (see below) - // - // We use halfwidth-katakana space for storing those symbols. - // 111 = 0xFF70 (0xFF19 + 86 + 1) - 0xFF01 - return codepoint + 111; - } else if (0xFF1A <= codepoint && codepoint <= 0xFF20) { - // Symbols (cont.) - // 101 = 0xFF7F (0xFF0F + 111 + 1) - 0xFF1A - return codepoint + 101; - } else if (0xFF3B <= codepoint && codepoint <= 0xFF40) { - // Symbols (cont.) - // 75 = 0xFF86 (0xFF20 + 101 + 1) - 0xFF3B (= 101 - 26) - return codepoint + 75; - } else if (0xFF5B <= codepoint && codepoint <= 0xFF5E) { - // Symbols (cont.) - // 49 = 0xFF8C (0xFF40 + 75 + 1) - 0xFF5B (= 75 - 26) - return codepoint + 49; - } else { - return codepoint; - } - } else if (codepoint == 0x02DC || codepoint == 0x223C) { - // tilde - return 0xFF5E; - } else if (codepoint <= 0x3040 || - (0x3100 <= codepoint && codepoint < 0xFF00) || - codepoint == CODEPOINT_FOR_NULL_STR) { - // Move Kanji and other non-Japanese characters behind symbols. - return codepoint + 0x10000; - } - - // Below is Kana-related handling. - - return GetNormalizedKana(codepoint, next_codepoint, next_is_consumed); -} - int GetNormalizedCodePoint(char32_t codepoint, char32_t next_codepoint, bool *next_is_consumed) { @@ -343,10 +272,6 @@ static bool GetExpectedString( return true; } -bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len) { - return GetExpectedString(src, dst, len, GetPhoneticallySortableCodePoint); -} - bool GetNormalizedString(const char *src, char **dst, size_t *len) { return GetExpectedString(src, dst, len, GetNormalizedCodePoint); } diff --git a/android/PhoneticStringUtils.h b/android/PhoneticStringUtils.h index 9da7d29..a567a27 100644 --- a/android/PhoneticStringUtils.h +++ b/android/PhoneticStringUtils.h @@ -22,20 +22,6 @@ namespace android { -// Returns Unicode codepoint relevant to string "src", and set "next" to the -// next index. Returns negative value when input is invalid. -int GetCodePointFromUtf8(const char *src, size_t len, size_t index, int *next); - -// Returns codepoint which is "phonetically sortable", whose definition -// depends on each Locale. Note that currently this function considers only -// Japanese. The variable "next_is_consumed" is set to true if "next_codepoint" -// is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed -// when previous "codepoint" is appropriate). If the codepoint should not be -// considered when sorting (e.g. whitespaces), -1 is returned. -int GetPhoneticallySortableCodePoint(char32_t codepoint, - char32_t next_codepoint, - bool *next_is_consumed); - // Returns codepoint which is "normalized", whose definition depends on each // Locale. Note that currently this function normalizes only Japanese; the // other characters are remained as is. diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp index 9d06327..2f0e9ac 100644 --- a/android/PhoneticStringUtilsTest.cpp +++ b/android/PhoneticStringUtilsTest.cpp @@ -32,12 +32,7 @@ class TestExecutor { void DoOneTest(void (TestExecutor::*test)()); void testUtf32At(); - void testGetPhoneticallySortableCodePointAscii(); - void testGetPhoneticallySortableCodePointKana(); - void testGetPhoneticallySortableCodePointWhitespaceOnly(); - void testGetPhoneticallySortableCodePointSimpleCompare(); void testGetUtf8FromUtf32(); - void testGetPhoneticallySortableString(); void testGetNormalizedString(); void testLongString(); @@ -69,12 +64,7 @@ class TestExecutor { bool TestExecutor::DoAllTests() { DoOneTest(&TestExecutor::testUtf32At); - DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii); - DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana); - DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly); - DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare); DoOneTest(&TestExecutor::testGetUtf8FromUtf32); - DoOneTest(&TestExecutor::testGetPhoneticallySortableString); DoOneTest(&TestExecutor::testGetNormalizedString); DoOneTest(&TestExecutor::testLongString); @@ -126,175 +116,6 @@ void TestExecutor::testUtf32At() { TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008); } -void TestExecutor::testGetPhoneticallySortableCodePointAscii() { - printf("testGetPhoneticallySortableCodePoint()\n"); - int halfwidth[94]; - int fullwidth[94]; - int i; - char32_t codepoint; - bool next_is_consumed; - for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) { - halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, 0, - &next_is_consumed); - if (halfwidth[i] < 0) { - printf("returned value become negative at 0x%04X", codepoint); - m_success = false; - return; - } - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint); - m_success = false; - return; - } - } - for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) { - fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, 0, - &next_is_consumed); - if (fullwidth[i] < 0) { - printf("returned value become negative at 0x%04X", codepoint); - m_success = false; - return; - } - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint); - m_success = false; - return; - } - } - - for (i = 0; i < 94; i++) { - EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]); - } -} - -void TestExecutor::testGetPhoneticallySortableCodePointKana() { - printf("testGetPhoneticallySortableCodePointKana()\n"); - int hiragana[86]; - int fullwidth_katakana[86]; - int i; - char32_t codepoint; - bool next_is_consumed; - - for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) { - hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, 0, - &next_is_consumed); - if (hiragana[i] < 0) { - printf("returned value become negative at 0x%04X", codepoint); - m_success = false; - return; - } - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint); - m_success = false; - return; - } - } - - for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) { - fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, 0, - &next_is_consumed); - if (fullwidth_katakana[i] < 0) { - printf("returned value become negative at 0x%04X", codepoint); - m_success = false; - return; - } - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint); - m_success = false; - return; - } - } - - // hankaku-katakana space do not have some characters corresponding to - // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert - // zenkaku-katakana version of them into this array (See the value 0x30??). - char32_t halfwidth_katakana[] = { - 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B, - 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78, - 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B, - 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E, - 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81, - 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84, - 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A, - 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C, - 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F, - 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92, - 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98, - 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D, - 0xFF73, 0xFF9E, 0x30F5, 0x30F6}; - int len = sizeof(halfwidth_katakana)/sizeof(int); - - int halfwidth_katakana_result[86]; - - int j; - for (i = 0, j = 0; i < len && j < 86; ++i, ++j) { - char32_t codepoint = halfwidth_katakana[i]; - char32_t next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : 0; - halfwidth_katakana_result[j] = - GetPhoneticallySortableCodePoint(codepoint, next_codepoint, - &next_is_consumed); - // Consume voiced mark/half-voiced mark. - if (next_is_consumed) { - ++i; - } - } - ASSERT_EQ_VALUE(i, len); - ASSERT_EQ_VALUE(j, 86); - - for (i = 0; i < 86; ++i) { - EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]); - EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]); - } -} - -void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() { - printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n"); - // Halfwidth space - int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL); - ASSERT_EQ_VALUE(result, -1); - // Fullwidth space - result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL); - ASSERT_EQ_VALUE(result, -1); - // tab - result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL); - ASSERT_EQ_VALUE(result, -1); -} - -void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() { - printf("testGetPhoneticallySortableCodePointSimpleCompare()\n"); - - char32_t codepoints[] = { - 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071, - 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z', - '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'}; - size_t len = sizeof(codepoints)/sizeof(int); - bool next_is_consumed; - for (size_t i = 0; i < len - 1; ++i) { - int codepoint_a = - GetPhoneticallySortableCodePoint(codepoints[i], 0, - &next_is_consumed); - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint_a); - m_success = false; - return; - } - int codepoint_b = - GetPhoneticallySortableCodePoint(codepoints[i + 1], 0, - &next_is_consumed); - if (next_is_consumed) { - printf("next_is_consumed become true at 0x%04X", codepoint_b); - m_success = false; - return; - } - - if (codepoint_a >= codepoint_b) { - printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n", - codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]); - m_success = false; - return; - } - } -} #define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \ ({ \ @@ -338,48 +159,8 @@ void TestExecutor::testGetUtf8FromUtf32() { #define EXPECT_EQ_UTF8_UTF8(src, expected) \ ({ \ - if (!GetPhoneticallySortableString(src, &dst, &len)) { \ - printf("GetPhoneticallySortableString() returned false.\n"); \ - m_success = false; \ - } else { \ - if (strcmp(dst, expected) != 0) { \ - for (const char *ch = dst; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("!= "); \ - for (const char *ch = expected; *ch != '\0'; ++ch) { \ - printf("0x%X ", *ch); \ - } \ - printf("\n"); \ - m_success = false; \ - } \ - free(dst); \ - } \ - }) - -void TestExecutor::testGetPhoneticallySortableString() { - printf("testGetPhoneticallySortableString()\n"); - char *dst; - size_t len; - - // halfwidth alphabets -> fullwidth alphabets. - EXPECT_EQ_UTF8_UTF8("ABCD", - "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4"); - // halfwidth/fullwidth-katakana -> hiragana - EXPECT_EQ_UTF8_UTF8( - "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA", - "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A"); - - // whitespace -> string which should be placed at last - EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD"); -} - -#undef EXPECT_EQ_UTF8_UTF8 - -#define EXPECT_EQ_UTF8_UTF8(src, expected) \ - ({ \ if (!GetNormalizedString(src, &dst, &len)) { \ - printf("GetPhoneticallySortableString() returned false.\n"); \ + printf("GetNormalizedSortableString() returned false.\n"); \ m_success = false; \ } else { \ if (strcmp(dst, expected) != 0) { \ diff --git a/android/sqlite3_android.cpp b/android/sqlite3_android.cpp index a23d802..a937573 100644 --- a/android/sqlite3_android.cpp +++ b/android/sqlite3_android.cpp @@ -30,7 +30,6 @@ #include "sqlite3_android.h" #include "PhoneNumberUtils.h" #include "PhonebookIndex.h" -#include "PhoneticStringUtils.h" #define ENABLE_ANDROID_LOG 0 #define SMALL_BUFFER_SIZE 10 @@ -121,45 +120,6 @@ static void get_phonebook_index( sqlite3_result_text(context, (const char*)out, outlen, SQLITE_TRANSIENT); } -static void get_phonetically_sortable_string( - sqlite3_context * context, int argc, sqlite3_value ** argv) -{ - if (argc != 1) { - sqlite3_result_null(context); - return; - } - char const * src = (char const *)sqlite3_value_text(argv[0]); - char * ret; - size_t len; - - if (!android::GetPhoneticallySortableString(src, &ret, &len)) { - // Put this text at the end of a list. - sqlite3_result_text(context, "\xF0\x9F\xBF\xBD", -1, SQLITE_STATIC); - // sqlite3_result_null(context); - } else { - sqlite3_result_text(context, ret, len, free); - } -} - -static void get_normalized_string( - sqlite3_context * context, int argc, sqlite3_value ** argv) -{ - if (argc != 1) { - sqlite3_result_null(context); - return; - } - char const * src = (char const *)sqlite3_value_text(argv[0]); - char * ret; - size_t len; - - if (!android::GetNormalizedString(src, &ret, &len)) { - // Probably broken string. Return 0 length string. - sqlite3_result_text(context, "", -1, SQLITE_STATIC); - } else { - sqlite3_result_text(context, ret, len, free); - } -} - static void phone_numbers_equal(sqlite3_context * context, int argc, sqlite3_value ** argv) { if (argc != 2 && argc != 3) { @@ -568,26 +528,6 @@ extern "C" int register_android_functions(sqlite3 * handle, int utf16Storage) } #endif - // Register the GET_PHONETICALLY_SORTABLE_STRING function - err = sqlite3_create_function(handle, - "GET_PHONETICALLY_SORTABLE_STRING", - 1, SQLITE_UTF8, NULL, - get_phonetically_sortable_string, - NULL, NULL); - if (err != SQLITE_OK) { - return err; - } - - // Register the GET_NORMALIZED_STRING function - err = sqlite3_create_function(handle, - "GET_NORMALIZED_STRING", - 1, SQLITE_UTF8, NULL, - get_normalized_string, - NULL, NULL); - if (err != SQLITE_OK) { - return err; - } - // Register the GET_PHONEBOOK_INDEX function err = sqlite3_create_function(handle, "GET_PHONEBOOK_INDEX", |