diff options
Diffstat (limited to 'android/PhoneticStringUtilsTest.cpp')
-rw-r--r-- | android/PhoneticStringUtilsTest.cpp | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp new file mode 100644 index 0000000..ae9df2a --- /dev/null +++ b/android/PhoneticStringUtilsTest.cpp @@ -0,0 +1,357 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "PhoneticStringUtils.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +using namespace android; + +class TestExecutor { + public: + TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {} + bool DoAllTests(); + private: + void DoOneTest(void (TestExecutor::*test)()); + + void testGetCodePointFromUtf8(); + void testGetPhoneticallySortableCodePointAscii(); + void testGetPhoneticallySortableCodePointKana(); + void testGetPhoneticallySortableCodePointSimpleCompare(); + void testGetUtf8FromCodePoint(); + void testGetPhoneticallySortableString(); + + // Note: When adding a test, do not forget to add it to DoOneTest(). + + int m_total_count; + int m_success_count; + + bool m_success; +}; + +#define ASSERT_EQ_VALUE(input, expected) \ + ({ \ + if ((expected) != (input)) { \ + printf("0x%X(result) != 0x%X(expected)\n", input, expected); \ + m_success = false; \ + return; \ + } \ + }) + +#define EXPECT_EQ_VALUE(input, expected) \ + ({ \ + if ((expected) != (input)) { \ + printf("0x%X(result) != 0x%X(expected)\n", input, expected); \ + m_success = false; \ + } \ + }) + + +bool TestExecutor::DoAllTests() { + DoOneTest(&TestExecutor::testGetCodePointFromUtf8); + DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii); + DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana); + DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare); + DoOneTest(&TestExecutor::testGetUtf8FromCodePoint); + DoOneTest(&TestExecutor::testGetPhoneticallySortableString); + + printf("Test total: %d\nSuccess: %d\nFailure: %d\n", + m_total_count, m_success_count, m_total_count - m_success_count); + + bool success = m_total_count == m_success_count; + printf("\n%s\n", success ? "Success" : "Failure"); + + return success; +} + +void TestExecutor::DoOneTest(void (TestExecutor::*test)()) { + m_success = true; + + (this->*test)(); + + ++m_total_count; + m_success_count += m_success ? 1 : 0; +} + +void TestExecutor::testGetCodePointFromUtf8() { + printf("testGetCodePointFromUtf8()\n"); + int next; + + EXPECT_EQ_VALUE(GetCodePointFromUtf8("a", 1, 0, &next), 97); + EXPECT_EQ_VALUE(next, 1); + // Japanese hiragana "a" + EXPECT_EQ_VALUE(GetCodePointFromUtf8("\xE3\x81\x82", 3, 0, &next), 0x3042); + EXPECT_EQ_VALUE(next, 3); + // Japanese fullwidth katakana "a" with ascii a + EXPECT_EQ_VALUE(GetCodePointFromUtf8("a\xE3\x82\xA2", 4, 1, &next), 0x30A2); + EXPECT_EQ_VALUE(next, 4); + + // 2 PUA + ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", + 8, 0, &next), 0xFE000); + ASSERT_EQ_VALUE(next, 4); + ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", + 8, next, &next), 0xFE008); + ASSERT_EQ_VALUE(next, 8); +} + +void TestExecutor::testGetPhoneticallySortableCodePointAscii() { + printf("testGetPhoneticallySortableCodePoint()\n"); + int halfwidth[94]; + int fullwidth[94]; + int i, codepoint; + bool next_is_consumed; + for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) { + halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1, + &next_is_consumed); + if (halfwidth[i] < 0) { + printf("returned value become negative at 0x%04X", codepoint); + } + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint); + m_success = false; + return; + } + } + for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) { + fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1, + &next_is_consumed); + if (fullwidth[i] < 0) { + printf("returned value become negative at 0x%04X", codepoint); + } + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint); + m_success = false; + return; + } + } + + for (i = 0; i < 94; i++) { + EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]); + } +} + +void TestExecutor::testGetPhoneticallySortableCodePointKana() { + printf("testGetPhoneticallySortableCodePointKana()\n"); + int hiragana[86]; + int fullwidth_katakana[86]; + int i, codepoint; + bool next_is_consumed; + + for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) { + hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, -1, + &next_is_consumed); + if (hiragana[i] < 0) { + printf("returned value become negative at 0x%04X", codepoint); + } + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint); + m_success = false; + return; + } + } + + for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) { + fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, -1, + &next_is_consumed); + if (fullwidth_katakana[i] < 0) { + printf("returned value become negative at 0x%04X", codepoint); + } + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint); + m_success = false; + return; + } + } + + // hankaku-katakana space do not have some characters corresponding to + // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert + // zenkaku-katakana version of them into this array (See the value 0x30??). + int halfwidth_katakana[] = { + 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B, + 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78, + 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B, + 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E, + 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81, + 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84, + 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A, + 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C, + 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F, + 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92, + 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98, + 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D, + 0xFF73, 0xFF9E, 0x30F5, 0x30F6}; + int len = sizeof(halfwidth_katakana)/sizeof(int); + + int halfwidth_katakana_result[86]; + + int j; + for (i = 0, j = 0; i < len && j < 86; ++i, ++j) { + int codepoint = halfwidth_katakana[i]; + int next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : -1; + halfwidth_katakana_result[j] = + GetPhoneticallySortableCodePoint(codepoint, next_codepoint, + &next_is_consumed); + // Consume voiced mark/half-voiced mark. + if (next_is_consumed) { + ++i; + } + } + ASSERT_EQ_VALUE(i, len); + ASSERT_EQ_VALUE(j, 86); + + for (i = 0; i < 86; ++i) { + EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]); + EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]); + } +} + +void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() { + printf("testGetPhoneticallySortableCodePointSimpleCompare()\n"); + + int codepoints[] = { + 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071, + 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z', + '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'}; + size_t len = sizeof(codepoints)/sizeof(int); + bool next_is_consumed; + for (size_t i = 0; i < len - 1; ++i) { + int codepoint_a = + GetPhoneticallySortableCodePoint(codepoints[i], -1, + &next_is_consumed); + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint_a); + m_success = false; + return; + } + int codepoint_b = + GetPhoneticallySortableCodePoint(codepoints[i + 1], -1, + &next_is_consumed); + if (next_is_consumed) { + printf("next_is_consumed become true at 0x%04X", codepoint_b); + m_success = false; + return; + } + + if (codepoint_a >= codepoint_b) { + printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n", + codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]); + m_success = false; + return; + } + } +} + +#define EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, i) \ + ({ \ + index = i; \ + if (!GetUtf8FromCodePoint(codepoint, dst, 10, &index)) { \ + printf("GetUtf8FromCodePoint() returned false at 0x%04X\n", codepoint); \ + m_success = false; \ + } else if (index >= 10) { \ + printf("index (%d) >= 10\n", index); \ + m_success = false; \ + } else { \ + dst[index] = '\0'; \ + if (strcmp(dst + i, expected) != 0) { \ + printf("Failed at codepoint 0x%04X\n", codepoint); \ + for (const char *ch = dst; *ch != '\0'; ++ch) { \ + printf("0x%X ", *ch); \ + } \ + printf("!= "); \ + for (const char *ch = expected; *ch != '\0'; ++ch) { \ + printf("0x%X ", *ch); \ + } \ + printf("\n"); \ + m_success = false; \ + } \ + } \ + }) + +#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \ + EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, 0) + + +void TestExecutor::testGetUtf8FromCodePoint() { + printf("testGetUtf8FromCodePoint()\n"); + size_t index = 0; + char dst[10]; + + EXPECT_EQ_CODEPOINT_UTF8('a', "\x61"); + // Armenian capital letter AYB (2 bytes in UTF8) + EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0"); + // Japanese 'a' (3 bytes in UTF8) + EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82"); + // Kanji + EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5"); + // PUA (4 byets in UTF8) + EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96"); + EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2"); + + EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(0x058F, "\xD6\x8F", 3); + + index = 0; + if (GetUtf8FromCodePoint(0x3043, dst, 2, &index)) { + printf("GetUtf8FromCodePont() returned true even when destination length" + "is not enough\n"); + m_success = false; + } +} + +#define EXPECT_EQ_UTF8_UTF8(src, expected) \ + ({ \ + if (!GetPhoneticallySortableString(src, &dst, &len)) { \ + printf("GetPhoneticallySortableString() returned false.\n"); \ + m_success = false; \ + } else { \ + if (strcmp(dst, expected) != 0) { \ + for (const char *ch = dst; *ch != '\0'; ++ch) { \ + printf("0x%X ", *ch); \ + } \ + printf("!= "); \ + for (const char *ch = expected; *ch != '\0'; ++ch) { \ + printf("0x%X ", *ch); \ + } \ + printf("\n"); \ + m_success = false; \ + } \ + free(dst); \ + } \ + }) + +void TestExecutor::testGetPhoneticallySortableString() { + char *dst; + size_t len; + + // halfwidth alphabets -> fullwidth alphabets. + EXPECT_EQ_UTF8_UTF8("ABCD", + "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4"); + // halfwidth/fullwidth-katakana -> hiragana + EXPECT_EQ_UTF8_UTF8( + "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA", + "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A"); +} + +int main() { + TestExecutor executor; + if(executor.DoAllTests()) { + return 0; + } else { + return 1; + } +} |