summaryrefslogtreecommitdiff
path: root/android/PhoneticStringUtilsTest.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'android/PhoneticStringUtilsTest.cpp')
-rw-r--r--android/PhoneticStringUtilsTest.cpp221
1 files changed, 1 insertions, 220 deletions
diff --git a/android/PhoneticStringUtilsTest.cpp b/android/PhoneticStringUtilsTest.cpp
index 9d06327..2f0e9ac 100644
--- a/android/PhoneticStringUtilsTest.cpp
+++ b/android/PhoneticStringUtilsTest.cpp
@@ -32,12 +32,7 @@ class TestExecutor {
void DoOneTest(void (TestExecutor::*test)());
void testUtf32At();
- void testGetPhoneticallySortableCodePointAscii();
- void testGetPhoneticallySortableCodePointKana();
- void testGetPhoneticallySortableCodePointWhitespaceOnly();
- void testGetPhoneticallySortableCodePointSimpleCompare();
void testGetUtf8FromUtf32();
- void testGetPhoneticallySortableString();
void testGetNormalizedString();
void testLongString();
@@ -69,12 +64,7 @@ class TestExecutor {
bool TestExecutor::DoAllTests() {
DoOneTest(&TestExecutor::testUtf32At);
- DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
- DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
- DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
- DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
- DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
DoOneTest(&TestExecutor::testGetNormalizedString);
DoOneTest(&TestExecutor::testLongString);
@@ -126,175 +116,6 @@ void TestExecutor::testUtf32At() {
TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
}
-void TestExecutor::testGetPhoneticallySortableCodePointAscii() {
- printf("testGetPhoneticallySortableCodePoint()\n");
- int halfwidth[94];
- int fullwidth[94];
- int i;
- char32_t codepoint;
- bool next_is_consumed;
- for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) {
- halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, 0,
- &next_is_consumed);
- if (halfwidth[i] < 0) {
- printf("returned value become negative at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- }
- for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) {
- fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, 0,
- &next_is_consumed);
- if (fullwidth[i] < 0) {
- printf("returned value become negative at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- }
-
- for (i = 0; i < 94; i++) {
- EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]);
- }
-}
-
-void TestExecutor::testGetPhoneticallySortableCodePointKana() {
- printf("testGetPhoneticallySortableCodePointKana()\n");
- int hiragana[86];
- int fullwidth_katakana[86];
- int i;
- char32_t codepoint;
- bool next_is_consumed;
-
- for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) {
- hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, 0,
- &next_is_consumed);
- if (hiragana[i] < 0) {
- printf("returned value become negative at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- }
-
- for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) {
- fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, 0,
- &next_is_consumed);
- if (fullwidth_katakana[i] < 0) {
- printf("returned value become negative at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint);
- m_success = false;
- return;
- }
- }
-
- // hankaku-katakana space do not have some characters corresponding to
- // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert
- // zenkaku-katakana version of them into this array (See the value 0x30??).
- char32_t halfwidth_katakana[] = {
- 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B,
- 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78,
- 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B,
- 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E,
- 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81,
- 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84,
- 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A,
- 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C,
- 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F,
- 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92,
- 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98,
- 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D,
- 0xFF73, 0xFF9E, 0x30F5, 0x30F6};
- int len = sizeof(halfwidth_katakana)/sizeof(int);
-
- int halfwidth_katakana_result[86];
-
- int j;
- for (i = 0, j = 0; i < len && j < 86; ++i, ++j) {
- char32_t codepoint = halfwidth_katakana[i];
- char32_t next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : 0;
- halfwidth_katakana_result[j] =
- GetPhoneticallySortableCodePoint(codepoint, next_codepoint,
- &next_is_consumed);
- // Consume voiced mark/half-voiced mark.
- if (next_is_consumed) {
- ++i;
- }
- }
- ASSERT_EQ_VALUE(i, len);
- ASSERT_EQ_VALUE(j, 86);
-
- for (i = 0; i < 86; ++i) {
- EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]);
- EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]);
- }
-}
-
-void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
- printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
- // Halfwidth space
- int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
- ASSERT_EQ_VALUE(result, -1);
- // Fullwidth space
- result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
- ASSERT_EQ_VALUE(result, -1);
- // tab
- result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
- ASSERT_EQ_VALUE(result, -1);
-}
-
-void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
- printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
-
- char32_t codepoints[] = {
- 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071,
- 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z',
- '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'};
- size_t len = sizeof(codepoints)/sizeof(int);
- bool next_is_consumed;
- for (size_t i = 0; i < len - 1; ++i) {
- int codepoint_a =
- GetPhoneticallySortableCodePoint(codepoints[i], 0,
- &next_is_consumed);
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint_a);
- m_success = false;
- return;
- }
- int codepoint_b =
- GetPhoneticallySortableCodePoint(codepoints[i + 1], 0,
- &next_is_consumed);
- if (next_is_consumed) {
- printf("next_is_consumed become true at 0x%04X", codepoint_b);
- m_success = false;
- return;
- }
-
- if (codepoint_a >= codepoint_b) {
- printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n",
- codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]);
- m_success = false;
- return;
- }
- }
-}
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
({ \
@@ -338,48 +159,8 @@ void TestExecutor::testGetUtf8FromUtf32() {
#define EXPECT_EQ_UTF8_UTF8(src, expected) \
({ \
- if (!GetPhoneticallySortableString(src, &dst, &len)) { \
- printf("GetPhoneticallySortableString() returned false.\n"); \
- m_success = false; \
- } else { \
- if (strcmp(dst, expected) != 0) { \
- for (const char *ch = dst; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("!= "); \
- for (const char *ch = expected; *ch != '\0'; ++ch) { \
- printf("0x%X ", *ch); \
- } \
- printf("\n"); \
- m_success = false; \
- } \
- free(dst); \
- } \
- })
-
-void TestExecutor::testGetPhoneticallySortableString() {
- printf("testGetPhoneticallySortableString()\n");
- char *dst;
- size_t len;
-
- // halfwidth alphabets -> fullwidth alphabets.
- EXPECT_EQ_UTF8_UTF8("ABCD",
- "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4");
- // halfwidth/fullwidth-katakana -> hiragana
- EXPECT_EQ_UTF8_UTF8(
- "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
- "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
-
- // whitespace -> string which should be placed at last
- EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
-}
-
-#undef EXPECT_EQ_UTF8_UTF8
-
-#define EXPECT_EQ_UTF8_UTF8(src, expected) \
- ({ \
if (!GetNormalizedString(src, &dst, &len)) { \
- printf("GetPhoneticallySortableString() returned false.\n"); \
+ printf("GetNormalizedSortableString() returned false.\n"); \
m_success = false; \
} else { \
if (strcmp(dst, expected) != 0) { \