summaryrefslogtreecommitdiff
path: root/android/PhoneticStringUtils.cpp
diff options
context:
space:
mode:
author Yutaro Ogasawara <yutaro.ogasawara@gmail.com> 2011-11-23 11:14:21 +0900
committer Yutaro Ogasawara <yutaro.ogasawara@gmail.com> 2011-12-18 04:11:00 +0900
commit ae72de98e9963b7a7b19eae963820a8e680b8e64 (patch)
tree 3b2635c50c6559afb5b89c64fa19d8215174a235 /android/PhoneticStringUtils.cpp
parent be70c03d9ba1bb98b8840517eb3bed845c1272ff (diff)
GetPhonebookIndex: Fix handling for minor Japanese kana characters
This adds handling for minor Japanese kana characters so that the result conforms to the Unicode Collation Algorithm:
- Normalize the digraphs (yori, koto) to the first letter of their reading.
- Treat dakuten, dot, onbiki, and iteration marks as symbols.
- Add handling for the minor small katakana letters.

Signed-off-by: Yutaro Ogasawara <yutaro.ogasawara@gmail.com>
Diffstat (limited to 'android/PhoneticStringUtils.cpp')
-rw-r--r--android/PhoneticStringUtils.cpp41
1 files changed, 40 insertions, 1 deletions
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
index 0b971d2..796eaa2 100644
--- a/android/PhoneticStringUtils.cpp
+++ b/android/PhoneticStringUtils.cpp
@@ -153,12 +153,45 @@ static int GetNormalizedHiragana(int codepoint) {
case 0x3045:
case 0x3047:
case 0x3049:
+ case 0x3063:
+ case 0x3083:
+ case 0x3085:
+ case 0x3087:
case 0x308E: // xwa
return codepoint + 1;
case 0x3095: // xka
return 0x304B;
- case 0x3096: // xku
+ case 0x3096: // xke
+ return 0x3051;
+ case 0x31F0: // xku
return 0x304F;
+ case 0x31F1: // xsi
+ return 0x3057;
+ case 0x31F2: // xsu
+ return 0x3059;
+ case 0x31F3: // xto
+ return 0x3068;
+ case 0x31F4: // xnu
+ return 0x306C;
+ case 0x31F5: // xha
+ return 0x306F;
+ case 0x31F6: // xhi
+ return 0x3072;
+ case 0x31F7: // xhu
+ return 0x3075;
+ case 0x31F8: // xhe
+ return 0x3078;
+ case 0x31F9: // xho
+ return 0x307B;
+ case 0x31FA: // xmu
+ return 0x3080;
+ case 0x31FB: // xra
+ case 0x31FC: // xri
+ case 0x31FD: // xru
+ case 0x31FE: // xre
+ case 0x31FF: // xro
+ // ra: 0x3089
+ return 0x3089 + (codepoint - 0x31FB);
default:
return codepoint;
}
@@ -172,6 +205,12 @@ static int GetNormalizedKana(char32_t codepoint,
// Make fullwidth katakana same as hiragana.
// 96 == 0x30A1 - 0x3041c
codepoint = codepoint - 96;
+ } else if (codepoint == 0x309F) {
+ // Digraph YORI; Yo
+ codepoint = 0x3088;
+ } else if (codepoint == 0x30FF) {
+ // Digraph KOTO; Ko
+ codepoint = 0x3053;
} else {
codepoint = GetHiraganaFromHalfwidthKatakana(
codepoint, next_codepoint, next_is_consumed);