diff options
author | Daisuke Miyakawa <dmiyakawa@google.com> | 2009-06-26 11:17:34 +0900 |
---|---|---|
committer | Daisuke Miyakawa <dmiyakawa@google.com> | 2009-06-26 11:17:34 +0900 |
commit | de43094477419f8a190a6f97b47d346827310a02 (patch) | |
tree | b63fe3723dabccdf655a9313419ed794277e900b /android/PhoneNumberUtils.cpp | |
parent | 0ff6f71d24bbde0087c8d2bd0a94d699117e2562 (diff) |
"Rewrite" PhoneNumberUtil so that it compares two phone strings using as many characters as possible, unlike the previous implementation.
The new implementation takes the country calling code into account when comparing numbers.
To confirm that this change does not break anything, I also created another change (https://android-git.corp.google.com/g/Gerrit#change,4036).
All tests related to PhoneNumberUtils pass, including the ones covering Thailand's ugly, buggy behavior around country code handling :-P
Also add tests for PhoneNumberUtils, which can be run on an ordinary workstation like this:
> g++ -Wall external/sqlite/android/PhoneNumberUtils.cpp external/sqlite/android/PhoneNumberUtilsTest.cpp
> ./a.out
This change fixes the internal bug 1868702.
Diffstat (limited to 'android/PhoneNumberUtils.cpp')
-rw-r--r-- | android/PhoneNumberUtils.cpp | 490 |
1 files changed, 276 insertions, 214 deletions
diff --git a/android/PhoneNumberUtils.cpp b/android/PhoneNumberUtils.cpp index 9e5e470..321b0ea 100644 --- a/android/PhoneNumberUtils.cpp +++ b/android/PhoneNumberUtils.cpp @@ -1,293 +1,355 @@ -/* //device/vmlibs-android/com.android.internal.telephony/PhoneNumberUtils.java -** -** Copyright 2006, The Android Open Source Project -** -** Licensed under the Apache License, Version 2.0 (the "License"); -** you may not use this file except in compliance with the License. -** You may obtain a copy of the License at -** -** http://www.apache.org/licenses/LICENSE-2.0 -** -** Unless required by applicable law or agreed to in writing, software -** distributed under the License is distributed on an "AS IS" BASIS, -** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -** See the License for the specific language governing permissions and -** limitations under the License. -*/ +/* + * Copyright 2009, The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ #include <string.h> namespace android { -static int MIN_MATCH = 5; +/* Generated by the following Python script. 
Values of country calling codes + are from http://en.wikipedia.org/wiki/List_of_country_calling_codes + +#!/usr/bin/python +import sys +ccc_set_2digits = set([0, 1, 7, + 20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45, + 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, + 62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92, + 93, 94, 95, 98]) + +ONE_LINE_NUM = 10 + +for i in xrange(100): + if i % ONE_LINE_NUM == 0: + sys.stdout.write(' ') + if i in ccc_set_2digits: + included = 'true' + else: + included = 'false' + sys.stdout.write(included + ',') + if ((i + 1) % ONE_LINE_NUM) == 0: + sys.stdout.write('\n') + else: + sys.stdout.write(' ') +*/ +static bool two_length_country_code_map[100] = { + true, true, false, false, false, false, false, true, false, false, + false, false, false, false, false, false, false, false, false, false, + true, false, false, false, false, false, false, true, true, false, + true, true, true, true, true, false, true, false, false, true, + true, false, false, true, true, true, true, true, true, true, + false, true, true, true, true, true, true, true, true, false, + true, true, true, true, true, true, true, false, false, false, + false, false, false, false, false, false, false, false, false, false, + false, true, true, true, true, false, true, false, false, true, + true, true, true, true, true, true, false, false, true, false, +}; + +/** True the character(s) expresses some country calling code. False otherwise. + */ +static bool isCountryCallingCode(int ccc_candidate) { + return ccc_candidate > 0 && + ccc_candidate < (int)sizeof(two_length_country_code_map) && + two_length_country_code_map[ccc_candidate]; +} -/** True if c is ISO-LATIN characters 0-9 */ -static bool isISODigit (char c) +/** + * Returns interger corresponding to the input if input "ch" is + * ISO-LATIN characters 0-9. 
+ * Returns -1 otherwise + */ +static int tryGetISODigit (char ch) { - return c >= '0' && c <= '9'; + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } else { + return -1; + } } /** True if c is ISO-LATIN characters 0-9, *, # , + */ -static bool isNonSeparator(char c) +static bool isNonSeparator(char ch) { - return (c >= '0' && c <= '9') || c == '*' || c == '#' || c == '+'; + return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+'; } /** - * Phone numbers are stored in "lookup" form in the database - * as reversed strings to allow for caller ID lookup + * Try to store the pointer to "new_ptr" which does not have trunk prefix. * - * This method takes a phone number and makes a valid SQL "LIKE" - * string that will match the lookup form + * Currently this function simply ignore the first digit assuming it is + * trunk prefix. Actually trunk prefix is different in each country. + * + * e.g. + * "+79161234567" equals "89161234567" (Russian trunk digit is 8) + * "+33123456789" equals "0123456789" (French trunk digit is 0) * */ -/** all of a up to len must be an international prefix or - * separators/non-dialing digits - */ -static bool matchIntlPrefix(const char* a, int len) +static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len, + const char **new_ptr, size_t *new_len) { - /* '([^0-9*#+]\+[^0-9*#+] | [^0-9*#+]0(0|11)[^0-9*#+] )$' */ - /* 0 1 2 3 45 */ - - int state = 0; - for (int i = 0 ; i < len ; i++) { - char c = a[i]; - - switch (state) { - case 0: - if (c == '+') state = 1; - else if (c == '0') state = 2; - else if (isNonSeparator(c)) return false; - break; - - case 2: - if (c == '0') state = 3; - else if (c == '1') state = 4; - else if (isNonSeparator(c)) return false; - break; - - case 4: - if (c == '1') state = 5; - else if (isNonSeparator(c)) return false; - break; - - default: - if (isNonSeparator(c)) return false; - break; - - } - } - - return state == 1 || state == 3 || state == 5; -} - -/** all of 'a' up to len must match 
non-US trunk prefix ('0') */ -static bool matchTrunkPrefix(const char* a, int len) -{ - bool found; - - found = false; - - for (int i = 0 ; i < len ; i++) { - char c = a[i]; - - if (c == '0' && !found) { - found = true; - } else if (isNonSeparator(c)) { + for (size_t i = 0 ; i < len ; i++) { + char ch = str[i]; + if (tryGetISODigit(ch) >= 0) { + if (new_ptr != NULL) { + *new_ptr = str + i + 1; + } + if (new_len != NULL) { + *new_len = len - (i + 1); + } + return true; + } else if (isNonSeparator(ch)) { return false; } } - - return found; + + return false; } -/** all of 'a' up to len must be a (+|00|011)country code) - * We're fast and loose with the country code. Any \d{1,3} matches */ -static bool matchIntlPrefixAndCC(const char* a, int len) +static int tryGetCountryCallingCode(const char *str, size_t len, + const char **new_ptr, size_t *new_len) { - /* [^0-9*#+]*(\+|0(0|11)\d\d?\d? [^0-9*#+] $ */ - /* 0 1 2 3 45 6 7 8 */ + // Rough regexp: + // ^[^0-9*#+]*((\+|0(0|11)\d\d?|166) [^0-9*#+] $ + // 0 1 2 3 45 6 7 89 + // + // In all the states, this function ignores separator characters. + // "166" is the special case for the call from Thailand to the US. Ugu! 
int state = 0; - for (int i = 0 ; i < len ; i++ ) { - char c = a[i]; - + int ccc = 0; + for (size_t i = 0 ; i < len ; i++ ) { + char ch = str[i]; switch (state) { case 0: - if (c == '+') state = 1; - else if (c == '0') state = 2; - else if (isNonSeparator(c)) return false; + if (ch == '+') state = 1; + else if (ch == '0') state = 2; + else if (ch == '1') state = 8; + else if (isNonSeparator(ch)) return -1; break; case 2: - if (c == '0') state = 3; - else if (c == '1') state = 4; - else if (isNonSeparator(c)) return false; + if (ch == '0') state = 3; + else if (ch == '1') state = 4; + else if (isNonSeparator(ch)) return -1; break; case 4: - if (c == '1') state = 5; - else if (isNonSeparator(c)) return false; + if (ch == '1') state = 5; + else if (isNonSeparator(ch)) return -1; break; case 1: case 3: case 5: - if (isISODigit(c)) state = 6; - else if (isNonSeparator(c)) return false; - break; - case 6: case 7: - if (isISODigit(c)) state++; - else if (isNonSeparator(c)) return false; - break; - + { + int ret = tryGetISODigit(ch); + if (ret > 0) { + ccc = ccc * 10 + ret; + if (ccc >= 100 || isCountryCallingCode(ccc)) { + if (new_ptr != NULL) { + *new_ptr = str + i + 1; + } + if (new_len != NULL) { + *new_len = len - (i + 1); + } + return ccc; + } + if (state == 1 || state == 3 || state == 5) { + state = 6; + } else { + state++; + } + } else if (isNonSeparator(ch)) { + return -1; + } + } + break; + case 8: + if (ch == '6') state = 9; + else if (isNonSeparator(ch)) return -1; + break; + case 9: + if (ch == '6') { + if (new_ptr != NULL) { + *new_ptr = str + i + 1; + } + if (new_len != NULL) { + *new_len = len - (i + 1); + } + return 66; + } + break; default: - if (isNonSeparator(c)) return false; + return -1; } } - return state == 6 || state == 7 || state == 8; -} - -/** or -1 if both are negative */ -static int minPositive(int a, int b) -{ - if (a >= 0 && b >= 0) { - return (a < b) ? 
a : b; - } else if (a >= 0) { /* && b < 0 */ - return a; - } else if (b >= 0) { /* && a < 0 */ - return b; - } else { /* a < 0 && b < 0 */ - return -1; - } + return -1; } /** - * Return the offset into a of the first appearance of b, or -1 if there - * is no such character in a. + * Return true if the prefix of "ch" is "ignorable". Here, "ignorable" means + * that "ch" has only one digit and separater characters. The one digit is + * assumed to be trunk prefix. */ -static int indexOf(const char *a, char b) { - char *ix = strchr(a, b); +static bool checkPrefixIsIgnorable(const char* ch, int i) { + bool trunk_prefix_was_read = false; + while (i >= 0) { + if (tryGetISODigit(ch[i]) >= 0) { + if (trunk_prefix_was_read) { + // More than one digit appeared, meaning that "a" and "b" + // is different. + return false; + } else { + // Ignore just one digit, assuming it is trunk prefix. + trunk_prefix_was_read = true; + } + } else if (isNonSeparator(ch[i])) { + // Trunk prefix is a digit, not "*", "#"... + return false; + } + i--; + } - if (ix == NULL) - return -1; - else - return ix - a; + return true; } /** * Compare phone numbers a and b, return true if they're identical * enough for caller ID purposes. * - * - Compares from right to left - * - requires MIN_MATCH (5) characters to match - * - handles common trunk prefixes and international prefixes - * (basically, everything except the Russian trunk prefix) + * Assume NULL as 0-length string. + * + * Detailed information: + * Currently (as of 2009-06-12), we cannot depend on the locale given from the + * OS. For example, current Android does not accept "en_JP", meaning + * "the display language is English but the phone should be in Japan", but + * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix + * in the country where the phone is used. 
More specifically, "880-1234-1234" + * is not valid phone number in Japan since the trunk prefix in Japan is not 8 + * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix + * is 8. Also, we cannot know whether the country where users live has trunk + * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT + * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234" + * and we can determine "880-1234-1234" is different from "080-1234-1234"). * - * Tolerates nulls + * In the future, we should handle trunk prefix more correctly, but as of now, + * we just ignore it... */ bool phone_number_compare(const char* a, const char* b) { - int ia, ib; - int matched; - - if (a == NULL || b == NULL) { - return false; + size_t len_a = 0; + size_t len_b = 0; + if (a == NULL) { + a = ""; + } else { + len_a = strlen(a); } - - ia = strlen(a); - ib = strlen(b); - if (ia == 0 || ib == 0) { - return false; + if (b == NULL) { + b = ""; + } else { + len_b = strlen(b); } - // Compare from right to left - ia--; - ib--; - - matched = 0; - - while (ia >= 0 && ib >=0) { - char ca, cb; - bool skipCmp = false; - - ca = a[ia]; - - if (!isNonSeparator(ca)) { - ia--; - skipCmp = true; - } - - cb = b[ib]; - - if (!isNonSeparator(cb)) { - ib--; - skipCmp = true; + const char* tmp_a = NULL; + const char* tmp_b = NULL; + size_t tmp_len_a = len_a; + size_t tmp_len_b = len_b; + + int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a); + int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b); + bool ok_to_ignore_prefix = true; + if (ccc_a >= 0 && ccc_b >= 0) { + if (ccc_a != ccc_b) { + // Different Country Calling Code. Must be different phone number. + return false; } - - if (!skipCmp) { - if (cb != ca) { - break; - } - ia--; ib--; matched++; + // When both have ccc, do not ignore trunk prefix. 
Without this, + // "+81123123" becomes same as "+810123123" (+81 == Japan) + ok_to_ignore_prefix = false; + } else if (ccc_a < 0 && ccc_b < 0) { + // When both do not have ccc, do not ignore trunk prefix. Without this, + // "123123" becomes same as "0123123" + ok_to_ignore_prefix = false; + } else { + if (ccc_a < 0) { + tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a); } - } - - if (matched < MIN_MATCH) { - int aLen = strlen(a); - - // if the input strings match, but their lengths < MIN_MATCH, - // treat them as equal. - if (aLen == (int)strlen(b) && aLen == matched) { - return true; + if (ccc_b < 0) { + tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b); } - return false; } - // At least one string has matched completely; - if (matched >= MIN_MATCH && (ia < 0 || ib < 0)) { - return true; + if (tmp_a != NULL) { + a = tmp_a; + len_a = tmp_len_a; } - - /* - * Now, what remains must be one of the following for a - * match: - * - * - a '+' on one and a '00' or a '011' on the other - * - a '0' on one and a (+,00)<country code> on the other - * (for this, a '0' and a '00' prefix would have succeeded above) - */ - - if (matchIntlPrefix(a, ia + 1) && matchIntlPrefix(b, ib +1)) { - return true; + if (tmp_b != NULL) { + b = tmp_b; + len_b = tmp_len_b; } - if (matchTrunkPrefix(a, ia + 1) && matchIntlPrefixAndCC(b, ib +1)) { - return true; - } + int i_a = len_a - 1; + int i_b = len_b - 1; + while (i_a >= 0 && i_b >= 0) { + bool skip_compare = false; + char ch_a = a[i_a]; + char ch_b = b[i_b]; + if (!isNonSeparator(ch_a)) { + i_a--; + skip_compare = true; + } + if (!isNonSeparator(ch_b)) { + i_b--; + skip_compare = true; + } - if (matchTrunkPrefix(b, ib + 1) && matchIntlPrefixAndCC(a, ia +1)) { - return true; + if (!skip_compare) { + if (ch_a != ch_b) { + return false; + } + i_a--; + i_b--; + } } - /* - * Last resort: if the number of unmatched characters on both sides is less than or equal - * to the length of the longest country code and only one number starts 
with a + accept - * the match. This is because some countries like France and Russia have an extra prefix - * digit that is used when dialing locally in country that does not show up when you dial - * the number using the country code. In France this prefix digit is used to determine - * which land line carrier to route the call over. - */ - bool aPlusFirst = (*a == '+'); - bool bPlusFirst = (*b == '+'); - if (ia < 4 && ib < 4 && (aPlusFirst || bPlusFirst) && !(aPlusFirst && bPlusFirst)) { - return true; + if (ok_to_ignore_prefix) { + if (!checkPrefixIsIgnorable(a, i_a)) { + return false; + } + if (!checkPrefixIsIgnorable(b, i_b)) { + return false; + } + } else { + while (i_a >= 0) { + if (isNonSeparator(a[i_a])) { + return false; + } + i_a--; + } + while (i_b >= 0) { + if (isNonSeparator(b[i_b])) { + return false; + } + i_b--; + } } - return false; + return true; } } // namespace android |