diff options
Diffstat (limited to 'android/PhoneNumberUtils.cpp')
-rw-r--r-- | android/PhoneNumberUtils.cpp | 490 |
1 files changed, 276 insertions, 214 deletions
/*
 * Copyright 2009, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string.h>

namespace android {

/* Generated by the following Python script. Values of country calling codes
   are from http://en.wikipedia.org/wiki/List_of_country_calling_codes

#!/usr/bin/python
import sys
ccc_set_2digits = set([0, 1, 7,
                       20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
                       46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
                       62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
                       93, 94, 95, 98])

ONE_LINE_NUM = 10

for i in xrange(100):
  if i % ONE_LINE_NUM == 0:
    sys.stdout.write('    ')
  if i in ccc_set_2digits:
    included = 'true'
  else:
    included = 'false'
  sys.stdout.write(included + ',')
  if ((i + 1) % ONE_LINE_NUM) == 0:
    sys.stdout.write('\n')
  else:
    sys.stdout.write(' ')
*/
static const bool two_length_country_code_map[100] = {
    true, true, false, false, false, false, false, true, false, false,
    false, false, false, false, false, false, false, false, false, false,
    true, false, false, false, false, false, false, true, true, false,
    true, true, true, true, true, false, true, false, false, true,
    true, false, false, true, true, true, true, true, true, true,
    false, true, true, true, true, true, true, true, true, false,
    true, true, true, true, true, true, true, false, false, false,
    false, false, false, false, false, false, false, false, false, false,
    false, true, true, true, true, false, true, false, false, true,
    true, true, true, true, true, true, false, false, true, false,
};

/**
 * Returns true if "ccc_candidate" is a one- or two-digit country calling
 * code listed in two_length_country_code_map. Zero, negative values and
 * values of 100 or more are never accepted here.
 */
static bool isCountryCallingCode(int ccc_candidate) {
    // Use the element count, not the byte size, as the upper bound.
    const int table_size = (int)(sizeof(two_length_country_code_map) /
                                 sizeof(two_length_country_code_map[0]));
    return ccc_candidate > 0 &&
           ccc_candidate < table_size &&
           two_length_country_code_map[ccc_candidate];
}

/**
 * Returns the integer corresponding to the input if "ch" is one of the
 * ISO-LATIN characters 0-9.
 * Returns -1 otherwise.
 */
static int tryGetISODigit(char ch)
{
    if ('0' <= ch && ch <= '9') {
        return ch - '0';
    } else {
        return -1;
    }
}

/** True if ch is one of the ISO-LATIN characters 0-9, '*', '#', '+' */
static bool isNonSeparator(char ch)
{
    return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+';
}

/**
 * Stores the part of "str" that follows the first "consumed" characters
 * into "new_ptr" / "new_len". Either output pointer may be NULL, in which
 * case that output is skipped.
 */
static void storeRemainder(const char *str, size_t len, size_t consumed,
                           const char **new_ptr, size_t *new_len)
{
    if (new_ptr != NULL) {
        *new_ptr = str + consumed;
    }
    if (new_len != NULL) {
        *new_len = len - consumed;
    }
}

/**
 * Tries to store into "new_ptr" / "new_len" the part of "str" that follows
 * its trunk prefix.
 *
 * Currently this function simply drops the first digit, assuming it is the
 * trunk prefix. Actually the trunk prefix is different in each country.
 *
 * e.g.
 * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
 * "+33123456789" equals "0123456789" (French trunk digit is 0)
 *
 * Leading separator characters are skipped. Returns true when a digit was
 * found and dropped. Returns false, leaving the outputs untouched, when the
 * first non-separator character is '*', '#' or '+', or when "str" contains
 * no digit at all.
 */
static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
                                        const char **new_ptr, size_t *new_len)
{
    for (size_t i = 0; i < len; i++) {
        const char ch = str[i];
        if (tryGetISODigit(ch) >= 0) {
            storeRemainder(str, len, i + 1, new_ptr, new_len);
            return true;
        } else if (isNonSeparator(ch)) {
            return false;
        }
    }

    return false;
}

/**
 * Tries to read a country calling code (ccc) from the head of "str".
 *
 * On success, returns the ccc and stores the part of "str" following it
 * into "new_ptr" / "new_len" (either may be NULL). Returns -1, leaving the
 * outputs untouched, when "str" does not start with an international prefix
 * followed by a ccc.
 *
 * One- and two-digit codes are checked against two_length_country_code_map.
 * Any three-digit sequence is accepted as a ccc without further checking.
 */
static int tryGetCountryCallingCode(const char *str, size_t len,
                                    const char **new_ptr, size_t *new_len)
{
    // Rough regexp:
    //  ^[^0-9*#+]*((\+|0(0|11))\d\d?\d?|166)
    //
    // States:
    //  0: nothing read yet
    //  1: "+" read            2: "0" read         3: "00" read
    //  4: "01" read           5: "011" read
    //  6: one ccc digit read  7: two ccc digits read
    //  8: "1" read            9: "16" read
    //
    // In all the states, this function ignores separator characters.
    // "166" is a special case, described by the original author as the
    // call from Thailand to the US; it yields 66.
    int state = 0;
    int ccc = 0;
    for (size_t i = 0; i < len; i++) {
        const char ch = str[i];
        switch (state) {
            case 0:
                if (ch == '+') state = 1;
                else if (ch == '0') state = 2;
                else if (ch == '1') state = 8;
                else if (isNonSeparator(ch)) return -1;
                break;

            case 2:
                if (ch == '0') state = 3;
                else if (ch == '1') state = 4;
                else if (isNonSeparator(ch)) return -1;
                break;

            case 4:
                if (ch == '1') state = 5;
                else if (isNonSeparator(ch)) return -1;
                break;

            case 1:
            case 3:
            case 5:
            case 6:
            case 7:
            {
                const int digit = tryGetISODigit(ch);
                const bool is_first_digit = (state != 6 && state != 7);
                // No ccc starts with 0, but 0 is a valid second or third
                // digit (e.g. 20, 30, 40, 60, 90, 350).
                if (digit > 0 || (digit == 0 && !is_first_digit)) {
                    ccc = ccc * 10 + digit;
                    if (ccc >= 100 || isCountryCallingCode(ccc)) {
                        storeRemainder(str, len, i + 1, new_ptr, new_len);
                        return ccc;
                    }
                    // The first digit is never 0, so a third digit always
                    // makes ccc >= 100 and returns above; state 7 is the
                    // last state reached on this path.
                    state = is_first_digit ? 6 : state + 1;
                } else if (isNonSeparator(ch)) {
                    return -1;
                }
                break;
            }

            case 8:
                if (ch == '6') state = 9;
                else if (isNonSeparator(ch)) return -1;
                break;

            case 9:
                if (ch == '6') {
                    storeRemainder(str, len, i + 1, new_ptr, new_len);
                    return 66;
                } else if (isNonSeparator(ch)) {
                    // Anything other than "166" is not the special case.
                    // Without this, a '6' appearing much later in the
                    // number would be taken as the end of "166".
                    return -1;
                }
                break;

            default:
                return -1;
        }
    }

    return -1;
}

/**
 * Returns true if the prefix of "ch" ending at index "i" (inclusive) is
 * "ignorable". Here, "ignorable" means that the prefix consists of at most
 * one digit plus separator characters. The one digit is assumed to be a
 * trunk prefix. A negative "i" denotes an empty prefix, which is ignorable.
 */
static bool checkPrefixIsIgnorable(const char* ch, int i) {
    bool trunk_prefix_was_read = false;
    while (i >= 0) {
        if (tryGetISODigit(ch[i]) >= 0) {
            if (trunk_prefix_was_read) {
                // More than one digit appeared, meaning that the two
                // numbers being compared are different.
                return false;
            } else {
                // Ignore just one digit, assuming it is a trunk prefix.
                trunk_prefix_was_read = true;
            }
        } else if (isNonSeparator(ch[i])) {
            // A trunk prefix is a digit, not "*", "#"...
            return false;
        }
        i--;
    }

    return true;
}

/**
 * Returns true if str[0..i] (inclusive) contains separator characters only.
 * A negative "i" denotes an empty range, for which the result is true.
 */
static bool hasOnlySeparators(const char* str, int i) {
    while (i >= 0) {
        if (isNonSeparator(str[i])) {
            return false;
        }
        i--;
    }
    return true;
}

/**
 * Compare phone numbers a and b, return true if they're identical
 * enough for caller ID purposes.
 *
 * NULL is treated as a 0-length string.
 *
 * Detailed information:
 * Currently (as of 2009-06-12), we cannot depend on the locale given from the
 * OS. For example, current Android does not accept "en_JP", meaning
 * "the display language is English but the phone should be in Japan", but
 * en_US, es_US, etc. So we cannot identify which digit is the valid trunk
 * prefix in the country where the phone is used. More specifically,
 * "880-1234-1234" is not a valid phone number in Japan since the trunk prefix
 * in Japan is not 8 but 0 (the correct number should be "080-1234-1234"),
 * while the Russian trunk prefix is 8. Also, we cannot know whether the
 * country where users live has a trunk prefix at all. So, we cannot determine
 * whether "+81-80-1234-1234" is NOT the same as "880-1234-1234" (while
 * "+81-80-1234-1234" is the same as "080-1234-1234" and we can determine
 * "880-1234-1234" is different from "080-1234-1234").
 *
 * In the future, we should handle the trunk prefix more correctly, but as of
 * now, we just ignore it...
 */
bool phone_number_compare(const char* a, const char* b)
{
    if (a == NULL) {
        a = "";
    }
    if (b == NULL) {
        b = "";
    }
    size_t len_a = strlen(a);
    size_t len_b = strlen(b);

    const char* tmp_a = NULL;
    const char* tmp_b = NULL;
    size_t tmp_len_a = len_a;
    size_t tmp_len_b = len_b;

    const int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
    const int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
    bool ok_to_ignore_prefix = true;
    if (ccc_a >= 0 && ccc_b >= 0) {
        if (ccc_a != ccc_b) {
            // Different Country Calling Code. Must be different phone number.
            return false;
        }
        // When both have ccc, do not ignore trunk prefix. Without this,
        // "+81123123" becomes same as "+810123123" (+81 == Japan)
        ok_to_ignore_prefix = false;
    } else if (ccc_a < 0 && ccc_b < 0) {
        // When both do not have ccc, do not ignore trunk prefix. Without this,
        // "123123" becomes same as "0123123"
        ok_to_ignore_prefix = false;
    } else {
        // Exactly one side has a ccc: drop the assumed trunk digit from the
        // other side. On failure the outputs are left untouched, so the
        // return value does not need to be checked.
        if (ccc_a < 0) {
            tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
        }
        if (ccc_b < 0) {
            tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
        }
    }

    if (tmp_a != NULL) {
        a = tmp_a;
        len_a = tmp_len_a;
    }
    if (tmp_b != NULL) {
        b = tmp_b;
        len_b = tmp_len_b;
    }

    // Compare from right to left, skipping separator characters.
    int i_a = (int)len_a - 1;
    int i_b = (int)len_b - 1;
    while (i_a >= 0 && i_b >= 0) {
        bool skip_compare = false;
        const char ch_a = a[i_a];
        const char ch_b = b[i_b];
        if (!isNonSeparator(ch_a)) {
            i_a--;
            skip_compare = true;
        }
        if (!isNonSeparator(ch_b)) {
            i_b--;
            skip_compare = true;
        }

        if (!skip_compare) {
            if (ch_a != ch_b) {
                return false;
            }
            i_a--;
            i_b--;
        }
    }

    if (ok_to_ignore_prefix) {
        // What is left on either side may hold one more (trunk) digit.
        return checkPrefixIsIgnorable(a, i_a) &&
               checkPrefixIsIgnorable(b, i_b);
    }

    // Strict mode: what is left on either side must be separators only.
    return hasOnlySeparators(a, i_a) && hasOnlySeparators(b, i_b);
}

} // namespace android