diff options
author | Daisuke Miyakawa <dmiyakawa@google.com> | 2009-07-07 09:37:55 +0900 |
---|---|---|
committer | Daisuke Miyakawa <dmiyakawa@google.com> | 2009-07-07 09:37:55 +0900 |
commit | ab3ee7de06d0451d0a34cbd982006f62928e4b48 (patch) | |
tree | 8a5b5455dd10a26e988aee7f84e8ab06fbdb212b /android/PhoneNumberUtils.cpp | |
parent | de43094477419f8a190a6f97b47d346827310a02 (diff) |
Revert ""Rewrite" PhoneNumberUtil so that it compares two phone strings using as many characters as possible, unlike the previous implementation."
This reverts commit de43094477419f8a190a6f97b47d346827310a02.
related internal issue: 1949010
Diffstat (limited to 'android/PhoneNumberUtils.cpp')
-rw-r--r-- | android/PhoneNumberUtils.cpp | 490 |
1 files changed, 214 insertions, 276 deletions
diff --git a/android/PhoneNumberUtils.cpp b/android/PhoneNumberUtils.cpp index 321b0ea..9e5e470 100644 --- a/android/PhoneNumberUtils.cpp +++ b/android/PhoneNumberUtils.cpp @@ -1,355 +1,293 @@ -/* - * Copyright 2009, The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +/* //device/vmlibs-android/com.android.internal.telephony/PhoneNumberUtils.java +** +** Copyright 2006, The Android Open Source Project +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +*/ #include <string.h> namespace android { -/* Generated by the following Python script. Values of country calling codes - are from http://en.wikipedia.org/wiki/List_of_country_calling_codes - -#!/usr/bin/python -import sys -ccc_set_2digits = set([0, 1, 7, - 20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45, - 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, - 62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92, - 93, 94, 95, 98]) - -ONE_LINE_NUM = 10 - -for i in xrange(100): - if i % ONE_LINE_NUM == 0: - sys.stdout.write(' ') - if i in ccc_set_2digits: - included = 'true' - else: - included = 'false' - sys.stdout.write(included + ',') - if ((i + 1) % ONE_LINE_NUM) == 0: - sys.stdout.write('\n') - else: - sys.stdout.write(' ') -*/ -static bool two_length_country_code_map[100] = { - true, true, false, false, false, false, false, true, false, false, - false, false, false, false, false, false, false, false, false, false, - true, false, false, false, false, false, false, true, true, false, - true, true, true, true, true, false, true, false, false, true, - true, false, false, true, true, true, true, true, true, true, - false, true, true, true, true, true, true, true, true, false, - true, true, true, true, true, true, true, false, false, false, - false, false, false, false, false, false, false, false, false, false, - false, true, true, true, true, false, true, false, false, true, - true, true, true, true, true, true, false, false, true, false, -}; - -/** True the character(s) expresses some country calling code. False otherwise. - */ -static bool isCountryCallingCode(int ccc_candidate) { - return ccc_candidate > 0 && - ccc_candidate < (int)sizeof(two_length_country_code_map) && - two_length_country_code_map[ccc_candidate]; -} +static int MIN_MATCH = 5; -/** - * Returns interger corresponding to the input if input "ch" is - * ISO-LATIN characters 0-9. - * Returns -1 otherwise - */ -static int tryGetISODigit (char ch) +/** True if c is ISO-LATIN characters 0-9 */ +static bool isISODigit (char c) { - if ('0' <= ch && ch <= '9') { - return ch - '0'; - } else { - return -1; - } + return c >= '0' && c <= '9'; } /** True if c is ISO-LATIN characters 0-9, *, # , + */ -static bool isNonSeparator(char ch) +static bool isNonSeparator(char c) { - return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+'; + return (c >= '0' && c <= '9') || c == '*' || c == '#' || c == '+'; } /** - * Try to store the pointer to "new_ptr" which does not have trunk prefix. + * Phone numbers are stored in "lookup" form in the database + * as reversed strings to allow for caller ID lookup * - * Currently this function simply ignore the first digit assuming it is - * trunk prefix. Actually trunk prefix is different in each country. - * - * e.g. - * "+79161234567" equals "89161234567" (Russian trunk digit is 8) - * "+33123456789" equals "0123456789" (French trunk digit is 0) + * This method takes a phone number and makes a valid SQL "LIKE" + * string that will match the lookup form * */ -static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len, - const char **new_ptr, size_t *new_len) +/** all of a up to len must be an international prefix or + * separators/non-dialing digits + */ +static bool matchIntlPrefix(const char* a, int len) { - for (size_t i = 0 ; i < len ; i++) { - char ch = str[i]; - if (tryGetISODigit(ch) >= 0) { - if (new_ptr != NULL) { - *new_ptr = str + i + 1; - } - if (new_len != NULL) { - *new_len = len - (i + 1); - } - return true; - } else if (isNonSeparator(ch)) { - return false; + /* '([^0-9*#+]\+[^0-9*#+] | [^0-9*#+]0(0|11)[^0-9*#+] )$' */ + /* 0 1 2 3 45 */ + + int state = 0; + for (int i = 0 ; i < len ; i++) { + char c = a[i]; + + switch (state) { + case 0: + if (c == '+') state = 1; + else if (c == '0') state = 2; + else if (isNonSeparator(c)) return false; + break; + + case 2: + if (c == '0') state = 3; + else if (c == '1') state = 4; + else if (isNonSeparator(c)) return false; + break; + + case 4: + if (c == '1') state = 5; + else if (isNonSeparator(c)) return false; + break; + + default: + if (isNonSeparator(c)) return false; + break; + } } - return false; + return state == 1 || state == 3 || state == 5; } -static int tryGetCountryCallingCode(const char *str, size_t len, - const char **new_ptr, size_t *new_len) +/** all of 'a' up to len must match non-US trunk prefix ('0') */ +static bool matchTrunkPrefix(const char* a, int len) { - // Rough regexp: - // ^[^0-9*#+]*((\+|0(0|11)\d\d?|166) [^0-9*#+] $ - // 0 1 2 3 45 6 7 89 - // - // In all the states, this function ignores separator characters. - // "166" is the special case for the call from Thailand to the US. Ugu! + bool found; + + found = false; + + for (int i = 0 ; i < len ; i++) { + char c = a[i]; + + if (c == '0' && !found) { + found = true; + } else if (isNonSeparator(c)) { + return false; + } + } + + return found; +} + +/** all of 'a' up to len must be a (+|00|011)country code) + * We're fast and loose with the country code. Any \d{1,3} matches */ +static bool matchIntlPrefixAndCC(const char* a, int len) +{ + /* [^0-9*#+]*(\+|0(0|11)\d\d?\d? [^0-9*#+] $ */ + /* 0 1 2 3 45 6 7 8 */ int state = 0; - int ccc = 0; - for (size_t i = 0 ; i < len ; i++ ) { - char ch = str[i]; + for (int i = 0 ; i < len ; i++ ) { + char c = a[i]; + switch (state) { case 0: - if (ch == '+') state = 1; - else if (ch == '0') state = 2; - else if (ch == '1') state = 8; - else if (isNonSeparator(ch)) return -1; + if (c == '+') state = 1; + else if (c == '0') state = 2; + else if (isNonSeparator(c)) return false; break; case 2: - if (ch == '0') state = 3; - else if (ch == '1') state = 4; - else if (isNonSeparator(ch)) return -1; + if (c == '0') state = 3; + else if (c == '1') state = 4; + else if (isNonSeparator(c)) return false; break; case 4: - if (ch == '1') state = 5; - else if (isNonSeparator(ch)) return -1; + if (c == '1') state = 5; + else if (isNonSeparator(c)) return false; break; case 1: case 3: case 5: + if (isISODigit(c)) state = 6; + else if (isNonSeparator(c)) return false; + break; + case 6: case 7: - { - int ret = tryGetISODigit(ch); - if (ret > 0) { - ccc = ccc * 10 + ret; - if (ccc >= 100 || isCountryCallingCode(ccc)) { - if (new_ptr != NULL) { - *new_ptr = str + i + 1; - } - if (new_len != NULL) { - *new_len = len - (i + 1); - } - return ccc; - } - if (state == 1 || state == 3 || state == 5) { - state = 6; - } else { - state++; - } - } else if (isNonSeparator(ch)) { - return -1; - } - } - break; - case 8: - if (ch == '6') state = 9; - else if (isNonSeparator(ch)) return -1; - break; - case 9: - if (ch == '6') { - if (new_ptr != NULL) { - *new_ptr = str + i + 1; - } - if (new_len != NULL) { - *new_len = len - (i + 1); - } - return 66; - } - break; + if (isISODigit(c)) state++; + else if (isNonSeparator(c)) return false; + break; + default: - return -1; + if (isNonSeparator(c)) return false; } } - return -1; + return state == 6 || state == 7 || state == 8; +} + +/** or -1 if both are negative */ +static int minPositive(int a, int b) +{ + if (a >= 0 && b >= 0) { + return (a < b) ? a : b; + } else if (a >= 0) { /* && b < 0 */ + return a; + } else if (b >= 0) { /* && a < 0 */ + return b; + } else { /* a < 0 && b < 0 */ + return -1; + } } /** - * Return true if the prefix of "ch" is "ignorable". Here, "ignorable" means - * that "ch" has only one digit and separater characters. The one digit is - * assumed to be trunk prefix. + * Return the offset into a of the first appearance of b, or -1 if there + * is no such character in a. */ -static bool checkPrefixIsIgnorable(const char* ch, int i) { - bool trunk_prefix_was_read = false; - while (i >= 0) { - if (tryGetISODigit(ch[i]) >= 0) { - if (trunk_prefix_was_read) { - // More than one digit appeared, meaning that "a" and "b" - // is different. - return false; - } else { - // Ignore just one digit, assuming it is trunk prefix. - trunk_prefix_was_read = true; - } - } else if (isNonSeparator(ch[i])) { - // Trunk prefix is a digit, not "*", "#"... - return false; - } - i--; - } +static int indexOf(const char *a, char b) { + char *ix = strchr(a, b); - return true; + if (ix == NULL) + return -1; + else + return ix - a; } /** * Compare phone numbers a and b, return true if they're identical * enough for caller ID purposes. * - * Assume NULL as 0-length string. - * - * Detailed information: - * Currently (as of 2009-06-12), we cannot depend on the locale given from the - * OS. For example, current Android does not accept "en_JP", meaning - * "the display language is English but the phone should be in Japan", but - * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix - * in the country where the phone is used. More specifically, "880-1234-1234" - * is not valid phone number in Japan since the trunk prefix in Japan is not 8 - * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix - * is 8. Also, we cannot know whether the country where users live has trunk - * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT - * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234" - * and we can determine "880-1234-1234" is different from "080-1234-1234"). + * - Compares from right to left + * - requires MIN_MATCH (5) characters to match + * - handles common trunk prefixes and international prefixes + * (basically, everything except the Russian trunk prefix) * - * In the future, we should handle trunk prefix more correctly, but as of now, - * we just ignore it... + * Tolerates nulls */ bool phone_number_compare(const char* a, const char* b) { - size_t len_a = 0; - size_t len_b = 0; - if (a == NULL) { - a = ""; - } else { - len_a = strlen(a); + int ia, ib; + int matched; + + if (a == NULL || b == NULL) { + return false; } - if (b == NULL) { - b = ""; - } else { - len_b = strlen(b); + + ia = strlen(a); + ib = strlen(b); + if (ia == 0 || ib == 0) { + return false; } - const char* tmp_a = NULL; - const char* tmp_b = NULL; - size_t tmp_len_a = len_a; - size_t tmp_len_b = len_b; - - int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a); - int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b); - bool ok_to_ignore_prefix = true; - if (ccc_a >= 0 && ccc_b >= 0) { - if (ccc_a != ccc_b) { - // Different Country Calling Code. Must be different phone number. - return false; + // Compare from right to left + ia--; + ib--; + + matched = 0; + + while (ia >= 0 && ib >=0) { + char ca, cb; + bool skipCmp = false; + + ca = a[ia]; + + if (!isNonSeparator(ca)) { + ia--; + skipCmp = true; } - // When both have ccc, do not ignore trunk prefix. Without this, - // "+81123123" becomes same as "+810123123" (+81 == Japan) - ok_to_ignore_prefix = false; - } else if (ccc_a < 0 && ccc_b < 0) { - // When both do not have ccc, do not ignore trunk prefix. Without this, - // "123123" becomes same as "0123123" - ok_to_ignore_prefix = false; - } else { - if (ccc_a < 0) { - tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a); + + cb = b[ib]; + + if (!isNonSeparator(cb)) { + ib--; + skipCmp = true; } - if (ccc_b < 0) { - tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b); + + if (!skipCmp) { + if (cb != ca) { + break; + } + ia--; ib--; matched++; } } - if (tmp_a != NULL) { - a = tmp_a; - len_a = tmp_len_a; + if (matched < MIN_MATCH) { + int aLen = strlen(a); + + // if the input strings match, but their lengths < MIN_MATCH, + // treat them as equal. + if (aLen == (int)strlen(b) && aLen == matched) { + return true; + } + return false; } - if (tmp_b != NULL) { - b = tmp_b; - len_b = tmp_len_b; + + // At least one string has matched completely; + if (matched >= MIN_MATCH && (ia < 0 || ib < 0)) { + return true; } - int i_a = len_a - 1; - int i_b = len_b - 1; - while (i_a >= 0 && i_b >= 0) { - bool skip_compare = false; - char ch_a = a[i_a]; - char ch_b = b[i_b]; - if (!isNonSeparator(ch_a)) { - i_a--; - skip_compare = true; - } - if (!isNonSeparator(ch_b)) { - i_b--; - skip_compare = true; - } + /* + * Now, what remains must be one of the following for a + * match: + * + * - a '+' on one and a '00' or a '011' on the other + * - a '0' on one and a (+,00)<country code> on the other + * (for this, a '0' and a '00' prefix would have succeeded above) + */ - if (!skip_compare) { - if (ch_a != ch_b) { - return false; - } - i_a--; - i_b--; - } + if (matchIntlPrefix(a, ia + 1) && matchIntlPrefix(b, ib +1)) { + return true; } - if (ok_to_ignore_prefix) { - if (!checkPrefixIsIgnorable(a, i_a)) { - return false; - } - if (!checkPrefixIsIgnorable(b, i_b)) { - return false; - } - } else { - while (i_a >= 0) { - if (isNonSeparator(a[i_a])) { - return false; - } - i_a--; - } - while (i_b >= 0) { - if (isNonSeparator(b[i_b])) { - return false; - } - i_b--; - } + if (matchTrunkPrefix(a, ia + 1) && matchIntlPrefixAndCC(b, ib +1)) { + return true; } - return true; + if (matchTrunkPrefix(b, ib + 1) && matchIntlPrefixAndCC(a, ia +1)) { + return true; + } + + /* + * Last resort: if the number of unmatched characters on both sides is less than or equal + * to the length of the longest country code and only one number starts with a + accept + * the match. This is because some countries like France and Russia have an extra prefix + * digit that is used when dialing locally in country that does not show up when you dial + * the number using the country code. In France this prefix digit is used to determine + * which land line carrier to route the call over. + */ + bool aPlusFirst = (*a == '+'); + bool bPlusFirst = (*b == '+'); + if (ia < 4 && ib < 4 && (aPlusFirst || bPlusFirst) && !(aPlusFirst && bPlusFirst)) { + return true; + } + + return false; } } // namespace android |