summaryrefslogtreecommitdiff
path: root/android/PhoneNumberUtils.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'android/PhoneNumberUtils.cpp')
-rw-r--r--android/PhoneNumberUtils.cpp518
1 files changed, 304 insertions, 214 deletions
diff --git a/android/PhoneNumberUtils.cpp b/android/PhoneNumberUtils.cpp
index 9e5e470..cb8552e 100644
--- a/android/PhoneNumberUtils.cpp
+++ b/android/PhoneNumberUtils.cpp
@@ -1,293 +1,383 @@
-/* //device/vmlibs-android/com.android.internal.telephony/PhoneNumberUtils.java
-**
-** Copyright 2006, The Android Open Source Project
-**
-** Licensed under the Apache License, Version 2.0 (the "License");
-** you may not use this file except in compliance with the License.
-** You may obtain a copy of the License at
-**
-** http://www.apache.org/licenses/LICENSE-2.0
-**
-** Unless required by applicable law or agreed to in writing, software
-** distributed under the License is distributed on an "AS IS" BASIS,
-** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-** See the License for the specific language governing permissions and
-** limitations under the License.
-*/
+/*
+ * Copyright 2009, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
#include <string.h>
namespace android {
-static int MIN_MATCH = 5;
+/* Generated by the following Python script. Values of country calling codes
+ are from http://en.wikipedia.org/wiki/List_of_country_calling_codes
+
+#!/usr/bin/python
+import sys
+ccc_set_2digits = set([0, 1, 7,
+ 20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
+ 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
+ 62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
+ 93, 94, 95, 98])
+
+ONE_LINE_NUM = 10
+
+for i in xrange(100):
+ if i % ONE_LINE_NUM == 0:
+ sys.stdout.write(' ')
+ if i in ccc_set_2digits:
+ included = 'true'
+ else:
+ included = 'false'
+ sys.stdout.write(included + ',')
+ if ((i + 1) % ONE_LINE_NUM) == 0:
+ sys.stdout.write('\n')
+ else:
+ sys.stdout.write(' ')
+*/
+static bool two_length_country_code_map[100] = {
+ true, true, false, false, false, false, false, true, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ true, false, false, false, false, false, false, true, true, false,
+ true, true, true, true, true, false, true, false, false, true,
+ true, false, false, true, true, true, true, true, true, true,
+ false, true, true, true, true, true, true, true, true, false,
+ true, true, true, true, true, true, true, false, false, false,
+ false, false, false, false, false, false, false, false, false, false,
+ false, true, true, true, true, false, true, false, false, true,
+ true, true, true, true, true, true, false, false, true, false,
+};
+
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
-/** True if c is ISO-LATIN characters 0-9 */
-static bool isISODigit (char c)
+/**
+ * Returns true if "ccc_candidate" expresses (part of) some country calling
+ * code.
+ * Returns false otherwise.
+ */
+static bool isCountryCallingCode(int ccc_candidate) {
+ return ccc_candidate > 0 &&
+ ccc_candidate < (int)ARRAY_SIZE(two_length_country_code_map) &&
+ two_length_country_code_map[ccc_candidate];
+}
+
+/**
+ * Returns the integer corresponding to the input if input "ch" is
+ * an ISO-LATIN digit character 0-9.
+ * Returns -1 otherwise.
+ */
+static int tryGetISODigit (char ch)
{
- return c >= '0' && c <= '9';
+ if ('0' <= ch && ch <= '9') {
+ return ch - '0';
+ } else {
+ return -1;
+ }
}
/** True if c is ISO-LATIN characters 0-9, *, # , + */
-static bool isNonSeparator(char c)
+static bool isNonSeparator(char ch)
{
- return (c >= '0' && c <= '9') || c == '*' || c == '#' || c == '+';
+ return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+';
}
/**
- * Phone numbers are stored in "lookup" form in the database
- * as reversed strings to allow for caller ID lookup
+ * Tries to set "new_ptr" to the part of "str" that follows the trunk prefix.
*
- * This method takes a phone number and makes a valid SQL "LIKE"
- * string that will match the lookup form
+ * Currently this function simply ignores the first digit, assuming it is
+ * the trunk prefix. Actually, the trunk prefix differs in each country.
+ *
+ * e.g.
+ * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
+ * "+33123456789" equals "0123456789" (French trunk digit is 0)
*
*/
-/** all of a up to len must be an international prefix or
- * separators/non-dialing digits
- */
-static bool matchIntlPrefix(const char* a, int len)
+static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
+ const char **new_ptr, size_t *new_len)
{
- /* '([^0-9*#+]\+[^0-9*#+] | [^0-9*#+]0(0|11)[^0-9*#+] )$' */
- /* 0 1 2 3 45 */
-
- int state = 0;
- for (int i = 0 ; i < len ; i++) {
- char c = a[i];
-
- switch (state) {
- case 0:
- if (c == '+') state = 1;
- else if (c == '0') state = 2;
- else if (isNonSeparator(c)) return false;
- break;
-
- case 2:
- if (c == '0') state = 3;
- else if (c == '1') state = 4;
- else if (isNonSeparator(c)) return false;
- break;
-
- case 4:
- if (c == '1') state = 5;
- else if (isNonSeparator(c)) return false;
- break;
-
- default:
- if (isNonSeparator(c)) return false;
- break;
-
- }
- }
-
- return state == 1 || state == 3 || state == 5;
-}
-
-/** all of 'a' up to len must match non-US trunk prefix ('0') */
-static bool matchTrunkPrefix(const char* a, int len)
-{
- bool found;
-
- found = false;
-
- for (int i = 0 ; i < len ; i++) {
- char c = a[i];
-
- if (c == '0' && !found) {
- found = true;
- } else if (isNonSeparator(c)) {
+ for (size_t i = 0 ; i < len ; i++) {
+ char ch = str[i];
+ if (tryGetISODigit(ch) >= 0) {
+ if (new_ptr != NULL) {
+ *new_ptr = str + i + 1;
+ }
+ if (new_len != NULL) {
+ *new_len = len - (i + 1);
+ }
+ return true;
+ } else if (isNonSeparator(ch)) {
return false;
}
}
-
- return found;
+
+ return false;
}
-/** all of 'a' up to len must be a (+|00|011)country code)
- * We're fast and loose with the country code. Any \d{1,3} matches */
-static bool matchIntlPrefixAndCC(const char* a, int len)
+/*
+ * Note that this function does not strictly check country calling codes of
+ * length 3 (like Morocco: +212), assuming it is enough to use the first two
+ * digits to compare two phone numbers.
+ */
+static int tryGetCountryCallingCode(const char *str, size_t len,
+ const char **new_ptr, size_t *new_len)
{
- /* [^0-9*#+]*(\+|0(0|11)\d\d?\d? [^0-9*#+] $ */
- /* 0 1 2 3 45 6 7 8 */
+ // Rough regexp:
+ // ^[^0-9*#+]*((\+|0(0|11)\d\d?|166) [^0-9*#+] $
+ // 0 1 2 3 45 6 7 89
+ //
+ // In all the states, this function ignores separator characters.
+ // "166" is the special case for the call from Thailand to the US. Ugu!
int state = 0;
- for (int i = 0 ; i < len ; i++ ) {
- char c = a[i];
-
+ int ccc = 0;
+ for (size_t i = 0 ; i < len ; i++ ) {
+ char ch = str[i];
switch (state) {
case 0:
- if (c == '+') state = 1;
- else if (c == '0') state = 2;
- else if (isNonSeparator(c)) return false;
+ if (ch == '+') state = 1;
+ else if (ch == '0') state = 2;
+ else if (ch == '1') state = 8;
+ else if (isNonSeparator(ch)) return -1;
break;
case 2:
- if (c == '0') state = 3;
- else if (c == '1') state = 4;
- else if (isNonSeparator(c)) return false;
+ if (ch == '0') state = 3;
+ else if (ch == '1') state = 4;
+ else if (isNonSeparator(ch)) return -1;
break;
case 4:
- if (c == '1') state = 5;
- else if (isNonSeparator(c)) return false;
+ if (ch == '1') state = 5;
+ else if (isNonSeparator(ch)) return -1;
break;
case 1:
case 3:
case 5:
- if (isISODigit(c)) state = 6;
- else if (isNonSeparator(c)) return false;
- break;
-
case 6:
case 7:
- if (isISODigit(c)) state++;
- else if (isNonSeparator(c)) return false;
- break;
-
+ {
+ int ret = tryGetISODigit(ch);
+ if (ret > 0) {
+ ccc = ccc * 10 + ret;
+ if (ccc >= 100 || isCountryCallingCode(ccc)) {
+ if (new_ptr != NULL) {
+ *new_ptr = str + i + 1;
+ }
+ if (new_len != NULL) {
+ *new_len = len - (i + 1);
+ }
+ return ccc;
+ }
+ if (state == 1 || state == 3 || state == 5) {
+ state = 6;
+ } else {
+ state++;
+ }
+ } else if (isNonSeparator(ch)) {
+ return -1;
+ }
+ }
+ break;
+ case 8:
+ if (ch == '6') state = 9;
+ else if (isNonSeparator(ch)) return -1;
+ break;
+ case 9:
+ if (ch == '6') {
+ if (new_ptr != NULL) {
+ *new_ptr = str + i + 1;
+ }
+ if (new_len != NULL) {
+ *new_len = len - (i + 1);
+ }
+ return 66;
+ }
+ break;
default:
- if (isNonSeparator(c)) return false;
+ return -1;
}
}
- return state == 6 || state == 7 || state == 8;
-}
-
-/** or -1 if both are negative */
-static int minPositive(int a, int b)
-{
- if (a >= 0 && b >= 0) {
- return (a < b) ? a : b;
- } else if (a >= 0) { /* && b < 0 */
- return a;
- } else if (b >= 0) { /* && a < 0 */
- return b;
- } else { /* a < 0 && b < 0 */
- return -1;
- }
+ return -1;
}
/**
- * Return the offset into a of the first appearance of b, or -1 if there
- * is no such character in a.
+ * Return true if the prefix of "ch" is "ignorable". Here, "ignorable" means
+ * that "ch" has at most one digit plus separator characters. The one digit is
+ * assumed to be the trunk prefix.
*/
-static int indexOf(const char *a, char b) {
- char *ix = strchr(a, b);
+static bool checkPrefixIsIgnorable(const char* ch, int i) {
+ bool trunk_prefix_was_read = false;
+ while (i >= 0) {
+ if (tryGetISODigit(ch[i]) >= 0) {
+ if (trunk_prefix_was_read) {
+ // More than one digit appeared, meaning that "a" and "b"
+ // are different.
+ return false;
+ } else {
+ // Ignore just one digit, assuming it is trunk prefix.
+ trunk_prefix_was_read = true;
+ }
+ } else if (isNonSeparator(ch[i])) {
+ // Trunk prefix is a digit, not "*", "#"...
+ return false;
+ }
+ i--;
+ }
- if (ix == NULL)
- return -1;
- else
- return ix - a;
+ return true;
}
/**
* Compare phone numbers a and b, return true if they're identical
* enough for caller ID purposes.
*
- * - Compares from right to left
- * - requires MIN_MATCH (5) characters to match
- * - handles common trunk prefixes and international prefixes
- * (basically, everything except the Russian trunk prefix)
+ * Assume NULL as 0-length string.
*
- * Tolerates nulls
+ * Detailed information:
+ * Currently (as of 2009-06-12), we cannot depend on the locale given from the
+ * OS. For example, current Android does not accept "en_JP", meaning
+ * "the display language is English but the phone should be in Japan", but
+ * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix
+ * in the country where the phone is used. More specifically, "880-1234-1234"
+ * is not valid phone number in Japan since the trunk prefix in Japan is not 8
+ * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix
+ * is 8. Also, we cannot know whether the country where users live has trunk
+ * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT
+ * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234"
+ * and we can determine "880-1234-1234" is different from "080-1234-1234").
+ *
+ * In the future, we should handle trunk prefix more correctly, but as of now,
+ * we just ignore it...
*/
bool phone_number_compare(const char* a, const char* b)
{
- int ia, ib;
- int matched;
-
- if (a == NULL || b == NULL) {
- return false;
+ size_t len_a = 0;
+ size_t len_b = 0;
+ if (a == NULL) {
+ a = "";
+ } else {
+ len_a = strlen(a);
}
-
- ia = strlen(a);
- ib = strlen(b);
- if (ia == 0 || ib == 0) {
- return false;
+ if (b == NULL) {
+ b = "";
+ } else {
+ len_b = strlen(b);
}
- // Compare from right to left
- ia--;
- ib--;
-
- matched = 0;
-
- while (ia >= 0 && ib >=0) {
- char ca, cb;
- bool skipCmp = false;
-
- ca = a[ia];
-
- if (!isNonSeparator(ca)) {
- ia--;
- skipCmp = true;
- }
-
- cb = b[ib];
-
- if (!isNonSeparator(cb)) {
- ib--;
- skipCmp = true;
+ const char* tmp_a = NULL;
+ const char* tmp_b = NULL;
+ size_t tmp_len_a = len_a;
+ size_t tmp_len_b = len_b;
+
+ int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
+ int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
+ bool ok_to_ignore_prefix = true;
+ if (ccc_a >= 0 && ccc_b >= 0) {
+ if (ccc_a != ccc_b) {
+ // Different Country Calling Code. Must be different phone number.
+ return false;
}
-
- if (!skipCmp) {
- if (cb != ca) {
- break;
- }
- ia--; ib--; matched++;
+ // When both have ccc, do not ignore trunk prefix. Without this,
+ // "+81123123" becomes same as "+810123123" (+81 == Japan)
+ ok_to_ignore_prefix = false;
+ } else if (ccc_a < 0 && ccc_b < 0) {
+ // When both do not have ccc, do not ignore trunk prefix. Without this,
+ // "123123" becomes same as "0123123"
+ ok_to_ignore_prefix = false;
+ } else {
+ if (ccc_a < 0) {
+ tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
}
- }
-
- if (matched < MIN_MATCH) {
- int aLen = strlen(a);
-
- // if the input strings match, but their lengths < MIN_MATCH,
- // treat them as equal.
- if (aLen == (int)strlen(b) && aLen == matched) {
- return true;
+ if (ccc_b < 0) {
+ tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
}
- return false;
}
- // At least one string has matched completely;
- if (matched >= MIN_MATCH && (ia < 0 || ib < 0)) {
- return true;
+ if (tmp_a != NULL) {
+ a = tmp_a;
+ len_a = tmp_len_a;
}
-
- /*
- * Now, what remains must be one of the following for a
- * match:
- *
- * - a '+' on one and a '00' or a '011' on the other
- * - a '0' on one and a (+,00)<country code> on the other
- * (for this, a '0' and a '00' prefix would have succeeded above)
- */
-
- if (matchIntlPrefix(a, ia + 1) && matchIntlPrefix(b, ib +1)) {
- return true;
+ if (tmp_b != NULL) {
+ b = tmp_b;
+ len_b = tmp_len_b;
}
- if (matchTrunkPrefix(a, ia + 1) && matchIntlPrefixAndCC(b, ib +1)) {
- return true;
- }
+ int i_a = len_a - 1;
+ int i_b = len_b - 1;
+ while (i_a >= 0 && i_b >= 0) {
+ bool skip_compare = false;
+ char ch_a = a[i_a];
+ char ch_b = b[i_b];
+ if (!isNonSeparator(ch_a)) {
+ i_a--;
+ skip_compare = true;
+ }
+ if (!isNonSeparator(ch_b)) {
+ i_b--;
+ skip_compare = true;
+ }
- if (matchTrunkPrefix(b, ib + 1) && matchIntlPrefixAndCC(a, ia +1)) {
- return true;
+ if (!skip_compare) {
+ if (ch_a != ch_b) {
+ return false;
+ }
+ i_a--;
+ i_b--;
+ }
}
- /*
- * Last resort: if the number of unmatched characters on both sides is less than or equal
- * to the length of the longest country code and only one number starts with a + accept
- * the match. This is because some countries like France and Russia have an extra prefix
- * digit that is used when dialing locally in country that does not show up when you dial
- * the number using the country code. In France this prefix digit is used to determine
- * which land line carrier to route the call over.
- */
- bool aPlusFirst = (*a == '+');
- bool bPlusFirst = (*b == '+');
- if (ia < 4 && ib < 4 && (aPlusFirst || bPlusFirst) && !(aPlusFirst && bPlusFirst)) {
- return true;
+ if (ok_to_ignore_prefix) {
+ if (!checkPrefixIsIgnorable(a, i_a)) {
+ return false;
+ }
+ if (!checkPrefixIsIgnorable(b, i_b)) {
+ return false;
+ }
+ } else {
+ // In the US, 1-650-555-1234 must be equal to 650-555-1234,
+ // while 090-1234-1234 must not be equal to 90-1234-1234 in Japan.
+ // This requirement exists only in the US (with the trunk (NDD) prefix 1).
+ //
+ // At least, in this "rough" comparison, we should ignore the prefix
+ // '1', so if the remaining non-separator character is '1', we ignore it
+ // just once.
+ bool may_be_namp = true;
+ while (i_a >= 0) {
+ const char ch_a = a[i_a];
+ if (isNonSeparator(ch_a)) {
+ if (may_be_namp && tryGetISODigit(ch_a) == 1) {
+ may_be_namp = false;
+ } else {
+ return false;
+ }
+ }
+ i_a--;
+ }
+ while (i_b >= 0) {
+ const char ch_b = b[i_b];
+ if (isNonSeparator(ch_b)) {
+ if (may_be_namp && tryGetISODigit(ch_b) == 1) {
+ may_be_namp = false;
+ } else {
+ return false;
+ }
+ }
+ i_b--;
+ }
}
- return false;
+ return true;
}
} // namespace android