summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java39
-rw-r--r--luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java76
-rw-r--r--ojluni/src/main/java/java/util/regex/Pattern.java21
3 files changed, 101 insertions, 35 deletions
diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
index c40ef7c6f1..4818106f63 100644
--- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
+++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
@@ -17,13 +17,16 @@
package org.apache.harmony.tests.java.util.regex;
+import java.util.Arrays;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import junit.framework.TestCase;
+import java.util.regex.*;
/**
* TODO Type description
+ *
*/
@SuppressWarnings("nls")
public class SplitTest extends TestCase {
@@ -174,45 +177,23 @@ public class SplitTest extends TestCase {
public void testSplit2() {
Pattern p = Pattern.compile("");
- String s[];
- s = p.split("a", -1);
- assertEquals(3, s.length);
- assertEquals("", s[0]);
- assertEquals("a", s[1]);
- assertEquals("", s[2]);
-
- s = p.split("", -1);
- assertEquals(1, s.length);
- assertEquals("", s[0]);
-
- s = p.split("abcd", -1);
- assertEquals(6, s.length);
- assertEquals("", s[0]);
- assertEquals("a", s[1]);
- assertEquals("b", s[2]);
- assertEquals("c", s[3]);
- assertEquals("d", s[4]);
- assertEquals("", s[5]);
+ assertEquals(Arrays.asList("a", ""), Arrays.asList(p.split("a", -1)));
+ assertEquals(Arrays.asList(""), Arrays.asList(p.split("", -1)));
+ assertEquals(Arrays.asList("a", "b", "c", "d", ""), Arrays.asList(p.split("abcd", -1)));
// Regression test for Android
- assertEquals("GOOG,23,500".split("|").length, 12);
+ assertEquals("GOOG,23,500".split("|").length, 11);
}
public void testSplitSupplementaryWithEmptyString() {
-
/*
* See http://www.unicode.org/reports/tr18/#Supplementary_Characters
* We have to treat text as code points not code units.
*/
Pattern p = Pattern.compile("");
- String s[];
- s = p.split("a\ud869\uded6b", -1);
- assertEquals(5, s.length);
- assertEquals("", s[0]);
- assertEquals("a", s[1]);
- assertEquals("\ud869\uded6", s[2]);
- assertEquals("b", s[3]);
- assertEquals("", s[4]);
+ String[] s = p.split("a\ud869\uded6b", -1);
+ assertEquals(Arrays.asList("a", "\ud869\uded6", "b", ""), Arrays.asList(s));
}
+
}
diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
index 96de6c816f..3a56f6ab31 100644
--- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
+++ b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
@@ -18,6 +18,7 @@
package org.apache.harmony.regex.tests.java.util.regex;
import java.io.Serializable;
+import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -27,6 +28,10 @@ import junit.framework.TestCase;
import org.apache.harmony.testframework.serialization.SerializationTest;
import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
+import dalvik.system.VMRuntime;
+
+import static java.util.Arrays.asList;
+
public class PatternTest extends TestCase {
String[] testPatterns = {
"(a|b)*abb",
@@ -107,7 +112,7 @@ public class PatternTest extends TestCase {
s = pat.split("", -1);
assertEquals(s.length, 1);
s = pat.split("abccbadfe", -1);
- assertEquals(s.length, 11);
+ assertEquals(s.length, 10);
// zero limit
pat = Pattern.compile("b");
s = pat.split("abccbadfebb", 0);
@@ -118,7 +123,7 @@ public class PatternTest extends TestCase {
s = pat.split("", 0);
assertEquals(s.length, 1);
s = pat.split("abccbadfe", 0);
- assertEquals(s.length, 10);
+ assertEquals(s.length, 9);
// positive limit
pat = Pattern.compile("b");
s = pat.split("abccbadfebb", 12);
@@ -129,7 +134,7 @@ public class PatternTest extends TestCase {
s = pat.split("", 11);
assertEquals(s.length, 1);
s = pat.split("abccbadfe", 15);
- assertEquals(s.length, 11);
+ assertEquals(s.length, 10);
pat = Pattern.compile("b");
s = pat.split("abccbadfebb", 5);
@@ -139,8 +144,8 @@ public class PatternTest extends TestCase {
pat = Pattern.compile("");
s = pat.split("", 1);
assertEquals(s.length, 1);
- s = pat.split("abccbadfe", 11);
- assertEquals(s.length, 11);
+ s = pat.split("abccbadfe", 10);
+ assertEquals(s.length, 10);
pat = Pattern.compile("b");
s = pat.split("abccbadfebb", 3);
@@ -150,6 +155,65 @@ public class PatternTest extends TestCase {
assertEquals(s.length, 5);
}
+ public void testSplitOnEmptyPattern_apiCurrent() {
+ assertEquals(asList("t", "e", "s", "t"), asList("test".split("")));
+ assertEquals(asList(""), asList("".split("")));
+ assertEquals(asList(""), asList(Pattern.compile("").split("")));
+ assertEquals(asList(""), asList("".split("", -1)));
+ }
+
+ public void testSplitOnEmptyPattern_api28() {
+ runWithTargetSdkVersion(28, () -> {
+ assertEquals(asList("", "t", "e", "s", "t"), asList("test".split("")));
+ assertEquals(asList(""), asList("".split("")));
+ assertEquals(asList(""), asList(Pattern.compile("").split("")));
+ assertEquals(asList(""), asList("".split("", -1)));
+ });
+ }
+
+ /**
+ * Tests that a match at the beginning of the input string only produces
+ * a leading empty substring ("") if the match is positive-width.
+ */
+ public void testMatchBeginningOfInputSequence_apiCurrent() {
+ // Positive-width match at the beginning of the input.
+ assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a")));
+ assertEquals(asList("", "anana"), asList("banana".split("b")));
+ // Zero-width match at the beginning of the input
+ assertEquals(asList("a", "ardv", "ark"), asList("aardvark".split("(?=a)")));
+ assertEquals(asList("banana"), asList("banana".split("(?=b)")));
+
+ // For comparison, matches in the middle of the input never yield an empty substring:
+ assertEquals(asList("aar", "vark"), asList("aardvark".split("d")));
+ assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)")));
+ }
+
+ public void testMatchBeginningOfInputSequence_api28() {
+ runWithTargetSdkVersion(28, () -> {
+ // Positive-width match at the beginning of the input.
+ assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a")));
+ assertEquals(asList("", "anana"), asList("banana".split("b")));
+ // Zero-width match at the beginning of the input
+ assertEquals(asList("", "a", "ardv", "ark"), asList("aardvark".split("(?=a)")));
+ assertEquals(asList("banana"), asList("banana".split("(?=b)")));
+
+ // For comparison, matches in the middle of the input never yield an empty substring:
+ assertEquals(asList("aar", "vark"), asList("aardvark".split("d")));
+ assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)")));
+ });
+ }
+
+ private static void runWithTargetSdkVersion(int targetSdkVersion, Runnable runnable) {
+ VMRuntime vmRuntime = VMRuntime.getRuntime();
+ int oldVersion = vmRuntime.getTargetSdkVersion();
+ vmRuntime.setTargetSdkVersion(targetSdkVersion);
+ try {
+ runnable.run();
+ } finally {
+ vmRuntime.setTargetSdkVersion(oldVersion);
+ }
+ }
+
public void testSplitCharSequence() {
String s[];
Pattern pat = Pattern.compile("b");
@@ -161,7 +225,7 @@ public class PatternTest extends TestCase {
s = pat.split("");
assertEquals(s.length, 1);
s = pat.split("abccbadfe");
- assertEquals(s.length, 10);
+ assertEquals(s.length, 9);
// bug6544
String s1 = "";
String[] arr = s1.split(":");
diff --git a/ojluni/src/main/java/java/util/regex/Pattern.java b/ojluni/src/main/java/java/util/regex/Pattern.java
index cf0c5b903c..f7da3fb248 100644
--- a/ojluni/src/main/java/java/util/regex/Pattern.java
+++ b/ojluni/src/main/java/java/util/regex/Pattern.java
@@ -27,6 +27,8 @@
package java.util.regex;
import dalvik.annotation.optimization.ReachabilitySensitive;
+import dalvik.system.VMRuntime;
+
import libcore.util.NativeAllocationRegistry;
import java.util.Iterator;
@@ -1052,6 +1054,8 @@ public final class Pattern implements java.io.Serializable
return m.matches();
}
+ // Android-changed: Adopt split() behavior change only for apps targeting API > 28.
+ // http://b/109659282#comment7
/**
* Splits the given input sequence around matches of this pattern.
*
@@ -1063,6 +1067,12 @@ public final class Pattern implements java.io.Serializable
* the resulting array has just one element, namely the input sequence in
* string form.
*
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the resulting array. A zero-width match at the beginning, however,
+ * can only produce such an empty leading substring for apps running on or
+ * targeting API versions &lt;= 28.
+ *
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
* array. If the limit <i>n</i> is greater than zero then the pattern
@@ -1125,6 +1135,17 @@ public final class Pattern implements java.io.Serializable
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
+ if (index == 0 && index == m.start() && m.start() == m.end()) {
+ // no empty leading substring included for zero-width match
+ // at the beginning of the input char sequence.
+ // BEGIN Android-changed: split() compat behavior for apps targeting <= 28.
+ // continue;
+ int targetSdkVersion = VMRuntime.getRuntime().getTargetSdkVersion();
+ if (targetSdkVersion > 28) {
+ continue;
+ }
+ // END Android-changed: split() compat behavior for apps targeting <= 28.
+ }
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();