diff options
3 files changed, 101 insertions, 35 deletions
diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java index c40ef7c6f1..4818106f63 100644 --- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java +++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java @@ -17,13 +17,16 @@ package org.apache.harmony.tests.java.util.regex; +import java.util.Arrays; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import junit.framework.TestCase; +import java.util.regex.*; /** * TODO Type description + * */ @SuppressWarnings("nls") public class SplitTest extends TestCase { @@ -174,45 +177,23 @@ public class SplitTest extends TestCase { public void testSplit2() { Pattern p = Pattern.compile(""); - String s[]; - s = p.split("a", -1); - assertEquals(3, s.length); - assertEquals("", s[0]); - assertEquals("a", s[1]); - assertEquals("", s[2]); - - s = p.split("", -1); - assertEquals(1, s.length); - assertEquals("", s[0]); - - s = p.split("abcd", -1); - assertEquals(6, s.length); - assertEquals("", s[0]); - assertEquals("a", s[1]); - assertEquals("b", s[2]); - assertEquals("c", s[3]); - assertEquals("d", s[4]); - assertEquals("", s[5]); + assertEquals(Arrays.asList("a", ""), Arrays.asList(p.split("a", -1))); + assertEquals(Arrays.asList(""), Arrays.asList(p.split("", -1))); + assertEquals(Arrays.asList("a", "b", "c", "d", ""), Arrays.asList(p.split("abcd", -1))); // Regression test for Android - assertEquals("GOOG,23,500".split("|").length, 12); + assertEquals("GOOG,23,500".split("|").length, 11); } public void testSplitSupplementaryWithEmptyString() { - /* * See http://www.unicode.org/reports/tr18/#Supplementary_Characters * We have to treat text as code points not code units. 
*/ Pattern p = Pattern.compile(""); - String s[]; - s = p.split("a\ud869\uded6b", -1); - assertEquals(5, s.length); - assertEquals("", s[0]); - assertEquals("a", s[1]); - assertEquals("\ud869\uded6", s[2]); - assertEquals("b", s[3]); - assertEquals("", s[4]); + String[] s = p.split("a\ud869\uded6b", -1); + assertEquals(Arrays.asList("a", "\ud869\uded6", "b", ""), Arrays.asList(s)); } + } diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java index 96de6c816f..3a56f6ab31 100644 --- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java +++ b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java @@ -18,6 +18,7 @@ package org.apache.harmony.regex.tests.java.util.regex; import java.io.Serializable; +import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; @@ -27,6 +28,10 @@ import junit.framework.TestCase; import org.apache.harmony.testframework.serialization.SerializationTest; import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert; +import dalvik.system.VMRuntime; + +import static java.util.Arrays.asList; + public class PatternTest extends TestCase { String[] testPatterns = { "(a|b)*abb", @@ -107,7 +112,7 @@ public class PatternTest extends TestCase { s = pat.split("", -1); assertEquals(s.length, 1); s = pat.split("abccbadfe", -1); - assertEquals(s.length, 11); + assertEquals(s.length, 10); // zero limit pat = Pattern.compile("b"); s = pat.split("abccbadfebb", 0); @@ -118,7 +123,7 @@ public class PatternTest extends TestCase { s = pat.split("", 0); assertEquals(s.length, 1); s = pat.split("abccbadfe", 0); - assertEquals(s.length, 10); + assertEquals(s.length, 9); // positive limit pat = Pattern.compile("b"); s = pat.split("abccbadfebb", 12); @@ -129,7 +134,7 @@ public class 
PatternTest extends TestCase { s = pat.split("", 11); assertEquals(s.length, 1); s = pat.split("abccbadfe", 15); - assertEquals(s.length, 11); + assertEquals(s.length, 10); pat = Pattern.compile("b"); s = pat.split("abccbadfebb", 5); @@ -139,8 +144,8 @@ public class PatternTest extends TestCase { pat = Pattern.compile(""); s = pat.split("", 1); assertEquals(s.length, 1); - s = pat.split("abccbadfe", 11); - assertEquals(s.length, 11); + s = pat.split("abccbadfe", 10); + assertEquals(s.length, 10); pat = Pattern.compile("b"); s = pat.split("abccbadfebb", 3); @@ -150,6 +155,65 @@ public class PatternTest extends TestCase { assertEquals(s.length, 5); } + public void testSplitOnEmptyPattern_apiCurrent() { + assertEquals(asList("t", "e", "s", "t"), asList("test".split(""))); + assertEquals(asList(""), asList("".split(""))); + assertEquals(asList(""), asList(Pattern.compile("").split(""))); + assertEquals(asList(""), asList("".split("", -1))); + } + + public void testSplitOnEmptyPattern_api28() { + runWithTargetSdkVersion(28, () -> { + assertEquals(asList("", "t", "e", "s", "t"), asList("test".split(""))); + assertEquals(asList(""), asList("".split(""))); + assertEquals(asList(""), asList(Pattern.compile("").split(""))); + assertEquals(asList(""), asList("".split("", -1))); + }); + } + + /** + * Tests that a match at the beginning of the input string only produces + * a "" if the match is positive-width. + */ + public void testMatchBeginningOfInputSequence_apiCurrent() { + // Positive-width match at the beginning of the input. 
+ assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a"))); + assertEquals(asList("", "anana"), asList("banana".split("b"))); + // Zero-width match at the beginning of the input + assertEquals(asList("a", "ardv", "ark"), asList("aardvark".split("(?=a)"))); + assertEquals(asList("banana"), asList("banana".split("(?=b)"))); + + // For comparison, matches in the middle of the input never yield an empty substring: + assertEquals(asList("aar", "vark"), asList("aardvark".split("d"))); + assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)"))); + } + + public void testMatchBeginningOfInputSequence_api28() { + runWithTargetSdkVersion(28, () -> { + // Positive-width match at the beginning of the input. + assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a"))); + assertEquals(asList("", "anana"), asList("banana".split("b"))); + // Zero-width match at the beginning of the input + assertEquals(asList("", "a", "ardv", "ark"), asList("aardvark".split("(?=a)"))); + assertEquals(asList("banana"), asList("banana".split("(?=b)"))); + + // For comparison, matches in the middle of the input never yield an empty substring: + assertEquals(asList("aar", "vark"), asList("aardvark".split("d"))); + assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)"))); + }); + } + + private static void runWithTargetSdkVersion(int targetSdkVersion, Runnable runnable) { + VMRuntime vmRuntime = VMRuntime.getRuntime(); + int oldVersion = vmRuntime.getTargetSdkVersion(); + vmRuntime.setTargetSdkVersion(targetSdkVersion); + try { + runnable.run(); + } finally { + vmRuntime.setTargetSdkVersion(oldVersion); + } + } + public void testSplitCharSequence() { String s[]; Pattern pat = Pattern.compile("b"); @@ -161,7 +225,7 @@ public class PatternTest extends TestCase { s = pat.split(""); assertEquals(s.length, 1); s = pat.split("abccbadfe"); - assertEquals(s.length, 10); + assertEquals(s.length, 9); // bug6544 String s1 = ""; String[] arr = s1.split(":"); 
diff --git a/ojluni/src/main/java/java/util/regex/Pattern.java b/ojluni/src/main/java/java/util/regex/Pattern.java index cf0c5b903c..f7da3fb248 100644 --- a/ojluni/src/main/java/java/util/regex/Pattern.java +++ b/ojluni/src/main/java/java/util/regex/Pattern.java @@ -27,6 +27,8 @@ package java.util.regex; import dalvik.annotation.optimization.ReachabilitySensitive; +import dalvik.system.VMRuntime; + import libcore.util.NativeAllocationRegistry; import java.util.Iterator; @@ -1052,6 +1054,8 @@ public final class Pattern implements java.io.Serializable return m.matches(); } + // Android-changed: Adopt split() behavior change only for apps targeting API > 28. + // http://b/109659282#comment7 /** * Splits the given input sequence around matches of this pattern. * @@ -1063,6 +1067,12 @@ public final class Pattern implements java.io.Serializable * the resulting array has just one element, namely the input sequence in * string form. * + * <p> When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning however + * can only produce such an empty leading substring for apps running on or + * targeting API versions &lt;= 28. + * * <p> The <tt>limit</tt> parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting * array. If the limit <i>n</i> is greater than zero then the pattern @@ -1125,6 +1135,17 @@ public final class Pattern implements java.io.Serializable // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + // BEGIN Android-changed: split() compat behavior for apps targeting <= 28. 
+ // continue; + int targetSdkVersion = VMRuntime.getRuntime().getTargetSdkVersion(); + if (targetSdkVersion > 28) { + continue; + } + // END Android-changed: split() compat behavior for apps targeting <= 28. + } String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); |