3 files changed, 101 insertions, 35 deletions
diff --git a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
index c40ef7c6f1..4818106f63 100644
--- a/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
+++ b/harmony-tests/src/test/java/org/apache/harmony/tests/java/util/regex/SplitTest.java
@@ -17,13 +17,16 @@
 
 package org.apache.harmony.tests.java.util.regex;
 
+import java.util.Arrays;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
 import junit.framework.TestCase;
+import java.util.regex.*;
 
 /**
  * TODO Type description
+ *
  */
 @SuppressWarnings("nls")
 public class SplitTest extends TestCase {
@@ -174,45 +177,23 @@ public class SplitTest extends TestCase {
 
     public void testSplit2() {
         Pattern p = Pattern.compile("");
-        String s[];
-        s = p.split("a", -1);
-        assertEquals(3, s.length);
-        assertEquals("", s[0]);
-        assertEquals("a", s[1]);
-        assertEquals("", s[2]);
-
-        s = p.split("", -1);
-        assertEquals(1, s.length);
-        assertEquals("", s[0]);
-
-        s = p.split("abcd", -1);
-        assertEquals(6, s.length);
-        assertEquals("", s[0]);
-        assertEquals("a", s[1]);
-        assertEquals("b", s[2]);
-        assertEquals("c", s[3]);
-        assertEquals("d", s[4]);
-        assertEquals("", s[5]);
+        assertEquals(Arrays.asList("a", ""), Arrays.asList(p.split("a", -1))); // no leading ""
+        assertEquals(Arrays.asList(""), Arrays.asList(p.split("", -1)));
+        assertEquals(Arrays.asList("a", "b", "c", "d", ""), Arrays.asList(p.split("abcd", -1)));
 
         // Regression test for Android
-        assertEquals("GOOG,23,500".split("|").length, 12);
+        assertEquals(11, "GOOG,23,500".split("|").length);
     }
 
 
     public void testSplitSupplementaryWithEmptyString() {
-
         /*
          * See http://www.unicode.org/reports/tr18/#Supplementary_Characters
          * We have to treat text as code points not code units.
          */
         Pattern p = Pattern.compile("");
-        String s[];
-        s = p.split("a\ud869\uded6b", -1);
-        assertEquals(5, s.length);
-        assertEquals("", s[0]);
-        assertEquals("a", s[1]);
-        assertEquals("\ud869\uded6", s[2]);
-        assertEquals("b", s[3]);
-        assertEquals("", s[4]);
+        assertEquals(Arrays.asList("a", "\ud869\uded6", "b", ""),
+                Arrays.asList(p.split("a\ud869\uded6b", -1)));
     }
+
 }
diff --git a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
index 96de6c816f..3a56f6ab31 100644
--- a/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
+++ b/luni/src/test/java/org/apache/harmony/regex/tests/java/util/regex/PatternTest.java
@@ -18,6 +18,7 @@
 package org.apache.harmony.regex.tests.java.util.regex;
 
 import java.io.Serializable;
+import java.util.Arrays;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@@ -27,6 +28,10 @@ import junit.framework.TestCase;
 import org.apache.harmony.testframework.serialization.SerializationTest;
 import org.apache.harmony.testframework.serialization.SerializationTest.SerializableAssert;
 
+import dalvik.system.VMRuntime;
+
+import static java.util.Arrays.asList;
+
 public class PatternTest extends TestCase {
     String[] testPatterns = {
             "(a|b)*abb",
@@ -107,7 +112,7 @@ public class PatternTest extends TestCase {
         s = pat.split("", -1);
         assertEquals(s.length, 1);
         s = pat.split("abccbadfe", -1);
-        assertEquals(s.length, 11);
+        assertEquals(s.length, 10);
         // zero limit
         pat = Pattern.compile("b");
         s = pat.split("abccbadfebb", 0);
@@ -118,7 +123,7 @@ public class PatternTest extends TestCase {
         s = pat.split("", 0);
         assertEquals(s.length, 1);
         s = pat.split("abccbadfe", 0);
-        assertEquals(s.length, 10);
+        assertEquals(s.length, 9);
         // positive limit
         pat = Pattern.compile("b");
         s = pat.split("abccbadfebb", 12);
@@ -129,7 +134,7 @@ public class PatternTest extends TestCase {
         s = pat.split("", 11);
         assertEquals(s.length, 1);
         s = pat.split("abccbadfe", 15);
-        assertEquals(s.length, 11);
+        assertEquals(s.length, 10);
 
         pat = Pattern.compile("b");
         s = pat.split("abccbadfebb", 5);
@@ -139,8 +144,8 @@ public class PatternTest extends TestCase {
         pat = Pattern.compile("");
         s = pat.split("", 1);
         assertEquals(s.length, 1);
-        s = pat.split("abccbadfe", 11);
-        assertEquals(s.length, 11);
+        s = pat.split("abccbadfe", 10);
+        assertEquals(s.length, 10);
 
         pat = Pattern.compile("b");
         s = pat.split("abccbadfebb", 3);
@@ -150,6 +155,65 @@ public class PatternTest extends TestCase {
         assertEquals(s.length, 5);
     }
 
+    public void testSplitOnEmptyPattern_apiCurrent() {
+        assertEquals(asList("t", "e", "s", "t"), asList("test".split(""))); // no leading ""
+        assertEquals(asList(""), asList("".split(""))); // empty input: one "" element
+        assertEquals(asList(""), asList(Pattern.compile("").split("")));
+        assertEquals(asList(""), asList("".split("", -1)));
+    }
+
+    public void testSplitOnEmptyPattern_api28() {
+        runWithTargetSdkVersion(28, () -> { // assertions run with target SDK forced to 28
+            assertEquals(asList("", "t", "e", "s", "t"), asList("test".split(""))); // leading ""
+            assertEquals(asList(""), asList("".split(""))); // empty input: one "" element
+            assertEquals(asList(""), asList(Pattern.compile("").split("")));
+            assertEquals(asList(""), asList("".split("", -1)));
+        });
+    }
+
+    /**
+     * Tests that a match at the beginning of the input string only produces
+     * a leading "" if the match is positive-width.
+     */
+    public void testMatchBeginningOfInputSequence_apiCurrent() {
+        // Positive-width match at the beginning of the input: a leading "" is produced.
+        assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a")));
+        assertEquals(asList("", "anana"), asList("banana".split("b")));
+        // Zero-width match at the beginning of the input: no leading "" is produced.
+        assertEquals(asList("a", "ardv", "ark"), asList("aardvark".split("(?=a)")));
+        assertEquals(asList("banana"), asList("banana".split("(?=b)")));
+
+        // For comparison, a lone match in the middle of the input yields no empty substring:
+        assertEquals(asList("aar", "vark"), asList("aardvark".split("d")));
+        assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)")));
+    }
+
+    public void testMatchBeginningOfInputSequence_api28() {
+        runWithTargetSdkVersion(28, () -> {
+            // Positive-width match at the beginning of the input: a leading "" is produced.
+            assertEquals(asList("", "", "rdv", "rk"), asList("aardvark".split("a")));
+            assertEquals(asList("", "anana"), asList("banana".split("b")));
+            // Zero-width match at the beginning of the input: may still add a leading "".
+            assertEquals(asList("", "a", "ardv", "ark"), asList("aardvark".split("(?=a)")));
+            assertEquals(asList("banana"), asList("banana".split("(?=b)")));
+
+            // For comparison, a lone match in the middle of the input yields no empty substring:
+            assertEquals(asList("aar", "vark"), asList("aardvark".split("d")));
+            assertEquals(asList("aar", "dvark"), asList("aardvark".split("(?=d)")));
+        });
+    }
+
+    private static void runWithTargetSdkVersion(int targetSdkVersion, Runnable runnable) {
+        VMRuntime vmRuntime = VMRuntime.getRuntime();
+        int oldVersion = vmRuntime.getTargetSdkVersion();
+        vmRuntime.setTargetSdkVersion(targetSdkVersion);
+        try {
+            runnable.run();
+        } finally {
+            vmRuntime.setTargetSdkVersion(oldVersion);
+        }
+    }
+
     public void testSplitCharSequence() {
         String s[];
         Pattern pat = Pattern.compile("b");
@@ -161,7 +225,7 @@ public class PatternTest extends TestCase {
         s = pat.split("");
         assertEquals(s.length, 1);
         s = pat.split("abccbadfe");
-        assertEquals(s.length, 10);
+        assertEquals(s.length, 9);
         // bug6544
         String s1 = "";
         String[] arr = s1.split(":");
diff --git a/ojluni/src/main/java/java/util/regex/Pattern.java b/ojluni/src/main/java/java/util/regex/Pattern.java
index cf0c5b903c..f7da3fb248 100644
--- a/ojluni/src/main/java/java/util/regex/Pattern.java
+++ b/ojluni/src/main/java/java/util/regex/Pattern.java
@@ -27,6 +27,8 @@
 package java.util.regex;
 
 import dalvik.annotation.optimization.ReachabilitySensitive;
+import dalvik.system.VMRuntime;
+
 import libcore.util.NativeAllocationRegistry;
 
 import java.util.Iterator;
@@ -1052,6 +1054,8 @@ public final class Pattern implements java.io.Serializable
         return m.matches();
     }
 
+    // Android-changed: Adopt split() behavior change only for apps targeting API > 28.
+    // http://b/109659282#comment7
     /**
      * Splits the given input sequence around matches of this pattern.
      *
@@ -1063,6 +1067,12 @@ public final class Pattern implements java.io.Serializable
      * the resulting array has just one element, namely the input sequence in
      * string form.
      *
+     * <p> When there is a positive-width match at the beginning of the input
+     * sequence then an empty leading substring is included at the beginning
+     * of the resulting array. A zero-width match at the beginning however
+     * can only produce such an empty leading substring for apps running on or
+     * targeting API versions {@code <= 28}.
+     *
      * <p> The <tt>limit</tt> parameter controls the number of times the
      * pattern is applied and therefore affects the length of the resulting
      * array.  If the limit <i>n</i> is greater than zero then the pattern
@@ -1125,6 +1135,17 @@ public final class Pattern implements java.io.Serializable
         // Add segments before each match found
         while(m.find()) {
             if (!matchLimited || matchList.size() < limit - 1) {
+                if (index == 0 && index == m.start() && m.start() == m.end()) {
+                    // no empty leading substring included for zero-width match
+                    // at the beginning of the input char sequence.
+                    // BEGIN Android-changed: split() compat behavior for apps targeting <= 28.
+                    // continue;
+                    int targetSdkVersion = VMRuntime.getRuntime().getTargetSdkVersion();
+                    if (targetSdkVersion > 28) {
+                        continue;
+                    }
+                    // END Android-changed: split() compat behavior for apps targeting <= 28.
+                }
                 String match = input.subSequence(index, m.start()).toString();
                 matchList.add(match);
                 index = m.end();