diff options
3 files changed, 213 insertions, 102 deletions
diff --git a/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java new file mode 100644 index 0000000000..73e797996f --- /dev/null +++ b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java @@ -0,0 +1,76 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License + */ + +package benchmarks.regression; + +import com.google.caliper.Param; + +public class StringReplaceAllBenchmark { + // NOTE: These estimates of MOVEABLE / NON_MOVEABLE are based on a knowledge of + // ART implementation details. They make a difference here because JNI calls related + // to strings took different paths depending on whether the String in question was + // moveable or not. + enum StringLengths { + EMPTY(""), + MOVEABLE_16(makeString(16)), + MOVEABLE_256(makeString(256)), + MOVEABLE_1024(makeString(1024)), + NON_MOVEABLE(makeString(64 * 1024)), + BOOT_IMAGE(java.util.jar.JarFile.MANIFEST_NAME); + + private final String value; + + StringLengths(String s) { + this.value = s; + } + } + + private static final String makeString(int length) { + final String sequence8 = "abcdefghijklmnop"; + final int numAppends = (length / 16) - 1; + StringBuilder stringBuilder = new StringBuilder(length); + + // (n-1) occurences of "abcdefghijklmnop" + for (int i = 0; i < numAppends; ++i) { + stringBuilder.append(sequence8); + } + + // and one final occurence of qrstuvwx. + stringBuilder.append("qrstuvwx"); + + return stringBuilder.toString(); + } + + @Param private StringLengths s; + + public void timeReplaceAllTrivialPatternNonExistent(int reps) { + for (int i = 0; i < reps; ++i) { + s.value.replaceAll("fish", "0"); + } + } + + public void timeReplaceTrivialPatternAllRepeated(int reps) { + for (int i = 0; i < reps; ++i) { + s.value.replaceAll("jklm", "0"); + } + } + + public void timeReplaceAllTrivialPatternSingleOccurence(int reps) { + for (int i = 0; i < reps; ++i) { + s.value.replaceAll("qrst", "0"); + } + } +} diff --git a/luni/src/main/native/java_util_regex_Matcher.cpp b/luni/src/main/native/java_util_regex_Matcher.cpp index 2461afcf82..2271ba8905 100644 --- a/luni/src/main/native/java_util_regex_Matcher.cpp +++ b/luni/src/main/native/java_util_regex_Matcher.cpp @@ -16,75 +16,114 @@ #define LOG_TAG "Matcher" +#include <memory> #include <stdlib.h> +#include <android-base/logging.h> + #include "IcuUtilities.h" #include "JNIHelp.h" #include "JniConstants.h" #include "JniException.h" -#include "ScopedPrimitiveArray.h" #include "ScopedJavaUnicodeString.h" +#include "ScopedPrimitiveArray.h" +#include "ScopedStringChars.h" #include "jni.h" #include "unicode/parseerr.h" #include "unicode/regex.h" // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html -static icu::RegexMatcher* toRegexMatcher(jlong address) { - return reinterpret_cast<icu::RegexMatcher*>(static_cast<uintptr_t>(address)); -} - /** - * We use ICU4C's RegexMatcher class, but our input is on the Java heap and potentially moving - * around between calls. This wrapper class ensures that our RegexMatcher is always pointing at - * the current location of the char[]. Earlier versions of Android simply copied the data to the - * native heap, but that's wasteful and hides allocations from the garbage collector. + * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of + * the input it's currently operating on in the native heap. + * + * Rationale: We choose to make a copy here because it turns out to be a lot + * cheaper when a moving GC and/or string compression is enabled. This is + * because env->GetStringChars() always copies in this scenario. This becomes + * especially bad when the String in question is long and/or contains a large + * number of matches. + * + * Drawbacks: The native allocation associated with this class is no longer + * fixed size, so we're effectively lying to the NativeAllocationRegistry about + * the size of the object(s) we're allocating on the native heap. The peak + * memory usage doesn't change though, given that GetStringChars would have + * made an allocation of precisely the same size. */ -class MatcherAccessor { +class MatcherState { public: - MatcherAccessor(JNIEnv* env, jlong address, jstring javaInput, bool reset) { - init(env, address); + MatcherState(icu::RegexMatcher* matcher) : + mMatcher(matcher), + mUChars(nullptr), + mUText(nullptr), + mStatus(U_ZERO_ERROR) { + } - mJavaInput = javaInput; - mChars = env->GetStringChars(mJavaInput, NULL); - if (mChars == NULL) { - return; + bool updateInput(JNIEnv* env, jstring input) { + // First, close the UText struct, since we're about to allocate a new one. + if (mUText != nullptr) { + utext_close(mUText); + mUText = nullptr; } - mUText = utext_openUChars(NULL, mChars, env->GetStringLength(mJavaInput), &mStatus); - if (mUText == NULL) { - return; + // Then delete the UChar* associated with the UText struct.. + mUChars.reset(nullptr); + + // TODO: We should investigate whether we can avoid an additional copy + // in the native heap when is_copy == JNI_TRUE. The problem with doing + // that is that we might call ReleaseStringChars with a different + // JNIEnv* on a different downcall. This is currently safe as + // implemented in ART, but is unlikely to be portable and the spec stays + // silent on the matter. + ScopedStringChars inputChars(env, input); + if (inputChars.get() == nullptr) { + // There will be an exception pending if we get here. + return false; } - if (reset) { - mMatcher->reset(mUText); - } else { - mMatcher->refreshInputText(mUText, mStatus); + // Make a copy of |input| on the native heap. This copy will be live + // until the next call to updateInput or close. + mUChars.reset(new (std::nothrow) UChar[inputChars.size()]); + if (mUChars.get() == nullptr) { + env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory"); + return false; } - } - MatcherAccessor(JNIEnv* env, jlong address) { - init(env, address); + static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)"); + memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar)); + + // Reset any errors that might have occurred on previous patches. + mStatus = U_ZERO_ERROR; + mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus); + if (mUText == nullptr) { + CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus)); + return false; + } + + // It is an error for ICU to have returned a non-null mUText but to + // still have indicated an error. + CHECK(U_SUCCESS(mStatus)); + + mMatcher->reset(mUText); + return true; } - ~MatcherAccessor() { - utext_close(mUText); - if (mJavaInput) { - mEnv->ReleaseStringChars(mJavaInput, mChars); + ~MatcherState() { + if (mUText != nullptr) { + utext_close(mUText); } - maybeThrowIcuException(mEnv, "utext_close", mStatus); } - icu::RegexMatcher* operator->() { - return mMatcher; + icu::RegexMatcher* matcher() { + return mMatcher.get(); } UErrorCode& status() { return mStatus; } - void updateOffsets(jintArray javaOffsets) { - ScopedIntArrayRW offsets(mEnv, javaOffsets); + void updateOffsets(JNIEnv* env, jintArray javaOffsets) { + ScopedIntArrayRW offsets(env, javaOffsets); if (offsets.get() == NULL) { return; } @@ -96,29 +135,23 @@ public: } private: - void init(JNIEnv* env, jlong address) { - mEnv = env; - mJavaInput = NULL; - mMatcher = toRegexMatcher(address); - mChars = NULL; - mStatus = U_ZERO_ERROR; - mUText = NULL; - } - - JNIEnv* mEnv; - jstring mJavaInput; - icu::RegexMatcher* mMatcher; - const jchar* mChars; - UErrorCode mStatus; + std::unique_ptr<icu::RegexMatcher> mMatcher; + std::unique_ptr<UChar[]> mUChars; UText* mUText; + UErrorCode mStatus; // Disallow copy and assignment. - MatcherAccessor(const MatcherAccessor&); - void operator=(const MatcherAccessor&); + MatcherState(const MatcherState&); + void operator=(const MatcherState&); }; +static inline MatcherState* toMatcherState(jlong address) { + return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address)); +} + static void Matcher_free(void* address) { - delete reinterpret_cast<icu::RegexMatcher*>(address); + MatcherState* state = reinterpret_cast<MatcherState*>(address); + delete state; } static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) { @@ -131,51 +164,48 @@ static jint Matcher_nativeSize(JNIEnv*, jclass) { return 200; // Very rough guess based on a quick look at the implementation. } -static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint startIndex, jintArray offsets) { - MatcherAccessor matcher(env, addr, javaText, false); - UBool result = matcher->find(startIndex, matcher.status()); +static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) { + MatcherState* state = toMatcherState(addr); + UBool result = state->matcher()->find(startIndex, state->status()); if (result) { - matcher.updateOffsets(offsets); + state->updateOffsets(env, offsets); } return result; } -static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) { - MatcherAccessor matcher(env, addr, javaText, false); - if (matcher.status() != U_ZERO_ERROR) { - return -1; - } - UBool result = matcher->find(); +static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { + MatcherState* state = toMatcherState(addr); + UBool result = state->matcher()->find(); if (result) { - matcher.updateOffsets(offsets); + state->updateOffsets(env, offsets); } return result; } -static jint Matcher_groupCountImpl(JNIEnv* env, jclass, jlong addr) { - MatcherAccessor matcher(env, addr); - return matcher->groupCount(); +static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) { + MatcherState* state = toMatcherState(addr); + return state->matcher()->groupCount(); } -static jint Matcher_hitEndImpl(JNIEnv* env, jclass, jlong addr) { - MatcherAccessor matcher(env, addr); - return matcher->hitEnd(); +static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) { + MatcherState* state = toMatcherState(addr); + return state->matcher()->hitEnd(); } -static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) { - MatcherAccessor matcher(env, addr, javaText, false); - UBool result = matcher->lookingAt(matcher.status()); +static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { + MatcherState* state = toMatcherState(addr); + UBool result = state->matcher()->lookingAt(state->status()); if (result) { - matcher.updateOffsets(offsets); + state->updateOffsets(env, offsets); } return result; } -static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) { - MatcherAccessor matcher(env, addr, javaText, false); - UBool result = matcher->matches(matcher.status()); +static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) { + MatcherState* state = toMatcherState(addr); + UBool result = state->matcher()->matches(state->status()); if (result) { - matcher.updateOffsets(offsets); + state->updateOffsets(env, offsets); } return result; } @@ -184,28 +214,33 @@ static jlong Matcher_openImpl(JNIEnv* env, jclass, jlong patternAddr) { icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr)); UErrorCode status = U_ZERO_ERROR; icu::RegexMatcher* result = pattern->matcher(status); - maybeThrowIcuException(env, "RegexPattern::matcher", status); - return reinterpret_cast<uintptr_t>(result); + if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) { + return 0; + } + + return reinterpret_cast<uintptr_t>(new MatcherState(result)); } -static jint Matcher_requireEndImpl(JNIEnv* env, jclass, jlong addr) { - MatcherAccessor matcher(env, addr); - return matcher->requireEnd(); +static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) { + MatcherState* state = toMatcherState(addr); + return state->matcher()->requireEnd(); } static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) { - MatcherAccessor matcher(env, addr, javaText, true); - matcher->region(start, end, matcher.status()); + MatcherState* state = toMatcherState(addr); + if (state->updateInput(env, javaText)) { + state->matcher()->region(start, end, state->status()); + } } -static void Matcher_useAnchoringBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) { - MatcherAccessor matcher(env, addr); - matcher->useAnchoringBounds(value); +static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { + MatcherState* state = toMatcherState(addr); + state->matcher()->useAnchoringBounds(value); } -static void Matcher_useTransparentBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) { - MatcherAccessor matcher(env, addr); - matcher->useTransparentBounds(value); +static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) { + MatcherState* state = toMatcherState(addr); + state->matcher()->useTransparentBounds(value); } static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) { @@ -227,13 +262,13 @@ static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr static JNINativeMethod gMethods[] = { NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"), - NATIVE_METHOD(Matcher, findImpl, "(JLjava/lang/String;I[I)Z"), - NATIVE_METHOD(Matcher, findNextImpl, "(JLjava/lang/String;[I)Z"), + NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"), + NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"), NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"), NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"), NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"), - NATIVE_METHOD(Matcher, lookingAtImpl, "(JLjava/lang/String;[I)Z"), - NATIVE_METHOD(Matcher, matchesImpl, "(JLjava/lang/String;[I)Z"), + NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"), + NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"), NATIVE_METHOD(Matcher, nativeSize, "()I"), NATIVE_METHOD(Matcher, openImpl, "(J)J"), NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"), diff --git a/ojluni/src/main/java/java/util/regex/Matcher.java b/ojluni/src/main/java/java/util/regex/Matcher.java index 9805dde869..9af28aab15 100644 --- a/ojluni/src/main/java/java/util/regex/Matcher.java +++ b/ojluni/src/main/java/java/util/regex/Matcher.java @@ -444,7 +444,7 @@ public final class Matcher implements MatchResult { */ public boolean matches() { synchronized (this) { - matchFound = matchesImpl(address, input, matchOffsets); + matchFound = matchesImpl(address, matchOffsets); } return matchFound; } @@ -466,7 +466,7 @@ public final class Matcher implements MatchResult { */ public boolean find() { synchronized (this) { - matchFound = findNextImpl(address, input, matchOffsets); + matchFound = findNextImpl(address, matchOffsets); } return matchFound; } @@ -495,7 +495,7 @@ public final class Matcher implements MatchResult { } synchronized (this) { - matchFound = findImpl(address, input, start, matchOffsets); + matchFound = findImpl(address, start, matchOffsets); } return matchFound; } @@ -516,7 +516,7 @@ public final class Matcher implements MatchResult { */ public boolean lookingAt() { synchronized (this) { - matchFound = lookingAtImpl(address, input, matchOffsets); + matchFound = lookingAtImpl(address, matchOffsets); } return matchFound; } @@ -1181,13 +1181,13 @@ public final class Matcher implements MatchResult { } private static native int getMatchedGroupIndex0(long patternAddr, String name); - private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets); - private static native boolean findNextImpl(long addr, String s, int[] offsets); + private static native boolean findImpl(long addr, int startIndex, int[] offsets); + private static native boolean findNextImpl(long addr, int[] offsets); private static native long getNativeFinalizer(); private static native int groupCountImpl(long addr); private static native boolean hitEndImpl(long addr); - private static native boolean lookingAtImpl(long addr, String s, int[] offsets); - private static native boolean matchesImpl(long addr, String s, int[] offsets); + private static native boolean lookingAtImpl(long addr, int[] offsets); + private static native boolean matchesImpl(long addr, int[] offsets); private static native int nativeSize(); private static native long openImpl(long patternAddr); private static native boolean requireEndImpl(long addr); |