summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDanny Lin <danny@kdrag0n.dev>2021-04-06 21:39:07 -0700
committerDanny Lin <danny@kdrag0n.dev>2021-04-18 01:39:35 -0700
commit4b966cb85bafbc9703d4493758908ae635a930ae (patch)
treee4bc2087821bf3eb7986787d7cd683ba863b6f85
parentdeda158428676929b7b3f9233eed2cbe1498769b (diff)
Port zlib-ng to the Android platform
- Add Soong blueprint - Add CRC32 fixes - Add NDK mappings All changes were based on the Android fork of Chromium zlib. As reported by simpleperf, a considerable amount of CPU time is spent decompressing Android binary XML resources when opening and closing activities in Settings. Because of this, some binary XMLs from framework-res.apk have been used for zlib_bench testing. zlib-ng is significantly faster than vanilla zlib and Chromium zlib (with optimizations enabled) in terms of compression. 64-bit benchmarks with zlib_bench64 on Android: Compression: +------------------------------------------+------+---------------+---------+-------+ | File | zlib | Chromium zlib | zlib-ng | Unit | +------------------------------------------+------+---------------+---------+-------+ | res/anim/activity_open_enter.xml | 10.4 | 16 | 25.1 | MiB/s | | res/anim/activity_close_enter.xml | 9.3 | 15.5 | 21.1 | MiB/s | | res/transition/move.xml | 9.7 | 16.4 | 21.6 | MiB/s | | res/drawable/stat_sys_battery_charge.xml | 13.3 | 22.4 | 33.2 | MiB/s | +------------------------------------------+------+---------------+---------+-------+ The gains are less significant for decompression, and Chromium zlib is generally slightly faster in this case. 
However, zlib-ng still tends to be faster than vanilla zlib: Decompression: +------------------------------------------+-------+---------------+---------+-------+ | File | zlib | Chromium zlib | zlib-ng | Unit | +------------------------------------------+-------+---------------+---------+-------+ | res/anim/activity_open_enter.xml | 136.8 | 150.6 | 153.2 | MiB/s | | res/anim/activity_close_enter.xml | 125.7 | 133.5 | 123 | MiB/s | | res/transition/move.xml | 120.6 | 129.9 | 124.4 | MiB/s | | res/drawable/stat_sys_battery_charge.xml | 194.6 | 222.1 | 217.3 | MiB/s | +------------------------------------------+-------+---------------+---------+-------+ In combination with enabling -O3 for libpng, zlib-ng speeds up screenshot saving by 16% on redfin (Cortex-A55 + Cortex-A76) as compared to vanilla zlib. While Android is likely to be decompressing much more data than it compresses, zlib-ng's significantly faster compression should be a worthwhile tradeoff for its deficiencies in decompression speed. zlib versions used for benchmarks: Vanilla zlib: [1] (Chromium zlib with optimizations disabled) Chromium zlib: [2] with all optimizations enabled zlib-ng: [3] with Android porting changes All zlib variants were built with -O3, as per commit [4]. [1] https://android.googlesource.com/platform/external/zlib/+/refs/tags/android-11.0.0_r35 [2] https://android.googlesource.com/platform/external/zlib/+/e620e8065d17bab7c9eefe3a03d7f8b80ca450d6 [3] https://github.com/zlib-ng/zlib-ng/commit/9f784908599006e9f09599b85b495bdcda1f25c4 [4] https://android.googlesource.com/platform/external/zlib/+/18e812de
-rw-r--r--Android.bp244
-rw-r--r--arch/arm/crc32_acle.c40
-rw-r--r--arch/arm/insert_string_acle.c38
-rw-r--r--libz.map.txt151
4 files changed, 473 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp
new file mode 100644
index 0000000..bc855be
--- /dev/null
+++ b/Android.bp
@@ -0,0 +1,244 @@
+srcs_arm = [
+ "arch/arm/armfeature.c",
+ "arch/arm/crc32_acle.c",
+ "arch/arm/insert_string_acle.c",
+ "arch/arm/adler32_neon.c",
+ "arch/arm/chunkset_neon.c",
+ "arch/arm/slide_neon.c",
+]
+
+// Not all CPUs will support these features, but compatibility is checked at runtime.
+cflags_arm = [
+ "-DARM_ACLE_CRC_HASH",
+ "-DARM_FEATURES",
+ "-DARM_NEON_ADLER32",
+ "-DARM_NEON_CHUNKSET",
+ "-DARM_NEON_SLIDEHASH",
+]
+
+srcs_x86 = [
+ "arch/x86/x86.c",
+ "arch/x86/slide_avx.c",
+ "arch/x86/chunkset_avx.c",
+ "arch/x86/compare258_avx.c",
+ "arch/x86/adler32_avx.c",
+ "arch/x86/insert_string_sse.c",
+ "arch/x86/compare258_sse.c",
+ "arch/x86/chunkset_sse.c",
+ "arch/x86/slide_sse.c",
+ "arch/x86/adler32_ssse3.c",
+ "arch/x86/crc_folding.c",
+]
+
+cflags_x86 = [
+ "-DX86_AVX2",
+ "-DX86_AVX2_ADLER32",
+ "-DX86_AVX_CHUNKSET",
+ "-DX86_FEATURES",
+ "-DX86_PCLMULQDQ_CRC",
+ "-DX86_SSE2",
+ "-DX86_SSE2_CHUNKSET",
+ "-DX86_SSE2_SLIDEHASH",
+ "-DX86_SSE42_CMP_STR",
+ "-DX86_SSE42_CRC_HASH",
+ "-DX86_SSE42_CRC_INTRIN",
+ "-DX86_SSSE3",
+ "-DX86_SSSE3_ADLER32",
+ "-mavx2",
+ "-msse4",
+ "-mssse3",
+ "-mpclmul",
+]
+
+// This optimization is applicable to arm64 and x86-64.
+cflags_64 = ["-DUNALIGNED64_OK"]
+
+libz_srcs = [
+ "adler32.c",
+ "chunkset.c",
+ "compare258.c",
+ "compress.c",
+ "crc32.c",
+ "crc32_comb.c",
+ "deflate.c",
+ "deflate_fast.c",
+ "deflate_medium.c",
+ "deflate_quick.c",
+ "deflate_slow.c",
+ "functable.c",
+ "infback.c",
+ "inffast.c",
+ "inflate.c",
+ "inftrees.c",
+ "insert_string.c",
+ "trees.c",
+ "uncompr.c",
+ "zutil.c",
+ "gzlib.c",
+ "gzread.c",
+ "gzwrite.c",
+]
+
+cflags_common = [
+ "-DHAVE_BUILTIN_CTZ",
+ "-DHAVE_BUILTIN_CTZLL",
+ "-DHAVE_VISIBILITY_HIDDEN",
+ "-DHAVE_VISIBILITY_INTERNAL",
+ "-DZLIB_CONST",
+
+ "-DWITH_GZFILEOP",
+ "-DZLIB_COMPAT",
+ "-DZLIB_DLL",
+ "-D_LARGEFILE64_SOURCE=1",
+ "-D__USE_LARGEFILE64",
+ "-Wall",
+ "-Werror",
+ "-Wno-implicit-fallthrough",
+ "-O3",
+ "-DNDEBUG",
+ "-fno-semantic-interposition",
+ "-std=c99",
+ "-DUNALIGNED_OK",
+]
+
+cc_defaults {
+ name: "libz_defaults",
+
+ cflags: cflags_common,
+ stl: "none",
+ export_include_dirs: ["."],
+ srcs: libz_srcs,
+
+ arch: {
+ arm: {
+ // TODO: This is to work around b/24465209. Remove after root cause
+ // is fixed.
+ pack_relocations: false,
+ ldflags: ["-Wl,--hash-style=both"],
+
+ cflags: cflags_arm,
+ srcs: srcs_arm,
+ },
+ arm64: {
+ cflags: cflags_arm + cflags_64,
+ srcs: srcs_arm,
+ },
+ x86: {
+ cflags: cflags_x86,
+ srcs: srcs_x86,
+ },
+ x86_64: {
+ cflags: cflags_x86 + cflags_64,
+ srcs: srcs_x86,
+ },
+ },
+}
+
+cc_library {
+ name: "libz",
+ defaults: ["libz_defaults"],
+
+ host_supported: true,
+ unique_host_soname: true,
+ static_ndk_lib: true,
+
+ vendor_available: true,
+ vndk: {
+ enabled: true,
+ support_system_process: true,
+ },
+ ramdisk_available: true,
+ recovery_available: true,
+ native_bridge_supported: true,
+
+ target: {
+ linux_bionic: {
+ enabled: true,
+ },
+ windows: {
+ enabled: true,
+ },
+ },
+
+// TODO(b/155456180): make libz a stub-providing library by uncommenting below
+// stubs: {
+// versions: ["29", "30"],
+// symbol_file: "libz.map.txt",
+// },
+ apex_available: [
+ "//apex_available:platform",
+ "com.android.art.debug", // from libdexfile
+ "com.android.art.release",
+ "com.android.bluetooth.updatable",
+ "com.android.runtime",
+ ],
+}
+
+// A more stable build of libz. Build configuration of this library should be
+// the same for different targets. This is only used by imgdiff.
+
+cc_library {
+ name: "libz_stable",
+ visibility: [
+ "//bootable/recovery/applypatch",
+ "//bootable/recovery/tests",
+ ],
+ cflags: cflags_common,
+ stl: "none",
+ export_include_dirs: ["."],
+ srcs: libz_srcs,
+
+ host_supported: true,
+ vendor_available: true,
+ recovery_available: true,
+}
+
+cc_binary_host {
+ name: "minigzip",
+ srcs: ["test/minigzip.c"],
+ cflags: cflags_common,
+ static_libs: ["libz"],
+ stl: "none",
+}
+
+// This module is defined in development/ndk/Android.bp. Updating these headers
+// to be usable for any API level is going to be some work (at the very least,
+// there's a ZLIB_VERNUM that will need to be handled, since early versions of
+// Android did not have all the APIs that calling code will use if this is set
+// to the current value).
+//
+// The NDK never updated the zlib headers when the platform updated, so until we
+// solve this the NDK will continue shipping the old headers.
+//
+// ndk_headers {
+// name: "libz_headers",
+// from: "src",
+// to: "",
+// srcs: [
+// "src/zconf.h",
+// "src/zlib.h",
+// ],
+// license: "NOTICE",
+// }
+
+// TODO(b/155351357) remove this library and let libtextclassifier use libz
+// instead.
+// libz_current allows modules building against the NDK to have access to zlib
+// API that's not available from the NDK libz.
+cc_library_static {
+ name: "libz_current",
+ defaults: ["libz_defaults"],
+ sdk_version: "current",
+
+ apex_available: [
+ "//apex_available:platform", // indirectly from GoogleExtServices that gets installed to /system
+ "com.android.extservices", // indirectly via libtextclassifier
+ ],
+}
+
+ndk_library {
+ name: "libz",
+ symbol_file: "libz.map.txt",
+ first_version: "9",
+ unversioned_until: "current",
+}
diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c
index 88ba6c3..99013e1 100644
--- a/arch/arm/crc32_acle.c
+++ b/arch/arm/crc32_acle.c
@@ -11,6 +11,46 @@
#endif
#include "../../zutil.h"
+#if defined(__clang__)
+/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an
+ * armv8 target, which is incompatible with ThinLTO optimizations on Android.
+ * (Namely, mixing and matching different module-level targets makes ThinLTO
+ * warn, and Android defaults to armv7-a. This restriction does not apply to
+ * function-level `target`s, however.)
+ *
+ * Since we only need a few crc intrinsics, and since clang's implementations of
+ * those are just wrappers around compiler builtins, it's simplest to #define
+ * those builtins directly. If this #define list grows too much (or we depend on
+ * an intrinsic that isn't a trivial wrapper), we may have to find a better way
+ * to go about this.
+ *
+ * NOTE: clang currently complains that "'+soft-float-abi' is not a recognized
+ * feature for this target (ignoring feature)." This appears to be a harmless
+ * bug in clang.
+ */
+#define __crc32b __builtin_arm_crc32b
+#define __crc32d __builtin_arm_crc32d
+#define __crc32w __builtin_arm_crc32w
+#define __crc32cw __builtin_arm_crc32cw
+#define __crc32h __builtin_arm_crc32h
+
+#if defined(__aarch64__)
+#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc")))
+#else // !defined(__aarch64__)
+#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))
+#endif // defined(__aarch64__)
+
+#elif defined(__GNUC__)
+/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not
+ * allowed. We can just include arm_acle.h.
+ */
+#include <arm_acle.h>
+#define TARGET_ARMV8_WITH_CRC
+#else // !defined(__clang__) && !defined(__GNUC__)
+#error ARM CRC32 SIMD extensions only supported for Clang and GCC
+#endif
+
+TARGET_ARMV8_WITH_CRC
uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) {
Z_REGISTER uint32_t c;
Z_REGISTER const uint16_t *buf2;
diff --git a/arch/arm/insert_string_acle.c b/arch/arm/insert_string_acle.c
index 2daf9ba..5553252 100644
--- a/arch/arm/insert_string_acle.c
+++ b/arch/arm/insert_string_acle.c
@@ -12,6 +12,44 @@
#include "../../zbuild.h"
#include "../../deflate.h"
+#if defined(__clang__)
+/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an
+ * armv8 target, which is incompatible with ThinLTO optimizations on Android.
+ * (Namely, mixing and matching different module-level targets makes ThinLTO
+ * warn, and Android defaults to armv7-a. This restriction does not apply to
+ * function-level `target`s, however.)
+ *
+ * Since we only need four crc intrinsics, and since clang's implementations of
+ * those are just wrappers around compiler builtins, it's simplest to #define
+ * those builtins directly. If this #define list grows too much (or we depend on
+ * an intrinsic that isn't a trivial wrapper), we may have to find a better way
+ * to go about this.
+ *
+ * NOTE: clang currently complains that "'+soft-float-abi' is not a recognized
+ * feature for this target (ignoring feature)." This appears to be a harmless
+ * bug in clang.
+ */
+#define __crc32b __builtin_arm_crc32b
+#define __crc32d __builtin_arm_crc32d
+#define __crc32w __builtin_arm_crc32w
+#define __crc32cw __builtin_arm_crc32cw
+
+#if defined(__aarch64__)
+#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc")))
+#else // !defined(__aarch64__)
+#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))
+#endif // defined(__aarch64__)
+
+#elif defined(__GNUC__)
+/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not
+ * allowed. We can just include arm_acle.h.
+ */
+#include <arm_acle.h>
+#define TARGET_ARMV8_WITH_CRC
+#else // !defined(__clang__) && !defined(__GNUC__)
+#error ARM CRC32 SIMD extensions only supported for Clang and GCC
+#endif
+
#define UPDATE_HASH(s, h, val) \
h = __crc32w(0, val)
diff --git a/libz.map.txt b/libz.map.txt
new file mode 100644
index 0000000..850bbf8
--- /dev/null
+++ b/libz.map.txt
@@ -0,0 +1,151 @@
+# This file is copied from src/zlib.map and annotated with comments for the NDK
+# stub library generation script.
+ZLIB_1.2.0 {
+ global:
+ compressBound;
+ deflateBound;
+ inflateBack;
+ inflateBackEnd;
+ inflateBackInit_;
+ inflateCopy;
+ local:
+ deflate_copyright; # var
+ inflate_copyright; # var
+ inflate_fast;
+ inflate_table;
+ zcalloc;
+ zcfree;
+ z_errmsg; # var
+ gz_error;
+ gz_intmax;
+ _*;
+};
+
+ZLIB_1.2.0.2 {
+ gzclearerr;
+ gzungetc;
+ zlibCompileFlags;
+} ZLIB_1.2.0;
+
+ZLIB_1.2.0.8 {
+ deflatePrime;
+} ZLIB_1.2.0.2;
+
+ZLIB_1.2.2 {
+ adler32_combine;
+ crc32_combine;
+ deflateSetHeader;
+ inflateGetHeader;
+} ZLIB_1.2.0.8;
+
+ZLIB_1.2.2.3 {
+ deflateTune;
+ gzdirect;
+} ZLIB_1.2.2;
+
+ZLIB_1.2.2.4 {
+ inflatePrime;
+} ZLIB_1.2.2.3;
+
+ZLIB_1.2.3.3 {
+ adler32_combine64;
+ crc32_combine64;
+ gzopen64;
+ gzseek64;
+ gztell64;
+ inflateUndermine;
+} ZLIB_1.2.2.4;
+
+ZLIB_1.2.3.4 {
+ inflateReset2;
+ inflateMark;
+} ZLIB_1.2.3.3;
+
+ZLIB_1.2.3.5 {
+ gzbuffer;
+ gzoffset;
+ gzoffset64;
+ gzclose_r;
+ gzclose_w;
+} ZLIB_1.2.3.4;
+
+ZLIB_1.2.5.1 {
+ deflatePending;
+} ZLIB_1.2.3.5;
+
+ZLIB_1.2.5.2 {
+ deflateResetKeep;
+ gzgetc_;
+ inflateResetKeep;
+} ZLIB_1.2.5.1;
+
+ZLIB_1.2.7.1 { # introduced=19
+ inflateGetDictionary;
+ gzvprintf;
+} ZLIB_1.2.5.2;
+
+ZLIB_1.2.9 { # introduced=28
+ inflateCodesUsed;
+ inflateValidate;
+ uncompress2;
+ gzfread;
+ gzfwrite;
+ deflateGetDictionary;
+ adler32_z;
+ crc32_z;
+} ZLIB_1.2.7.1;
+
+# These were all exposed by the old NDK stub library. Unclear if they still
+# should be, but at least some of them are marked as being exported in zlib.h
+# and the tree doesn't build without them.
+ZLIB_NDK {
+ _dist_code;
+ _length_code;
+ _tr_align;
+ _tr_flush_bits; # introduced=21
+ _tr_flush_block;
+ _tr_init;
+ _tr_stored_block;
+ _tr_tally;
+ adler32;
+ compress2;
+ compress;
+ crc32;
+ deflate;
+ deflateCopy;
+ deflateEnd;
+ deflateInit2_;
+ deflateInit_;
+ deflateParams;
+ deflateReset;
+ deflateSetDictionary;
+ get_crc_table;
+ gzclose;
+ gzdopen;
+ gzeof;
+ gzerror;
+ gzflush;
+ gzgetc;
+ gzgets;
+ gzopen;
+ gzprintf;
+ gzputc;
+ gzputs;
+ gzread;
+ gzrewind;
+ gzseek;
+ gzsetparams;
+ gztell;
+ gzwrite;
+ inflate;
+ inflateEnd;
+ inflateInit2_;
+ inflateInit_;
+ inflateReset;
+ inflateSetDictionary;
+ inflateSync;
+ inflateSyncPoint;
+ uncompress;
+ zError;
+ zlibVersion;
+};