diff options
author | Danny Lin <danny@kdrag0n.dev> | 2021-04-06 21:39:07 -0700 |
---|---|---|
committer | Danny Lin <danny@kdrag0n.dev> | 2021-04-18 01:39:35 -0700 |
commit | 4b966cb85bafbc9703d4493758908ae635a930ae (patch) | |
tree | e4bc2087821bf3eb7986787d7cd683ba863b6f85 | |
parent | deda158428676929b7b3f9233eed2cbe1498769b (diff) |
Port zlib-ng to the Android platform
- Add Soong blueprint
- Add CRC32 fixes
- Add NDK mappings
All changes were based on the Android fork of Chromium zlib.
As reported by simpleperf, a considerable amount of CPU time is spent
decompressing Android binary XML resources when opening and closing
activities in Settings. Because of this, some binary XMLs from
framework-res.apk have been used for zlib_bench testing.
zlib-ng is significantly faster than vanilla zlib and Chromium zlib
(with optimizations enabled) in terms of compression. 64-bit benchmarks
with zlib_bench64 on Android:
Compression:
+------------------------------------------+------+---------------+---------+-------+
| File | zlib | Chromium zlib | zlib-ng | Unit |
+------------------------------------------+------+---------------+---------+-------+
| res/anim/activity_open_enter.xml | 10.4 | 16 | 25.1 | MiB/s |
| res/anim/activity_close_enter.xml | 9.3 | 15.5 | 21.1 | MiB/s |
| res/transition/move.xml | 9.7 | 16.4 | 21.6 | MiB/s |
| res/drawable/stat_sys_battery_charge.xml | 13.3 | 22.4 | 33.2 | MiB/s |
+------------------------------------------+------+---------------+---------+-------+
The gains are less significant for decompression, and Chromium zlib is
generally slightly faster in this case. However, zlib-ng still tends to
be faster than vanilla zlib:
Decompression:
+------------------------------------------+-------+---------------+---------+-------+
| File | zlib | Chromium zlib | zlib-ng | Unit |
+------------------------------------------+-------+---------------+---------+-------+
| res/anim/activity_open_enter.xml | 136.8 | 150.6 | 153.2 | MiB/s |
| res/anim/activity_close_enter.xml | 125.7 | 133.5 | 123 | MiB/s |
| res/transition/move.xml | 120.6 | 129.9 | 124.4 | MiB/s |
| res/drawable/stat_sys_battery_charge.xml | 194.6 | 222.1 | 217.3 | MiB/s |
+------------------------------------------+-------+---------------+---------+-------+
In combination with enabling -O3 for libpng, zlib-ng speeds up
screenshot saving by 16% on redfin (Cortex-A55 + Cortex-A76) as compared
to vanilla zlib.
While Android is likely to be decompressing much more data than it
compresses, zlib-ng's significantly faster compression should be a
worthwhile tradeoff for its deficiencies in decompression speed.
zlib versions used for benchmarks:
Vanilla zlib: [1] (Chromium zlib with optimizations disabled)
Chromium zlib: [2] with all optimizations enabled
zlib-ng: [3] with Android porting changes
All zlib variants were built with -O3, as per commit [4].
[1] https://android.googlesource.com/platform/external/zlib/+/refs/tags/android-11.0.0_r35
[2] https://android.googlesource.com/platform/external/zlib/+/e620e8065d17bab7c9eefe3a03d7f8b80ca450d6
[3] https://github.com/zlib-ng/zlib-ng/commit/9f784908599006e9f09599b85b495bdcda1f25c4
[4] https://android.googlesource.com/platform/external/zlib/+/18e812de
-rw-r--r-- | Android.bp | 244 | ||||
-rw-r--r-- | arch/arm/crc32_acle.c | 40 | ||||
-rw-r--r-- | arch/arm/insert_string_acle.c | 38 | ||||
-rw-r--r-- | libz.map.txt | 151 |
4 files changed, 473 insertions, 0 deletions
diff --git a/Android.bp b/Android.bp new file mode 100644 index 0000000..bc855be --- /dev/null +++ b/Android.bp @@ -0,0 +1,244 @@ +srcs_arm = [ + "arch/arm/armfeature.c", + "arch/arm/crc32_acle.c", + "arch/arm/insert_string_acle.c", + "arch/arm/adler32_neon.c", + "arch/arm/chunkset_neon.c", + "arch/arm/slide_neon.c", +] + +// Not all CPUs will support these features, but compatibility is checked at runtime. +cflags_arm = [ + "-DARM_ACLE_CRC_HASH", + "-DARM_FEATURES", + "-DARM_NEON_ADLER32", + "-DARM_NEON_CHUNKSET", + "-DARM_NEON_SLIDEHASH", +] + +srcs_x86 = [ + "arch/x86/x86.c", + "arch/x86/slide_avx.c", + "arch/x86/chunkset_avx.c", + "arch/x86/compare258_avx.c", + "arch/x86/adler32_avx.c", + "arch/x86/insert_string_sse.c", + "arch/x86/compare258_sse.c", + "arch/x86/chunkset_sse.c", + "arch/x86/slide_sse.c", + "arch/x86/adler32_ssse3.c", + "arch/x86/crc_folding.c", +] + +cflags_x86 = [ + "-DX86_AVX2", + "-DX86_AVX2_ADLER32", + "-DX86_AVX_CHUNKSET", + "-DX86_FEATURES", + "-DX86_PCLMULQDQ_CRC", + "-DX86_SSE2", + "-DX86_SSE2_CHUNKSET", + "-DX86_SSE2_SLIDEHASH", + "-DX86_SSE42_CMP_STR", + "-DX86_SSE42_CRC_HASH", + "-DX86_SSE42_CRC_INTRIN", + "-DX86_SSSE3", + "-DX86_SSSE3_ADLER32", + "-mavx2", + "-msse4", + "-mssse3", + "-mpclmul", +] + +// This optimization is applicable to arm64 and x86-64. 
+cflags_64 = ["-DUNALIGNED64_OK"] + +libz_srcs = [ + "adler32.c", + "chunkset.c", + "compare258.c", + "compress.c", + "crc32.c", + "crc32_comb.c", + "deflate.c", + "deflate_fast.c", + "deflate_medium.c", + "deflate_quick.c", + "deflate_slow.c", + "functable.c", + "infback.c", + "inffast.c", + "inflate.c", + "inftrees.c", + "insert_string.c", + "trees.c", + "uncompr.c", + "zutil.c", + "gzlib.c", + "gzread.c", + "gzwrite.c", +] + +cflags_common = [ + "-DHAVE_BUILTIN_CTZ", + "-DHAVE_BUILTIN_CTZLL", + "-DHAVE_VISIBILITY_HIDDEN", + "-DHAVE_VISIBILITY_INTERNAL", + "-DZLIB_CONST", + + "-DWITH_GZFILEOP", + "-DZLIB_COMPAT", + "-DZLIB_DLL", + "-D_LARGEFILE64_SOURCE=1", + "-D__USE_LARGEFILE64", + "-Wall", + "-Werror", + "-Wno-implicit-fallthrough", + "-O3", + "-DNDEBUG", + "-fno-semantic-interposition", + "-std=c99", + "-DUNALIGNED_OK", +] + +cc_defaults { + name: "libz_defaults", + + cflags: cflags_common, + stl: "none", + export_include_dirs: ["."], + srcs: libz_srcs, + + arch: { + arm: { + // TODO: This is to work around b/24465209. Remove after root cause + // is fixed. 
+ pack_relocations: false, + ldflags: ["-Wl,--hash-style=both"], + + cflags: cflags_arm, + srcs: srcs_arm, + }, + arm64: { + cflags: cflags_arm + cflags_64, + srcs: srcs_arm, + }, + x86: { + cflags: cflags_x86, + srcs: srcs_x86, + }, + x86_64: { + cflags: cflags_x86 + cflags_64, + srcs: srcs_x86, + }, + }, +} + +cc_library { + name: "libz", + defaults: ["libz_defaults"], + + host_supported: true, + unique_host_soname: true, + static_ndk_lib: true, + + vendor_available: true, + vndk: { + enabled: true, + support_system_process: true, + }, + ramdisk_available: true, + recovery_available: true, + native_bridge_supported: true, + + target: { + linux_bionic: { + enabled: true, + }, + windows: { + enabled: true, + }, + }, + +// TODO(b/155456180): make libz a stub-providing library by uncommenting below +// stubs: { +// versions: ["29", "30"], +// symbol_file: "libz.map.txt", +// }, + apex_available: [ + "//apex_available:platform", + "com.android.art.debug", // from libdexfile + "com.android.art.release", + "com.android.bluetooth.updatable", + "com.android.runtime", + ], +} + +// A more stable build of libz. Build configuration of this library should be +// the same for different targets. This is only used by imgdiff. + +cc_library { + name: "libz_stable", + visibility: [ + "//bootable/recovery/applypatch", + "//bootable/recovery/tests", + ], + cflags: cflags_common, + stl: "none", + export_include_dirs: ["."], + srcs: libz_srcs, + + host_supported: true, + vendor_available: true, + recovery_available: true, +} + +cc_binary_host { + name: "minigzip", + srcs: ["test/minigzip.c"], + cflags: cflags_common, + static_libs: ["libz"], + stl: "none", +} + +// This module is defined in development/ndk/Android.bp. 
Updating these headers +// to be usable for any API level is going to be some work (at the very least, +// there's a ZLIB_VERNUM that will need to be handled since early versions of +// Android did not have all the APIs that calling code will use if this is set +// to the current value). +// +// The NDK never updated the zlib headers when the platform updated, so until we +// solve this the NDK will continue shipping the old headers. +// +// ndk_headers { +// name: "libz_headers", +// from: "src", +// to: "", +// srcs: [ +// "src/zconf.h", +// "src/zlib.h", +// ], +// license: "NOTICE", +// } + +// TODO(b/155351357) remove this library and let libtextclassifier use libz +// instead. +// libz_current allows modules building against the NDK to have access to zlib +// API that's not available from the NDK libz. +cc_library_static { + name: "libz_current", + defaults: ["libz_defaults"], + sdk_version: "current", + + apex_available: [ + "//apex_available:platform", // indirectly from GoogleExtServices that gets installed to /system + "com.android.extservices", // indirectly via libtextclassifier + ], +} + +ndk_library { + name: "libz", + symbol_file: "libz.map.txt", + first_version: "9", + unversioned_until: "current", +} diff --git a/arch/arm/crc32_acle.c b/arch/arm/crc32_acle.c index 88ba6c3..99013e1 100644 --- a/arch/arm/crc32_acle.c +++ b/arch/arm/crc32_acle.c @@ -11,6 +11,46 @@ #endif #include "../../zutil.h" +#if defined(__clang__) +/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an + * armv8 target, which is incompatible with ThinLTO optimizations on Android. + * (Namely, mixing and matching different module-level targets makes ThinLTO + * warn, and Android defaults to armv7-a. This restriction does not apply to + * function-level `target`s, however.) 
+ * + * Since we only need four crc intrinsics, and since clang's implementation of + * those are just wrappers around compiler builtins, it's simplest to #define + * those builtins directly. If this #define list grows too much (or we depend on + * an intrinsic that isn't a trivial wrapper), we may have to find a better way + * to go about this. + * + * NOTE: clang currently complains that "'+soft-float-abi' is not a recognized + * feature for this target (ignoring feature)." This appears to be a harmless + * bug in clang. + */ +#define __crc32b __builtin_arm_crc32b +#define __crc32d __builtin_arm_crc32d +#define __crc32w __builtin_arm_crc32w +#define __crc32cw __builtin_arm_crc32cw +#define __crc32h __builtin_arm_crc32h + +#if defined(__aarch64__) +#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc"))) +#else // !defined(__aarch64__) +#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc"))) +#endif // defined(__aarch64__) + +#elif defined(__GNUC__) +/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not + * allowed. We can just include arm_acle.h. + */ +#include <arm_acle.h> +#define TARGET_ARMV8_WITH_CRC +#else // !defined(__clang__) && !defined(__GNUC__) +#error ARM CRC32 SIMD extensions only supported for Clang and GCC +#endif + +TARGET_ARMV8_WITH_CRC uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, uint64_t len) { Z_REGISTER uint32_t c; Z_REGISTER const uint16_t *buf2; diff --git a/arch/arm/insert_string_acle.c b/arch/arm/insert_string_acle.c index 2daf9ba..5553252 100644 --- a/arch/arm/insert_string_acle.c +++ b/arch/arm/insert_string_acle.c @@ -12,6 +12,44 @@ #include "../../zbuild.h" #include "../../deflate.h" +#if defined(__clang__) +/* CRC32 intrinsics are #ifdef'ed out of arm_acle.h unless we build with an + * armv8 target, which is incompatible with ThinLTO optimizations on Android. + * (Namely, mixing and matching different module-level targets makes ThinLTO + * warn, and Android defaults to armv7-a. 
This restriction does not apply to + * function-level `target`s, however.) + * + * Since we only need four crc intrinsics, and since clang's implementation of + * those are just wrappers around compiler builtins, it's simplest to #define + * those builtins directly. If this #define list grows too much (or we depend on + * an intrinsic that isn't a trivial wrapper), we may have to find a better way + * to go about this. + * + * NOTE: clang currently complains that "'+soft-float-abi' is not a recognized + * feature for this target (ignoring feature)." This appears to be a harmless + * bug in clang. + */ +#define __crc32b __builtin_arm_crc32b +#define __crc32d __builtin_arm_crc32d +#define __crc32w __builtin_arm_crc32w +#define __crc32cw __builtin_arm_crc32cw + +#if defined(__aarch64__) +#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc"))) +#else // !defined(__aarch64__) +#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc"))) +#endif // defined(__aarch64__) + +#elif defined(__GNUC__) +/* For GCC, we are setting CRC extensions at module level, so ThinLTO is not + * allowed. We can just include arm_acle.h. + */ +#include <arm_acle.h> +#define TARGET_ARMV8_WITH_CRC +#else // !defined(__clang__) && !defined(__GNUC__) +#error ARM CRC32 SIMD extensions only supported for Clang and GCC +#endif + #define UPDATE_HASH(s, h, val) \ h = __crc32w(0, val) diff --git a/libz.map.txt b/libz.map.txt new file mode 100644 index 0000000..850bbf8 --- /dev/null +++ b/libz.map.txt @@ -0,0 +1,151 @@ +# This file is copied from src/zlib.map and annotated with comments for the NDK +# stub library generation script. 
+ZLIB_1.2.0 { + global: + compressBound; + deflateBound; + inflateBack; + inflateBackEnd; + inflateBackInit_; + inflateCopy; + local: + deflate_copyright; # var + inflate_copyright; # var + inflate_fast; + inflate_table; + zcalloc; + zcfree; + z_errmsg; # var + gz_error; + gz_intmax; + _*; +}; + +ZLIB_1.2.0.2 { + gzclearerr; + gzungetc; + zlibCompileFlags; +} ZLIB_1.2.0; + +ZLIB_1.2.0.8 { + deflatePrime; +} ZLIB_1.2.0.2; + +ZLIB_1.2.2 { + adler32_combine; + crc32_combine; + deflateSetHeader; + inflateGetHeader; +} ZLIB_1.2.0.8; + +ZLIB_1.2.2.3 { + deflateTune; + gzdirect; +} ZLIB_1.2.2; + +ZLIB_1.2.2.4 { + inflatePrime; +} ZLIB_1.2.2.3; + +ZLIB_1.2.3.3 { + adler32_combine64; + crc32_combine64; + gzopen64; + gzseek64; + gztell64; + inflateUndermine; +} ZLIB_1.2.2.4; + +ZLIB_1.2.3.4 { + inflateReset2; + inflateMark; +} ZLIB_1.2.3.3; + +ZLIB_1.2.3.5 { + gzbuffer; + gzoffset; + gzoffset64; + gzclose_r; + gzclose_w; +} ZLIB_1.2.3.4; + +ZLIB_1.2.5.1 { + deflatePending; +} ZLIB_1.2.3.5; + +ZLIB_1.2.5.2 { + deflateResetKeep; + gzgetc_; + inflateResetKeep; +} ZLIB_1.2.5.1; + +ZLIB_1.2.7.1 { # introduced=19 + inflateGetDictionary; + gzvprintf; +} ZLIB_1.2.5.2; + +ZLIB_1.2.9 { # introduced=28 + inflateCodesUsed; + inflateValidate; + uncompress2; + gzfread; + gzfwrite; + deflateGetDictionary; + adler32_z; + crc32_z; +} ZLIB_1.2.7.1; + +# These were all exposed by the old NDK stub library. Unclear if they still +# should be, but at least some of them are marked as being exported in zlib.h +# and the tree doesn't build without them. 
+ZLIB_NDK { + _dist_code; + _length_code; + _tr_align; + _tr_flush_bits; # introduced=21 + _tr_flush_block; + _tr_init; + _tr_stored_block; + _tr_tally; + adler32; + compress2; + compress; + crc32; + deflate; + deflateCopy; + deflateEnd; + deflateInit2_; + deflateInit_; + deflateParams; + deflateReset; + deflateSetDictionary; + get_crc_table; + gzclose; + gzdopen; + gzeof; + gzerror; + gzflush; + gzgetc; + gzgets; + gzopen; + gzprintf; + gzputc; + gzputs; + gzread; + gzrewind; + gzseek; + gzsetparams; + gztell; + gzwrite; + inflate; + inflateEnd; + inflateInit2_; + inflateInit_; + inflateReset; + inflateSetDictionary; + inflateSync; + inflateSyncPoint; + uncompress; + zError; + zlibVersion; +}; |