diff options
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | arch/aarch64/Makefile.in | 8 | ||||
-rw-r--r-- | arch/aarch64/armfeature.c | 19 | ||||
-rw-r--r-- | arch/aarch64/insert_string_acle.c | 6 | ||||
-rw-r--r-- | arch/arm/Makefile.in | 8 | ||||
-rw-r--r-- | arch/arm/armfeature.c | 33 | ||||
-rw-r--r-- | arch/arm/insert_string_acle.c | 6 | ||||
-rwxr-xr-x | configure | 8 | ||||
-rw-r--r-- | functable.c | 13 | ||||
-rw-r--r-- | win32/Makefile.arm | 4 |
10 files changed, 86 insertions, 21 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3f5cd00..eed40c2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -478,7 +478,7 @@ else()
     message(STATUS "No optimized architecture: using ${ARCHDIR}")
 endif()
 if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64")
-    set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/fill_window_arm.c)
+    set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/armfeature.c ${ARCHDIR}/fill_window_arm.c)
 endif()
 if(WITH_OPTIM)
     if("${ARCH}" MATCHES "arm")
diff --git a/arch/aarch64/Makefile.in b/arch/aarch64/Makefile.in
index 3c2bebc..6fcf919 100644
--- a/arch/aarch64/Makefile.in
+++ b/arch/aarch64/Makefile.in
@@ -12,7 +12,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: adler32_neon.o adler32_neon.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
+all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
 
 adler32_neon.o: $(SRCDIR)/adler32_neon.c
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
@@ -20,6 +20,12 @@ adler32_neon.o: $(SRCDIR)/adler32_neon.c
 adler32_neon.lo: $(SRCDIR)/adler32_neon.c
 	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
 
+armfeature.o: $(SRCDIR)/armfeature.c
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
+armfeature.lo: $(SRCDIR)/armfeature.c
+	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
 crc32_acle.o: $(SRCDIR)/crc32_acle.c
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
 
diff --git a/arch/aarch64/armfeature.c b/arch/aarch64/armfeature.c
new file mode 100644
index 0000000..9f2af03
--- /dev/null
+++ b/arch/aarch64/armfeature.c
@@ -0,0 +1,19 @@
+#if defined(__linux__)
+# include <sys/auxv.h>
+# include <asm/hwcap.h>
+#endif
+
+int arm_has_crc32() {
+#if defined(__linux__) && defined(HWCAP_CRC32)
+    return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0 ? 1 : 0;
+#elif defined(ARM_NOCHECK_ACLE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int arm_has_neon()
+{
+    return 1; /* always available */
+}
diff --git a/arch/aarch64/insert_string_acle.c b/arch/aarch64/insert_string_acle.c
index 49f11cb..563100b 100644
--- a/arch/aarch64/insert_string_acle.c
+++ b/arch/aarch64/insert_string_acle.c
@@ -5,10 +5,9 @@
  *
  */
 
-#include "zbuild.h"
-#ifdef __ARM_FEATURE_CRC32
+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
 #include <arm_acle.h>
-#endif
+#include "zbuild.h"
 #include "deflate.h"
 
 /* ===========================================================================
@@ -19,7 +18,6 @@
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
-#ifdef ARM_ACLE_CRC_HASH
 Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
     Pos p, lp, ret;
 
diff --git a/arch/arm/Makefile.in b/arch/arm/Makefile.in
index 3c2bebc..6fcf919 100644
--- a/arch/arm/Makefile.in
+++ b/arch/arm/Makefile.in
@@ -12,7 +12,7 @@ SRCDIR=.
 SRCTOP=../..
 TOPDIR=$(SRCTOP)
 
-all: adler32_neon.o adler32_neon.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
+all: adler32_neon.o adler32_neon.lo armfeature.o armfeature.lo crc32_acle.o crc32_acle.lo fill_window_arm.o fill_window_arm.lo insert_string_acle.o insert_string_acle.lo
 
 adler32_neon.o: $(SRCDIR)/adler32_neon.c
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
@@ -20,6 +20,12 @@ adler32_neon.o: $(SRCDIR)/adler32_neon.c
 adler32_neon.lo: $(SRCDIR)/adler32_neon.c
 	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/adler32_neon.c
 
+armfeature.o: $(SRCDIR)/armfeature.c
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
+armfeature.lo: $(SRCDIR)/armfeature.c
+	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/armfeature.c
+
 crc32_acle.o: $(SRCDIR)/crc32_acle.c
 	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
 
diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c
new file mode 100644
index 0000000..7c78dda
--- /dev/null
+++ b/arch/arm/armfeature.c
@@ -0,0 +1,33 @@
+#if defined(__linux__)
+# include <sys/auxv.h>
+# include <asm/hwcap.h>
+#elif defined(_WIN32)
+# include <winapifamily.h>
+#endif
+
+int arm_has_crc32() {
+#if defined(__linux__) && defined(HWCAP2_CRC32)
+    return (getauxval(AT_HWCAP2) & HWCAP2_CRC32) != 0 ? 1 : 0;
+#elif defined(ARM_NOCHECK_ACLE)
+    return 1;
+#else
+    return 0;
+#endif
+}
+
+int arm_has_neon()
+{
+#if defined(__linux__) && defined(HWCAP_NEON)
+    return (getauxval(AT_HWCAP) & HWCAP_NEON) != 0 ? 1 : 0;
+#elif defined(_M_ARM) && defined(WINAPI_FAMILY_PARTITION)
+# if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
+    return 1; /* Always supported */
+# endif
+#endif
+
+#if defined(ARM_NOCHECK_NEON)
+    return 1;
+#else
+    return 0;
+#endif
+}
diff --git a/arch/arm/insert_string_acle.c b/arch/arm/insert_string_acle.c
index 49f11cb..563100b 100644
--- a/arch/arm/insert_string_acle.c
+++ b/arch/arm/insert_string_acle.c
@@ -5,10 +5,9 @@
  *
  */
 
-#include "zbuild.h"
-#ifdef __ARM_FEATURE_CRC32
+#if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
 #include <arm_acle.h>
-#endif
+#include "zbuild.h"
 #include "deflate.h"
 
 /* ===========================================================================
@@ -19,7 +18,6 @@
  *    input characters and the first MIN_MATCH bytes of str are valid
  *    (except for the last MIN_MATCH-1 bytes of the input file).
  */
-#ifdef ARM_ACLE_CRC_HASH
 Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) {
     Pos p, lp, ret;
 
diff --git a/configure b/configure
--- a/configure
+++ b/configure
@@ -1010,8 +1010,8 @@ case "${ARCH}" in
     arm | armv[3467]l | armv4b | armv4tl | armv5tel | armv5tejl | armv[67]hl | armv7hnl | armv[78]-a | armv8-a+* | armv8.[1234]-a | armv8.[1234]-a+*)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=arm
         ARCHDIR=arch/arm
-        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_arm.o"
-        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_arm.lo"
+        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
+        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
 
         GCC_MACHINE=$(${CC} -dumpmachine)
         case "${GCC_MACHINE}" in
@@ -1115,8 +1115,8 @@ case "${ARCH}" in
     aarch64)
         [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=aarch64
         ARCHDIR=arch/aarch64
-        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} fill_window_arm.o"
-        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} fill_window_arm.lo"
+        ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} armfeature.o fill_window_arm.o"
+        ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} armfeature.lo fill_window_arm.lo"
 
         if test $native -eq 0; then
             ARCH="armv8-a"
diff --git a/functable.c b/functable.c
index e0f0e40..93fbd23 100644
--- a/functable.c
+++ b/functable.c
@@ -12,6 +12,9 @@
 
 #if defined(X86_CPUID)
 # include "arch/x86/x86.h"
+#elif (defined(__arm__) || defined(__aarch64__) || defined(_M_ARM))
+extern int arm_has_crc32();
+extern int arm_has_neon();
 #endif
 
 
@@ -65,7 +68,8 @@ ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsi
     #ifdef X86_SSE4_2_CRC_HASH
     if (x86_cpu_has_sse42)
         functable.insert_string=&insert_string_sse;
-    #elif defined(ARM_ACLE_CRC_HASH)
+    #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
+    if (arm_has_crc32())
         functable.insert_string=&insert_string_acle;
     #endif
 
@@ -93,6 +97,7 @@ ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, si
     functable.adler32=&adler32_c;
 
     #if ((defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32))
+    if (arm_has_neon())
         functable.adler32=&adler32_neon;
     #endif
 
@@ -113,10 +118,10 @@ ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64
 
     if (sizeof(void *) == sizeof(ptrdiff_t)) {
 #if BYTE_ORDER == LITTLE_ENDIAN
-#  if __ARM_FEATURE_CRC32
+        functable.crc32=crc32_little;
+#  if __ARM_FEATURE_CRC32 && defined(ARM_ACLE_CRC_HASH)
+        if (arm_has_crc32())
             functable.crc32=crc32_acle;
-#  else
-        functable.crc32=crc32_little;
 #  endif
 #elif BYTE_ORDER == BIG_ENDIAN
         functable.crc32=crc32_big;
diff --git a/win32/Makefile.arm b/win32/Makefile.arm
index 2ffb725..656e4cf 100644
--- a/win32/Makefile.arm
+++ b/win32/Makefile.arm
@@ -37,7 +37,7 @@ WITH_VFPV3 =
 NEON_ARCH = /arch:VFPv4
 SUFFIX =
 
-OBJS = adler32.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_slow.obj \
+OBJS = adler32.obj armfeature.obj compress.obj crc32.obj deflate.obj deflate_fast.obj deflate_slow.obj \
        functable.obj infback.obj inflate.obj inftrees.obj inffast.obj match.obj trees.obj uncompr.obj zutil.obj fill_window_arm.obj
 !if "$(WITH_GZFILEOP)" != ""
 WFLAGS = $(WFLAGS) -DWITH_GZFILEOP
@@ -61,7 +61,7 @@ NEON_ARCH = /arch:VFPv3
 !endif
 !if "$(WITH_NEON)" != ""
 CFLAGS = $(CFLAGS) $(NEON_ARCH)
-WFLAGS = $(WFLAGS) -D__ARM_NEON__=1
+WFLAGS = $(WFLAGS) -D__ARM_NEON__=1 -DARM_NEON_ADLER32 -DARM_NOCHECK_NEON
 OBJS = $(OBJS) adler32_neon.obj
 !endif
 