diff options
-rw-r--r-- | CMakeLists.txt | 18 | ||||
-rw-r--r-- | Makefile.in | 16 | ||||
-rw-r--r-- | arch/aarch64/Makefile.in | 49 | ||||
-rw-r--r-- | arch/aarch64/crc32_acle.c | 81 | ||||
-rw-r--r-- | arch/aarch64/insert_string_acle.c | 50 | ||||
-rwxr-xr-x | configure | 27 | ||||
-rw-r--r-- | crc32.c | 2 |
7 files changed, 230 insertions, 13 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index a84da4a..4dac05f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ option(WITH_OPTIM "Build with optimisation" ON) option(WITH_NEW_STRATEGIES "Use new strategies" ON) option(WITH_NATIVE_INSTRUCTIONS "Instruct the compiler to use the full instruction set on this host (gcc/clang -march=native)" OFF) -if("${ARCH}" MATCHES "arm") +if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") option(WITH_ACLE "Build with ACLE CRC" OFF) endif() @@ -125,6 +125,8 @@ else() endif() if("${ARCH}" MATCHES "arm") set(ACLEFLAG "-march=armv8-a+crc") + elseif("${ARCH}" MATCHES "aarch64") + set(ACLEFLAG "-march=armv8-a+crc") endif() else(NOT NATIVEFLAG) set(SSE2FLAG ${NATIVEFLAG}) @@ -132,6 +134,8 @@ else() set(PCLMULFLAG ${NATIVEFLAG}) if("${ARCH}" MATCHES "arm") set(ACLEFLAG "${NATIVEFLAG}") + elseif("${ARCH}" MATCHES "aarch64") + set(ACLEFLAG "${NATIVEFLAG}") endif() endif(NOT NATIVEFLAG) endif() @@ -140,7 +144,7 @@ add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Provide a zlib-compatible API") add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile-related functions") add_feature_info(WITH_OPTIM WITH_OPTIM "Build with optimisation") add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies") -if("${ARCH}" MATCHES "arm") +if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE CRC") endif() @@ -333,6 +337,9 @@ if("${ARCH}" MATCHES "x86_64" OR "${ARCH}" MATCHES "AMD64") elseif("${ARCH}" MATCHES "arm") set(ARCHDIR "arch/arm") add_definitions(-DUNALIGNED_OK -DUNROLL_LESS) +elseif("${ARCH}" MATCHES "aarch64") + set(ARCHDIR "arch/aarch64") + add_definitions(-DUNALIGNED_OK -DUNROLL_LESS) else() set(ARCHDIR "arch/x86") add_definitions(-DX86 -DUNALIGNED_OK -DUNROLL_LESS) @@ -346,6 +353,13 @@ if(WITH_OPTIM) add_definitions("-DARM_ACLE_CRC_HASH") add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, 
using \"${ACLEFLAG}\"") endif() + elseif("${ARCH}" MATCHES "aarch64") + if(WITH_ACLE) + set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) + add_definitions("-DARM_ACLE_CRC_HASH") + add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}") + endif() elseif("${ARCHDIR}" MATCHES "arch/x86") add_definitions("-DX86_CPUID") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c) diff --git a/Makefile.in b/Makefile.in index 9ad7512..12d824e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -160,37 +160,37 @@ endif endif example$(EXE): example.o $(STATICLIB) - $(CC) $(LDFLAGS) -o $@ example.o $(TEST_LIBS) + $(CC) $(LDFLAGS) -o $@ example.o $(TEST_LIBS) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif minigzip$(EXE): minigzip.o $(STATICLIB) - $(CC) $(LDFLAGS) -o $@ minigzip.o $(TEST_LIBS) + $(CC) $(LDFLAGS) -o $@ minigzip.o $(TEST_LIBS) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif examplesh$(EXE): example.o $(SHAREDTARGET) - $(CC) $(LDFLAGS) -o $@ example.o $(SHAREDTARGET) + $(CC) $(LDFLAGS) -o $@ example.o $(SHAREDTARGET) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif minigzipsh$(EXE): minigzip.o $(SHAREDTARGET) - $(CC) $(LDFLAGS) -o $@ minigzip.o $(SHAREDTARGET) + $(CC) $(LDFLAGS) -o $@ minigzip.o $(SHAREDTARGET) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif example64$(EXE): example64.o $(STATICLIB) - $(CC) $(LDFLAGS) -o $@ example64.o $(TEST_LIBS) + $(CC) $(LDFLAGS) -o $@ example64.o $(TEST_LIBS) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif minigzip64$(EXE): minigzip64.o $(STATICLIB) - $(CC) $(LDFLAGS) -o $@ minigzip64.o $(TEST_LIBS) + $(CC) $(LDFLAGS) -o $@ minigzip64.o $(TEST_LIBS) $(LDSHAREDLIBC) ifneq ($(STRIP),) $(STRIP) $@ endif @@ -323,6 +323,8 @@ inffast.o: $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h $(SRCDIR)/inftrees.h $(SRC inftrees.o: $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h 
$(SRCDIR)/inftrees.h trees.o: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h $(SRCDIR)/trees.h zutil.o: $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h $(SRCDIR)/zlib.h zconf.h +arch/aarch64/crc32_acle.o: zconf.h +arch/aarch64/insert_string_acle.o: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h arch/arm/crc32_acle.o: zconf.h arch/arm/insert_string_acle.o: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h arch/x86/crc_folding.o: $(SRCDIR)/arch/x86/crc_folding.h $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h @@ -344,6 +346,8 @@ inffast.lo: $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h $(SRCDIR)/inftrees.h $(SR inftrees.lo: $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h $(SRCDIR)/inftrees.h trees.lo: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h $(SRCDIR)/trees.h zutil.lo: $(SRCDIR)/zutil.h $(SRCDIR)/gzguts.h $(SRCDIR)/zlib.h zconf.h +arch/aarch64/crc32_acle.lo: zconf.h +arch/aarch64/insert_string_acle.lo: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h arch/arm/crc32_acle.lo: zconf.h arch/arm/insert_string_acle.lo: $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h arch/x86/crc_folding.lo: $(SRCDIR)/arch/x86/crc_folding.h $(SRCDIR)/deflate.h $(SRCDIR)/zutil.h $(SRCDIR)/zlib.h zconf.h diff --git a/arch/aarch64/Makefile.in b/arch/aarch64/Makefile.in new file mode 100644 index 0000000..a2c96bd --- /dev/null +++ b/arch/aarch64/Makefile.in @@ -0,0 +1,49 @@ +# Makefile for zlib +# Copyright (C) 1995-2013 Jean-loup Gailly, Mark Adler +# For conditions of distribution and use, see copyright notice in zlib.h + +CC= +CFLAGS= +SFLAGS= +INCLUDES= + +SRCDIR=. +SRCTOP=../.. 
+TOPDIR=$(SRCTOP)
+
+all: crc32_acle.o crc32_acle.lo insert_string_acle.o insert_string_acle.lo
+
+crc32_acle.o: $(SRCDIR)/crc32_acle.c
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
+
+crc32_acle.lo: $(SRCDIR)/crc32_acle.c
+	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/crc32_acle.c
+
+insert_string_acle.o: $(SRCDIR)/insert_string_acle.c
+	$(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
+
+insert_string_acle.lo: $(SRCDIR)/insert_string_acle.c
+	$(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/insert_string_acle.c
+
+mostlyclean: clean
+clean:
+	rm -f *.o *.lo *~
+	rm -rf objs
+	rm -f *.gcda *.gcno *.gcov
+
+distclean:
+	rm -f Makefile
+
+depend:
+	makedepend -Y -- $(CFLAGS) -- $(SRCDIR)/*.c
+	makedepend -Y -a -o.lo -- $(SFLAGS) -- $(SRCDIR)/*.c
+	@sed "s=^$(SRCDIR)/\([a-zA-Z0-9_]*\.\(lo\|o\):\)=\1=g" < Makefile > Makefile.tmp
+	@mv -f Makefile.tmp Makefile
+
+# DO NOT DELETE THIS LINE -- make depend depends on it.
+
+crc32_acle.o: $(TOPDIR)/zconf.h
+insert_string_acle.o: $(SRCTOP)/deflate.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib.h $(TOPDIR)/zconf.h
+
+crc32_acle.lo: $(TOPDIR)/zconf.h
+insert_string_acle.lo: $(SRCTOP)/deflate.h $(SRCTOP)/zutil.h $(SRCTOP)/zlib.h $(TOPDIR)/zconf.h
diff --git a/arch/aarch64/crc32_acle.c b/arch/aarch64/crc32_acle.c
new file mode 100644
index 0000000..e43cbb5
--- /dev/null
+++ b/arch/aarch64/crc32_acle.c
@@ -0,0 +1,81 @@
+/* crc32_acle.c -- compute the CRC-32 of a data stream
+ * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 2016 Yang Zhang
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+*/
+
+#ifdef __ARM_FEATURE_CRC32
+#include <arm_acle.h>
+#include "zconf.h"
+#ifdef __linux__
+# include <stddef.h>
+#endif
+/* Return the CRC-32 of buf[0..len) continued from crc, using the ACLE __crc32{b,h,w,d} intrinsics. */
+uint32_t crc32_acle(uint32_t crc, const unsigned char *buf, size_t len)
+{
+    register uint32_t c;
+    register const uint16_t *buf2;
+    register const uint32_t *buf4;
+    register const uint64_t *buf8;
+
+    c = ~crc;                                  /* work on the inverted value; undone before return */
+    if (len && ((ptrdiff_t)buf & 1)) {         /* one byte to reach 2-byte alignment */
+        c = __crc32b(c, *buf++);
+        len--;
+    }
+
+    if ((len > 2) && ((ptrdiff_t)buf & 2)) {   /* one halfword to reach 4-byte alignment */
+        buf2 = (const uint16_t *) buf;
+        c = __crc32h(c, *buf2++);
+        len -= 2;
+        buf4 = (const uint32_t *) buf2;
+    } else {
+        buf4 = (const uint32_t *) buf;
+    }
+
+    if ((len > 4) && ((ptrdiff_t)buf4 & 4)) {  /* test the advanced pointer; buf is stale after the halfword step */
+        c = __crc32w(c, *buf4++);              /* 4 bytes are consumed, so the word intrinsic is required */
+        len -= 4;
+    }
+
+    buf8 = (const uint64_t *) buf4;
+
+#ifndef UNROLL_LESS
+    while (len >= 32) {                        /* four doublewords (32 bytes) per iteration */
+        c = __crc32d(c, *buf8++);
+        c = __crc32d(c, *buf8++);
+        c = __crc32d(c, *buf8++);
+        c = __crc32d(c, *buf8++);
+        len -= 32;
+    }
+#endif
+
+    while (len >= 8) {                         /* one doubleword (8 bytes) per iteration */
+        c = __crc32d(c, *buf8++);
+        len -= 8;
+    }
+
+    if (len >= 4) {                            /* tail: at most one word, one halfword, one byte */
+        buf4 = (const uint32_t *) buf8;
+        c = __crc32w(c, *buf4++);
+        len -= 4;
+        buf2 = (const uint16_t *) buf4;
+    } else {
+        buf2 = (const uint16_t *) buf8;
+    }
+
+    if (len >= 2) {
+        c = __crc32h(c, *buf2++);
+        len -= 2;
+    }
+
+    if (len) {
+        buf = (const unsigned char *) buf2;
+        c = __crc32b(c, *buf);
+    }
+
+    c = ~c;
+    return c;
+}
+#endif
diff --git a/arch/aarch64/insert_string_acle.c b/arch/aarch64/insert_string_acle.c
new file mode 100644
index 0000000..985b726
--- /dev/null
+++ b/arch/aarch64/insert_string_acle.c
@@ -0,0 +1,50 @@
+/* insert_string_acle.c -- insert_string variant using ACLE's CRC instructions
+ *
+ * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ *
+ */
+
+#ifdef __ARM_FEATURE_CRC32
+#include <arm_acle.h>
+#endif
+#include "deflate.h"
+
+/* ===========================================================================
+ * Insert string str in the dictionary and set match_head to the previous head
+ * of the hash chain (the most recent string with same hash key). Return
+ * the previous length of the hash chain.
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive
+ *    input characters and the first MIN_MATCH bytes of str are valid
+ *    (except for the last MIN_MATCH-1 bytes of the input file).
+ */ +#ifdef ARM_ACLE_CRC_HASH /* NOTE(review): __crc32w below needs <arm_acle.h>, which this file only includes under __ARM_FEATURE_CRC32 -- confirm the two macros are always defined together */ +Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count) { + Pos p, lp; + + if (unlikely(count == 0)) { /* nothing to insert: return the existing chain entry for str without modifying any table */ + return s->prev[str & s->w_mask]; + } + + lp = str + count - 1; /* last position */ + + for (p = str; p <= lp; p++) { + unsigned *ip, val, h, hm; + + ip = (unsigned *)&s->window[p]; + val = *ip; /* loads sizeof(unsigned) bytes of the window starting at p */ + + if (s->level >= TRIGGER_LEVEL) + val &= 0xFFFFFF; /* at or above TRIGGER_LEVEL only the low 24 bits of val are hashed */ + + h = __crc32w(0, val); /* hash is the CRC32 word intrinsic applied to val with seed 0 */ + hm = h & s->hash_mask; + + if (s->head[hm] != p) { /* skip when p already heads this bucket */ + s->prev[p & s->w_mask] = s->head[hm]; /* the old head becomes p's predecessor ... */ + s->head[hm] = p; /* ... and p becomes the new head */ + } + } + return s->prev[lp & s->w_mask]; +} +#endif @@ -254,6 +254,10 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then ARCH=native fi fi ;; + aarch64) + if test "${uname}" = "elf"; then + uname=aarch64 + fi ;; esac CFLAGS="${CFLAGS--O3}" if test -n "${ARCHS}"; then @@ -376,6 +380,9 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then AR="/usr/bin/libtool" fi ARFLAGS="-o" ;; + aarch64) LDSHARED=${LDSHARED-"$cc"} + LDSHAREDFLAGS="-shared -Wl,-soname,libz.so.1 -Wl,--version-script,${SRCDIR}/zlib.map" + LDSHAREDLIBC="-Wl,--start-group -lc -lrdimon -Wl,--end-group" ;; *) LDSHARED=${LDSHARED-"$cc"} LDSHAREDFLAGS="-shared" ;; esac @@ -480,7 +487,7 @@ if test $shared -eq 1; then echo Checking for shared library support... | tee -a configure.log # we must test in two steps (cc then ld), required at least on SunOS 4.x if try $CC -w -c $SFLAGS $test.c && - try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o; then + try $LDSHARED $LDSHAREDFLAGS $LDFLAGS -o $test$shared_ext $test.o $LDSHAREDLIBC; then echo Building shared library $SHAREDTARGET with $CC. | tee -a configure.log elif test -z "$old_cc" -a -z "$old_cflags"; then echo No shared library support. | tee -a configure.log @@ -545,7 +552,7 @@ int main(void) { return 0; } EOF - if try $CC $CFLAGS -o $test $test.c; then + if try $CC $CFLAGS -o $test $test.c $LDSHAREDLIBC; then echo "Checking for fseeko... Yes." 
| tee -a configure.log else CFLAGS="${CFLAGS} -DNO_FSEEKO" @@ -562,7 +569,7 @@ cat > $test.c <<EOF #include <errno.h> int main() { return strlen(strerror(errno)); } EOF -if try $CC $CFLAGS -o $test $test.c; then +if try $CC $CFLAGS -o $test $test.c $LDSHAREDLIBC; then echo "Checking for strerror... Yes." | tee -a configure.log else CFLAGS="${CFLAGS} -DNO_STRERROR" @@ -704,7 +711,7 @@ int main(void) return 0; } EOF -if try ${CC} ${CFLAGS} $test.c; then +if try ${CC} ${CFLAGS} $test.c $LDSHAREDLIBC; then echo "Checking for __builtin_ctzl ... Yes." | tee -a configure.log CFLAGS="$CFLAGS -DHAVE_BUILTIN_CTZL" SFLAGS="$SFLAGS -DHAVE_BUILTIN_CTZL" @@ -843,6 +850,18 @@ case "${ARCH}" in esac ;; + # 64-bit ARM specific optimizations + aarch64) + ARCHDIR=arch/aarch64 + if test $buildacle -eq 1; then + CFLAGS="-march=armv8-a+crc ${CFLAGS} -DARM_ACLE_CRC_HASH" + SFLAGS="-march=armv8-a+crc ${SFLAGS} -DARM_ACLE_CRC_HASH" + ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc32_acle.o insert_string_acle.o" + ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc32_acle.lo insert_string_acle.lo" + fi + CFLAGS="${CFLAGS} -DUNALIGNED_OK -DUNROLL_LESS" + SFLAGS="${SFLAGS} -DUNALIGNED_OK -DUNROLL_LESS" + ;; esac echo "ARCH: ${ARCH}" @@ -23,7 +23,7 @@ # endif #elif defined(__linux__) # include <endian.h> -#elif defined(__APPLE__) || defined(__arm__) +#elif defined(__APPLE__) || defined(__arm__) || defined(__aarch64__) # include <machine/endian.h> #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) # include <sys/endian.h> |