diff options
-rw-r--r-- | CMakeLists.txt | 87 | ||||
-rw-r--r-- | arch/arm/adler32_neon.c | 2 | ||||
-rw-r--r-- | arch/arm/armfeature.c | 4 | ||||
-rw-r--r-- | deflate.c | 2 | ||||
-rw-r--r-- | fallback_builtins.h (renamed from arch/x86/ctzl.h) | 2 | ||||
-rw-r--r-- | functable.c | 4 | ||||
-rw-r--r-- | inflate.c | 2 | ||||
-rw-r--r-- | match_p.h | 6 | ||||
-rw-r--r-- | memcopy.h | 6 | ||||
-rw-r--r-- | zendian.h | 2 | ||||
-rw-r--r-- | zutil.h | 2 |
11 files changed, 57 insertions, 62 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 2122d6d..49a5f92 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,7 +72,16 @@ if(CMAKE_OSX_ARCHITECTURES) # If multiple architectures are requested (universal build), pick only the first list(GET CMAKE_OSX_ARCHITECTURES 0 ARCH) else() - set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) + if (MSVC) + if("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM" OR "${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARMV7") + set(ARCH "arm") + elseif ("${MSVC_C_ARCHITECTURE_ID}" STREQUAL "ARM64") + set(ARCH "aarch64") + endif() + endif() + if(NOT ARCH) + set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) + endif() endif() message(STATUS "Architecture: ${ARCH}") if(CMAKE_TOOLCHAIN_FILE) @@ -156,7 +165,7 @@ elseif(MSVC) set(SSE2FLAG "/arch:SSE2") endif() if("${ARCH}" MATCHES "arm") - add_definitions("-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1") + add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) set(NEONFLAG "/arch:VFPv4") endif() if(WITH_NATIVE_INSTRUCTIONS) @@ -174,13 +183,27 @@ else() message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration") endif() endif() - if(__GNUC__ AND "${ARCH}" MATCHES "arm") - execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine" - OUTPUT_VARIABLE GCC_MACHINE) - if ("${GCC_MACHINE}" MATCHES "eabihf") - set(FLOATABI "-mfloat-abi=hard") + # Check support for ARM floating point + if("${ARCH}" MATCHES "arm") + if (__GNUC__) + execute_process(COMMAND ${CMAKE_C_COMPILER} "-dumpmachine" + OUTPUT_VARIABLE GCC_MACHINE) + if ("${GCC_MACHINE}" MATCHES "eabihf") + set(FLOATABI "-mfloat-abi=hard") + else() + set(FLOATABI "-mfloat-abi=softfp") + endif() + endif() + # Check whether -mfpu=neon is available + set(CMAKE_REQUIRED_FLAGS "-mfpu=neon") + check_c_source_compiles( + "int main() { return 0; }" + MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") + set(CMAKE_REQUIRED_FLAGS) + if(MFPU_NEON_AVAILABLE) + set(NEONFLAG "${FLOATABI} -mfpu=neon") else() - set(FLOATABI "-mfloat-abi=softfp") + set(NEONFLAG "${FLOATABI}") endif() endif() if(NOT NATIVEFLAG) @@ -201,7 +224,6 @@ else() endif() if("${ARCH}" MATCHES "arm") set(ACLEFLAG "-march=armv8-a+crc") - set(NEONFLAG "${FLOATABI} -mfpu=neon") elseif("${ARCH}" MATCHES "aarch64") set(ACLEFLAG "-march=armv8-a+crc") set(NEONFLAG "-march=armv8-a+crc+simd") @@ -212,7 +234,6 @@ else() set(PCLMULFLAG ${NATIVEFLAG}) if("${ARCH}" MATCHES "arm") set(ACLEFLAG "${NATIVEFLAG}") - set(NEONFLAG "${FLOATABI} -mfpu=neon") elseif("${ARCH}" MATCHES "aarch64") set(ACLEFLAG "${NATIVEFLAG}") set(NEONFLAG "${NATIVEFLAG}") @@ -483,16 +504,6 @@ else() endif() set(CMAKE_REQUIRED_FLAGS) -# Check whether -mfpu=neon is available -set(CMAKE_REQUIRED_FLAGS "-mfpu=neon") -check_c_source_compiles( - "int main() - { - return 0; - }" - MFPU_NEON_AVAILABLE FAIL_REGEX "not supported") -set(CMAKE_REQUIRED_FLAGS) - # FORCE_SSE2 option will only be shown if HAVE_SSE2_INTRIN is true if("${ARCH}" MATCHES "i[3-6]86") cmake_dependent_option(FORCE_SSE2 "Always assume CPU is SSE2 capable" OFF "HAVE_SSE2_INTRIN" OFF) @@ -542,39 +553,25 @@ if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/armfeature.c ${ARCHDIR}/fill_window_arm.c) endif() if(WITH_OPTIM) - if("${ARCH}" MATCHES "arm") - if(WITH_ACLE) - set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}") - add_definitions("-DARM_ACLE_CRC_HASH") - add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"") - endif() + if("${ARCH}" MATCHES "arm" OR "${ARCH}" MATCHES "aarch64") if(WITH_NEON) - if(MFPU_NEON_AVAILABLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}") - endif() - add_definitions("-DARM_NEON_ADLER32") - if(MSVC) - add_definitions("-D__ARM_NEON__=1") - endif(MSVC) set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c) + add_definitions(-DARM_NEON_ADLER32) + add_intrinsics_option("${NEONFLAG}") + if(MSVC) + add_definitions(-D__ARM_NEON__) + endif() add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"") endif() - elseif("${ARCH}" MATCHES "aarch64") if(WITH_ACLE) set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/crc32_acle.c ${ARCHDIR}/insert_string_acle.c) - add_definitions("-DARM_ACLE_CRC_HASH") + add_definitions(-DARM_ACLE_CRC_HASH) + # For ARM aarch64, we need to check WITH_NEON first + if("${ARCH}" MATCHES "arm" OR NOT WITH_NEON) + add_intrinsics_option("${ACLEFLAG}") + endif() add_feature_info(ACLE_CRC 1 "Support CRC hash generation using the ACLE instruction set, using \"${ACLEFLAG}\"") endif() - # We need to check WITH_NEON first - if(WITH_NEON) - add_definitions("-DARM_NEON_ADLER32") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${NEONFLAG}") - set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/adler32_neon.c) - add_feature_info(NEON_FILLWINDOW 1 "Support NEON instructions in fill_window_arm, using \"${NEONFLAG}\"") - elseif(WITH_ACLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ACLEFLAG}") - endif() elseif("${ARCHDIR}" MATCHES "arch/x86") add_definitions("-DX86_CPUID") set(ZLIB_ARCH_SRCS ${ZLIB_ARCH_SRCS} ${ARCHDIR}/x86.c) diff --git a/arch/arm/adler32_neon.c b/arch/arm/adler32_neon.c index bc77672..71973a1 100644 --- a/arch/arm/adler32_neon.c +++ b/arch/arm/adler32_neon.c @@ -109,7 +109,7 @@ uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len) { for (i = 0; i < len; i += n) { if ((i + n) > len) - n = len - i; + n = (int)(len - i); if (n < 16) break; diff --git a/arch/arm/armfeature.c b/arch/arm/armfeature.c index 0cec748..7c4758b 100644 --- a/arch/arm/armfeature.c +++ b/arch/arm/armfeature.c @@ -18,7 +18,7 @@ static int arm_has_crc32() { } /* AArch64 has neon. */ -#if !defined(__aarch64__) +#if !defined(__aarch64__) && !defined(_M_ARM64) static inline int arm_has_neon() { #if defined(__linux__) && defined(HWCAP_NEON) @@ -41,7 +41,7 @@ ZLIB_INTERNAL int arm_cpu_has_neon; ZLIB_INTERNAL int arm_cpu_has_crc32; void ZLIB_INTERNAL arm_check_features(void) { -#if defined(__aarch64__) +#if defined(__aarch64__) || defined(_M_ARM64) arm_cpu_has_neon = 1; /* always available */ #else arm_cpu_has_neon = arm_has_neon(); @@ -269,7 +269,7 @@ int ZEXPORT PREFIX(deflateInit2_)(PREFIX3(stream) *strm, int level, int method, #ifdef X86_CPUID x86_check_features(); -#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) arm_check_features(); #endif diff --git a/arch/x86/ctzl.h b/fallback_builtins.h index bc9e9bd..4cdace3 100644 --- a/arch/x86/ctzl.h +++ b/fallback_builtins.h @@ -3,7 +3,7 @@ #include <intrin.h> #ifdef X86_CPUID -# include "x86.h" +# include "arch/x86/x86.h" #endif #if defined(_MSC_VER) && !defined(__clang__) diff --git a/functable.c b/functable.c index 587abfe..b3020e0 100644 --- a/functable.c +++ b/functable.c @@ -19,7 +19,7 @@ extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned in /* fill_window */ #ifdef X86_SSE2 extern void fill_window_sse(deflate_state *s); -#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) extern void fill_window_arm(deflate_state *s); #endif @@ -81,7 +81,7 @@ ZLIB_INTERNAL void fill_window_stub(deflate_state *s) { if (x86_cpu_has_sse2) # endif functable.fill_window=&fill_window_sse; - #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) + #elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) functable.fill_window=&fill_window_arm; #endif @@ -132,7 +132,7 @@ int ZEXPORT PREFIX(inflateInit2_)(PREFIX3(stream) *strm, int windowBits, const c #ifdef X86_CPUID x86_check_features(); -#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) +#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || defined(_M_ARM64) arm_check_features(); #endif @@ -33,10 +33,8 @@ #if defined(_MSC_VER) && !defined(__clang__) -# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) -# include "arch/x86/ctzl.h" -# elif defined(_M_ARM) -# include "arch/arm/ctzl.h" +# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined(_M_ARM) || defined(_M_ARM64) +# include "fallback_builtins.h" # endif #endif @@ -18,7 +18,7 @@ static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) { #endif } - #if (defined(__GNUC__) || defined(__clang__)) && (defined(__ARM_NEON__) || defined(__ARM_NEON)) + #if defined(__ARM_NEON__) || defined(__ARM_NEON) #include <arm_neon.h> typedef uint8x16_t inffast_chunk_t; #define INFFAST_CHUNKSIZE sizeof(inffast_chunk_t) @@ -199,7 +199,7 @@ static inline unsigned char *chunkmemset_3(unsigned char *out, unsigned char *fr } #endif - #if defined(__aarch64__) + #if defined(__aarch64__) || defined(_M_ARM64) static inline unsigned char *chunkmemset_6(unsigned char *out, unsigned char *from, unsigned dist, unsigned len) { uint16x8x3_t chunks; unsigned sz = sizeof(chunks); @@ -265,7 +265,7 @@ static inline unsigned char *chunkmemset(unsigned char *out, unsigned dist, unsi chunk = chunkmemset_4(from); break; } - #if defined(__aarch64__) + #if defined(__aarch64__) || defined(_M_ARM64) case 6: return chunkmemset_6(out, from, dist, len); #endif @@ -27,7 +27,7 @@ #elif defined(WIN32) || defined(_WIN32) # define LITTLE_ENDIAN 1234 # define BIG_ENDIAN 4321 -# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) +# if defined(_M_IX86) || defined(_M_AMD64) || defined(_M_IA64) || defined (_M_ARM) || defined (_M_ARM64) # define BYTE_ORDER LITTLE_ENDIAN # else # error Unknown endianness! @@ -240,7 +240,7 @@ void ZLIB_INTERNAL zng_cfree(void *opaque, void *ptr); #if defined(X86_CPUID) # include "arch/x86/x86.h" -#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) +#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) || defined(_M_ARM64) # include "arch/arm/arm.h" #endif |