diff options
author | Mika Lindqvist <postmaster@raasu.org> | 2022-03-13 17:12:42 +0200 |
---|---|---|
committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2023-03-17 21:27:56 +0100 |
commit | 10627e69df3360103bf132a97f7708e73d01122c (patch) | |
tree | 09edc30bca258675e6264558cbc1f1ddc3e84e69 | |
parent | b9957e95dcaf5a38655b84e65cbaf0a9bee022af (diff) |
Allow bypassing runtime feature check of TZCNT instructions.
* This avoids a conditional branch when it is known at build time that TZCNT instructions are always supported.
-rw-r--r-- | CMakeLists.txt | 10 | ||||
-rw-r--r-- | README.md | 1 | ||||
-rwxr-xr-x | configure | 8 | ||||
-rw-r--r-- | fallback_builtins.h | 4 |
4 files changed, 17 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8e5646d..978ae2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -102,6 +102,7 @@ elseif(BASEARCH_S360_FOUND) add_option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF) add_option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF) elseif(BASEARCH_X86_FOUND) + option(FORCE_TZCNT "Always assume CPU is TZCNT capable" OFF) add_option(WITH_AVX2 "Build with AVX2" ON) add_option(WITH_SSE2 "Build with SSE2" ON) add_option(WITH_SSSE3 "Build with SSSE3" ON) @@ -821,13 +822,10 @@ if(WITH_OPTIM) endif() endif() endif() - if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN) - add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32) - set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c) - add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"") - list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS}) - set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}") + if(FORCE_TZCNT) + add_definitions(-DX86_NOCHECK_TZCNT) endif() + add_feature_info(FORCE_TZCNT FORCE_TZCNT "Assume CPU is TZCNT capable") if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4) add_definitions(-DX86_PCLMULQDQ_CRC) set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c) @@ -195,6 +195,7 @@ Advanced Build Options | ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF | | UNALIGNED_OK | | Allow unaligned reads | ON (x86, arm) | | | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) | +| FORCE_TZCNT | --force-tzcnt | Skip runtime check for TZCNT instructions | OFF | | WITH_AVX2 | | Build with AVX2 intrinsics | ON | | WITH_SSE2 | | Build with SSE2 intrinsics | ON | | WITH_SSE4 | | Build with SSE4 intrinsics | ON | @@ -99,6 +99,7 @@ with_fuzzers=0 floatabi= native=0 forcesse2=0 +forcetzcnt=0 avx2flag="-mavx2" sse2flag="-msse2" ssse3flag="-mssse3" @@ -155,6 +156,7 @@ case "$1" in echo ' [--with-dfltcc-deflate] Use DEFLATE 
CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log + echo ' [--force-tzcnt] Assume TZCNT instructions are always available (disabled by default)' | tee -a configure.log echo ' [--with-sanitizer] Build with sanitizer (memory, address, undefined)' | tee -a configure.log echo ' [--with-fuzzers] Build test/fuzz (disabled by default)' | tee -a configure.log echo ' [--native] Compiles with full instruction set supported on this host' | tee -a configure.log @@ -181,6 +183,7 @@ case "$1" in --with-dfltcc-deflate) builddfltccdeflate=1; shift ;; --with-dfltcc-inflate) builddfltccinflate=1; shift ;; --force-sse2) forcesse2=1; shift ;; + --force-tzcnt) forcetzcnt=1; shift ;; -n | --native) native=1; shift ;; -a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;; --sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;; @@ -1282,6 +1285,11 @@ case "${ARCH}" in ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o" ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo" fi + + if test $forcetzcnt -eq 1; then + CFLAGS="${CFLAGS} -DX86_NOCHECK_TZCNT" + SFLAGS="${SFLAGS} -DX86_NOCHECK_TZCNT" + fi fi ;; diff --git a/fallback_builtins.h b/fallback_builtins.h index 314ad32..afa5870 100644 --- a/fallback_builtins.h +++ b/fallback_builtins.h @@ -14,7 +14,9 @@ */ static __forceinline unsigned long __builtin_ctz(uint32_t value) { #ifdef X86_FEATURES +# ifndef X86_NOCHECK_TZCNT if (x86_cpu_has_tzcnt) +# endif return _tzcnt_u32(value); #endif unsigned long trailing_zero; @@ -29,7 +31,9 @@ static __forceinline unsigned long __builtin_ctz(uint32_t value) { */ static __forceinline unsigned long long __builtin_ctzll(uint64_t value) { #ifdef X86_FEATURES +# ifndef X86_NOCHECK_TZCNT if 
(x86_cpu_has_tzcnt) +# endif return _tzcnt_u64(value); #endif unsigned long trailing_zero; |