summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Mika Lindqvist <postmaster@raasu.org>, 2022-03-13 17:12:42 +0200
committer: Hans Kristian Rosbach <hk-github@circlestorm.org>, 2023-03-17 21:27:56 +0100
commit: 10627e69df3360103bf132a97f7708e73d01122c (patch)
tree: 09edc30bca258675e6264558cbc1f1ddc3e84e69
parent: b9957e95dcaf5a38655b84e65cbaf0a9bee022af (diff)
Allow bypassing runtime feature check of TZCNT instructions.
* This avoids a conditional branch when it is known at build time that TZCNT instructions are always supported.
-rw-r--r-- CMakeLists.txt 10
-rw-r--r-- README.md 1
-rwxr-xr-x configure 8
-rw-r--r-- fallback_builtins.h 4
4 files changed, 17 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8e5646d..978ae2d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,6 +102,7 @@ elseif(BASEARCH_S360_FOUND)
add_option(WITH_DFLTCC_DEFLATE "Build with DFLTCC intrinsics for compression on IBM Z" OFF)
add_option(WITH_DFLTCC_INFLATE "Build with DFLTCC intrinsics for decompression on IBM Z" OFF)
elseif(BASEARCH_X86_FOUND)
+ option(FORCE_TZCNT "Always assume CPU is TZCNT capable" OFF)
add_option(WITH_AVX2 "Build with AVX2" ON)
add_option(WITH_SSE2 "Build with SSE2" ON)
add_option(WITH_SSSE3 "Build with SSSE3" ON)
@@ -821,13 +822,10 @@ if(WITH_OPTIM)
endif()
endif()
endif()
- if(WITH_SSSE3 AND HAVE_SSSE3_INTRIN)
- add_definitions(-DX86_SSSE3 -DX86_SSSE3_ADLER32)
- set(SSSE3_SRCS ${ARCHDIR}/adler32_ssse3.c)
- add_feature_info(SSSE3_ADLER32 1 "Support SSSE3-accelerated adler32, using \"${SSSE3FLAG}\"")
- list(APPEND ZLIB_ARCH_SRCS ${SSSE3_SRCS})
- set_property(SOURCE ${SSSE3_SRCS} PROPERTY COMPILE_FLAGS "${SSSE3FLAG} ${NOLTOFLAG}")
+ if(FORCE_TZCNT)
+ add_definitions(-DX86_NOCHECK_TZCNT)
endif()
+ add_feature_info(FORCE_TZCNT FORCE_TZCNT "Assume CPU is TZCNT capable")
if(WITH_PCLMULQDQ AND HAVE_PCLMULQDQ_INTRIN AND WITH_SSSE3 AND WITH_SSE4)
add_definitions(-DX86_PCLMULQDQ_CRC)
set(PCLMULQDQ_SRCS ${ARCHDIR}/crc_folding.c)
diff --git a/README.md b/README.md
index 8528f28..a89c8b7 100644
--- a/README.md
+++ b/README.md
@@ -195,6 +195,7 @@ Advanced Build Options
| ZLIB_DUAL_LINK | | Dual link tests with system zlib | OFF |
| UNALIGNED_OK | | Allow unaligned reads | ON (x86, arm) |
| | --force-sse2 | Skip runtime check for SSE2 instructions (Always on for x86_64) | OFF (x86) |
+| FORCE_TZCNT | --force-tzcnt | Skip runtime check for TZCNT instructions | OFF |
| WITH_AVX2 | | Build with AVX2 intrinsics | ON |
| WITH_SSE2 | | Build with SSE2 intrinsics | ON |
| WITH_SSE4 | | Build with SSE4 intrinsics | ON |
diff --git a/configure b/configure
index afc07f9..3712476 100755
--- a/configure
+++ b/configure
@@ -99,6 +99,7 @@ with_fuzzers=0
floatabi=
native=0
forcesse2=0
+forcetzcnt=0
avx2flag="-mavx2"
sse2flag="-msse2"
ssse3flag="-mssse3"
@@ -155,6 +156,7 @@ case "$1" in
echo ' [--with-dfltcc-deflate] Use DEFLATE CONVERSION CALL instruction for compression on IBM Z' | tee -a configure.log
echo ' [--with-dfltcc-inflate] Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z' | tee -a configure.log
echo ' [--force-sse2] Assume SSE2 instructions are always available (disabled by default on x86, enabled on x86_64)' | tee -a configure.log
+ echo ' [--force-tzcnt] Assume TZCNT instructions are always available (disabled by default)' | tee -a configure.log
echo ' [--with-sanitizer] Build with sanitizer (memory, address, undefined)' | tee -a configure.log
echo ' [--with-fuzzers] Build test/fuzz (disabled by default)' | tee -a configure.log
echo ' [--native] Compiles with full instruction set supported on this host' | tee -a configure.log
@@ -181,6 +183,7 @@ case "$1" in
--with-dfltcc-deflate) builddfltccdeflate=1; shift ;;
--with-dfltcc-inflate) builddfltccinflate=1; shift ;;
--force-sse2) forcesse2=1; shift ;;
+ --force-tzcnt) forcetzcnt=1; shift ;;
-n | --native) native=1; shift ;;
-a*=* | --archs=*) ARCHS=`echo $1 | sed 's/.*=//'`; shift ;;
--sysconfdir=*) echo "ignored option: --sysconfdir" | tee -a configure.log; shift ;;
@@ -1282,6 +1285,11 @@ case "${ARCH}" in
ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} crc_folding.o"
ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} crc_folding.lo"
fi
+
+ if test $forcetzcnt -eq 1; then
+ CFLAGS="${CFLAGS} -DX86_NOCHECK_TZCNT"
+ SFLAGS="${SFLAGS} -DX86_NOCHECK_TZCNT"
+ fi
fi
;;
diff --git a/fallback_builtins.h b/fallback_builtins.h
index 314ad32..afa5870 100644
--- a/fallback_builtins.h
+++ b/fallback_builtins.h
@@ -14,7 +14,9 @@
*/
static __forceinline unsigned long __builtin_ctz(uint32_t value) {
#ifdef X86_FEATURES
+# ifndef X86_NOCHECK_TZCNT
if (x86_cpu_has_tzcnt)
+# endif
return _tzcnt_u32(value);
#endif
unsigned long trailing_zero;
@@ -29,7 +31,9 @@ static __forceinline unsigned long __builtin_ctz(uint32_t value) {
*/
static __forceinline unsigned long long __builtin_ctzll(uint64_t value) {
#ifdef X86_FEATURES
+# ifndef X86_NOCHECK_TZCNT
if (x86_cpu_has_tzcnt)
+# endif
return _tzcnt_u64(value);
#endif
unsigned long trailing_zero;