summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--CMakeLists.txt42
-rw-r--r--README.md1
-rw-r--r--arch/power/Makefile.in33
-rw-r--r--arch/power/power.c19
-rw-r--r--arch/power/power.h13
-rw-r--r--cmake/detect-arch.c6
-rw-r--r--cmake/detect-arch.cmake2
-rwxr-xr-xconfigure64
-rw-r--r--functable.c3
-rw-r--r--zutil.h2
11 files changed, 173 insertions, 13 deletions
diff --git a/.gitignore b/.gitignore
index 9af3854..b6512f1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,7 @@ a.out
/Makefile
/arch/arm/Makefile
/arch/generic/Makefile
+/arch/power/Makefile
/arch/x86/Makefile
.kdev4
*.kdev4
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73c6e0f..0842263 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -91,6 +91,8 @@ option(WITH_INFLATE_ALLOW_INVALID_DIST "Build with zero fill for inflate invalid
if(BASEARCH_ARM_FOUND)
option(WITH_ACLE "Build with ACLE" ON)
option(WITH_NEON "Build with NEON intrinsics" ON)
+elseif(BASEARCH_PPC_FOUND)
+ option(WITH_POWER8 "Build with optimisations for POWER8" ON)
elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
option(WITH_DFLTCC_DEFLATE "Use DEFLATE CONVERSION CALL instruction for compression on IBM Z" OFF)
option(WITH_DFLTCC_INFLATE "Use DEFLATE CONVERSION CALL instruction for decompression on IBM Z" OFF)
@@ -102,7 +104,7 @@ elseif(BASEARCH_X86_FOUND)
endif()
mark_as_advanced(FORCE ZLIB_DUAL_LINK WITH_ACLE WITH_NEON WITH_DFLTCC_DEFLATE WITH_DFLTCC_INFLATE
- WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST)
+ WITH_AVX2 WITH_SSE2 WITH_SSE4 WITH_PCLMULQDQ WITH_POWER8 WITH_INFLATE_STRICT WITH_INFLATE_ALLOW_INVALID_DIST)
add_feature_info(ZLIB_COMPAT ZLIB_COMPAT "Provide a zlib-compatible API")
add_feature_info(WITH_GZFILEOP WITH_GZFILEOP "Compile with support for gzFile-related functions")
@@ -114,6 +116,8 @@ add_feature_info(WITH_NEW_STRATEGIES WITH_NEW_STRATEGIES "Use new strategies")
if(BASEARCH_ARM_FOUND)
add_feature_info(WITH_ACLE WITH_ACLE "Build with ACLE CRC")
add_feature_info(WITH_NEON WITH_NEON "Build with NEON intrinsics")
+elseif(BASEARCH_PPC_FOUND)
+ add_feature_info(WITH_POWER8 WITH_POWER8 "Build with optimisations for POWER8")
endif()
add_feature_info(WITH_MAINTAINER_WARNINGS WITH_MAINTAINER_WARNINGS "Build with project maintainer warnings")
add_feature_info(WITH_CODE_COVERAGE WITH_CODE_COVERAGE "Enable code coverage reporting")
@@ -185,7 +189,11 @@ else()
endif()
if(WITH_NATIVE_INSTRUCTIONS)
if(__GNUC__)
- set(NATIVEFLAG "-march=native")
+ if(BASEARCH_PPC_FOUND)
+ set(NATIVEFLAG "-mcpu=native")
+ else()
+ set(NATIVEFLAG "-march=native")
+ endif()
else()
message(STATUS "Ignoring WITH_NATIVE_INSTRUCTIONS; not implemented yet on this configuration")
endif()
@@ -219,6 +227,8 @@ else()
endif()
# ACLE
set(ACLEFLAG "-march=armv8-a+crc")
+ elseif(BASEARCH_PPC_FOUND)
+ set(POWER8FLAG "-mcpu=power8")
elseif(BASEARCH_X86_FOUND)
set(AVX2FLAG "-mavx2")
set(SSE2FLAG "-msse2")
@@ -242,7 +252,7 @@ else()
endif()
# Set architecture alignment requirements
-if(BASEARCH_ARM_FOUND OR BASEARCH_X86_FOUND)
+if(BASEARCH_ARM_FOUND OR (BASEARCH_PPC_FOUND AND "${ARCH}" MATCHES "(ppc|powerpc)64le") OR BASEARCH_X86_FOUND) # little-endian 64-bit POWER under either spelling: detect-arch.c reports "ppc64le", other paths may supply "powerpc64le"
if(NOT DEFINED UNALIGNED_OK)
set(UNALIGNED_OK TRUE)
endif()
@@ -418,7 +428,18 @@ if(MSVC)
add_definitions(-D_CRT_NONSTDC_NO_DEPRECATE)
endif()
-if(BASEARCH_X86_FOUND)
+if(BASEARCH_PPC_FOUND)
+ # Check if we have what we need for POWER8 optimizations
+ set(CMAKE_REQUIRED_FLAGS "${POWER8FLAG}")
+ check_c_source_compiles(
+ "#include <sys/auxv.h>
+ int main() {
+ return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07);
+ }"
+ HAVE_POWER8
+ )
+ set(CMAKE_REQUIRED_FLAGS)
+elseif(BASEARCH_X86_FOUND)
# Check whether compiler supports SSE2 instrinics
if(WITH_NATIVE_INSTRUCTIONS)
set(CMAKE_REQUIRED_FLAGS "${NATIVEFLAG}")
@@ -581,6 +602,8 @@ set(ZLIB_ARCH_HDRS)
set(ARCHDIR "arch/generic")
if(BASEARCH_ARM_FOUND)
set(ARCHDIR "arch/arm")
+elseif(BASEARCH_PPC_FOUND)
+ set(ARCHDIR "arch/power")
elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
set(ARCHDIR "arch/s390")
elseif(BASEARCH_X86_FOUND)
@@ -617,6 +640,17 @@ if(WITH_OPTIM)
endif()
add_feature_info(ACLE_CRC 1 "Support ACLE optimized CRC hash generation, using \"${ACLEFLAG}\"")
endif()
+ elseif(BASEARCH_PPC_FOUND)
+ if(WITH_POWER8 AND HAVE_POWER8) # option enabled and the POWER8 compile probe succeeded
+ add_definitions(-DPOWER_FEATURES)
+ add_definitions(-DPOWER8)
+ set(ZLIB_POWER8_SRCS ) # empty in this revision: no POWER8-specific sources are listed here
+ set_source_files_properties(
+ ${ZLIB_POWER8_SRCS}
+ PROPERTIES COMPILE_FLAGS "${POWER8FLAG}") # quoted so an unset POWER8FLAG still supplies a (possibly empty) value for the property
+ list(APPEND ZLIB_ARCH_HDRS ${ARCHDIR}/power.h)
+ list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/power.c ${ZLIB_POWER8_SRCS})
+ endif()
elseif(BASEARCH_S360_FOUND AND "${ARCH}" MATCHES "s390x")
if(WITH_DFLTCC_DEFLATE OR WITH_DFLTCC_INFLATE)
list(APPEND ZLIB_ARCH_SRCS ${ARCHDIR}/dfltcc_common.c)
diff --git a/README.md b/README.md
index 07f336d..b9e3bc1 100644
--- a/README.md
+++ b/README.md
@@ -200,6 +200,7 @@ Advanced Build Options
| WITH_PCLMULQDQ | | Build with PCLMULQDQ intrinsics | ON |
| WITH_ACLE | --without-acle | Build with ACLE intrinsics | ON |
| WITH_NEON | --without-neon | Build with NEON intrinsics | ON |
+| WITH_POWER8 | | Build with POWER8 optimisations | ON |
| WITH_DFLTCC_DEFLATE | --with-dfltcc-deflate | Use DEFLATE COMPRESSION CALL instruction for compression on IBM Z | OFF |
| WITH_DFLTCC_INFLATE | --with-dfltcc-inflate | Use DEFLATE COMPRESSION CALL instruction for decompression on IBM Z | OFF |
| WITH_INFLATE_STRICT | | Build with strict inflate distance checking | OFF |
diff --git a/arch/power/Makefile.in b/arch/power/Makefile.in
new file mode 100644
index 0000000..a438fa5
--- /dev/null
+++ b/arch/power/Makefile.in
@@ -0,0 +1,33 @@
+# Makefile for POWER-specific files
+# Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+# For conditions of distribution and use, see copyright notice in zlib.h
+
+CC=
+CFLAGS=
+SFLAGS=
+INCLUDES=
+SUFFIX=
+
+SRCDIR=.
+SRCTOP=../..
+TOPDIR=$(SRCTOP)
+
+P8FLAGS=-mcpu=power8
+# NOTE(review): no rule in this file references P8FLAGS - confirm whether it is reserved for later POWER8-only sources.
+all: power.o \
+ power.lo
+# power.o: power.c compiled with CFLAGS.
+power.o:
+ $(CC) $(CFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+# power.lo: the same source compiled with SFLAGS.
+power.lo:
+ $(CC) $(SFLAGS) $(INCLUDES) -c -o $@ $(SRCDIR)/power.c
+
+mostlyclean: clean
+clean:
+ rm -f *.o *.lo *~
+ rm -rf objs
+ rm -f *.gcda *.gcno *.gcov
+
+distclean:
+ rm -f Makefile
diff --git a/arch/power/power.c b/arch/power/power.c
new file mode 100644
index 0000000..8f0c49c
--- /dev/null
+++ b/arch/power/power.c
@@ -0,0 +1,19 @@
+/* POWER feature check
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include <sys/auxv.h>
+#include "zutil.h"
+
+ZLIB_INTERNAL int power_cpu_has_arch_2_07;
+
+void ZLIB_INTERNAL power_check_features(void) {
+    /* Read the AT_HWCAP2 word of the auxiliary vector; it is consulted only in POWER8 builds. */
+    unsigned long hwcap2 = getauxval(AT_HWCAP2);
+
+#ifdef POWER8
+    /* Latch the flag when the PPC_FEATURE2_ARCH_2_07 bit is set; nothing here ever clears it. */
+    if ((hwcap2 & PPC_FEATURE2_ARCH_2_07) != 0) power_cpu_has_arch_2_07 = 1;
+#endif
+}
diff --git a/arch/power/power.h b/arch/power/power.h
new file mode 100644
index 0000000..4ce6384
--- /dev/null
+++ b/arch/power/power.h
@@ -0,0 +1,13 @@
+/* power.h -- check for POWER CPU features
+ * Copyright (C) 2020 Matheus Castanho <msc@linux.ibm.com>, IBM
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef POWER_H_
+#define POWER_H_
+/* Set to 1 by power_check_features() when AT_HWCAP2 reports PPC_FEATURE2_ARCH_2_07 (POWER8 builds only). */
+extern int power_cpu_has_arch_2_07;
+/* Reads AT_HWCAP2 via getauxval() and updates the feature flag declared in this header. */
+void ZLIB_INTERNAL power_check_features(void);
+
+#endif /* POWER_H_ */
diff --git a/cmake/detect-arch.c b/cmake/detect-arch.c
index 32a8db8..d7017d8 100644
--- a/cmake/detect-arch.c
+++ b/cmake/detect-arch.c
@@ -34,7 +34,11 @@
// PowerPC
#elif defined(__powerpc__) || defined(_ppc__) || defined(__PPC__)
#if defined(__64BIT__) || defined(__powerpc64__) || defined(__ppc64__)
- #error archfound ppc64
+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ #error archfound ppc64le
+ #else
+ #error archfound ppc64
+ #endif
#else
#error archfound ppc
#endif
diff --git a/cmake/detect-arch.cmake b/cmake/detect-arch.cmake
index f4eb99f..b80d666 100644
--- a/cmake/detect-arch.cmake
+++ b/cmake/detect-arch.cmake
@@ -52,7 +52,7 @@ if("${ARCH}" MATCHES "(x86_64|AMD64|i[3-6]86)")
elseif("${ARCH}" MATCHES "(arm(v[0-9])?|aarch64)")
set(BASEARCH "arm")
set(BASEARCH_ARM_FOUND TRUE)
-elseif("${ARCH}" MATCHES "ppc(64)?|powerpc(64)?")
+elseif("${ARCH}" MATCHES "ppc(64(le)?)?|powerpc(64(le)?)?")
set(BASEARCH "ppc")
set(BASEARCH_PPC_FOUND TRUE)
elseif("${ARCH}" MATCHES "alpha")
diff --git a/configure b/configure
index 50172fd..5004c54 100755
--- a/configure
+++ b/configure
@@ -326,6 +326,12 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
else
ARCH=native
fi ;;
+ powerpc | ppc)
+ ARCH=powerpc ;;
+ powerpc64 | ppc64)
+ ARCH=powerpc64 ;;
+ powerpc64le | ppc64le)
+ ARCH=powerpc64le ;;
esac
CFLAGS="-O2 ${CFLAGS}"
if test -n "${ARCHS}"; then
@@ -335,8 +341,14 @@ if test "$gcc" -eq 1 && ($cc $CFLAGS -c $test.c) >> configure.log 2>&1; then
CFLAGS="${CFLAGS} -Wall"
SFLAGS="${CFLAGS} -fPIC"
if test $native -eq 1; then
- CFLAGS="${CFLAGS} -march=native"
- SFLAGS="${SFLAGS} -march=native"
+ case $ARCH in
+ powerpc*)
+ NATIVE_FLAG="-mcpu=native" ;;
+ *)
+ NATIVE_FLAG="-march=native" ;;
+ esac
+ CFLAGS="${CFLAGS} ${NATIVE_FLAG}"
+ SFLAGS="${SFLAGS} ${NATIVE_FLAG}"
fi
if test "$warn" -eq 1; then
CFLAGS="${CFLAGS} -Wextra -Wpedantic -Wno-implicit-fallthrough"
@@ -1024,6 +1036,22 @@ EOF
;;
esac
+# For powerpc* targets only: probe whether a program using getauxval(AT_HWCAP2) and PPC_FEATURE2_ARCH_2_07 compiles with -mcpu=power8, and record the result in HAVE_POWER8 (1 or 0)
+case "${ARCH}" in
+ powerpc*)
+ cat > $test.c << EOF
+#include <sys/auxv.h>
+int main() { return (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07); }
+EOF
+ if try $CC -c $CFLAGS -mcpu=power8 $test.c; then # compile only (-c); the probe is never linked or run
+ HAVE_POWER8=1
+ echo "Check whether POWER8 instructions are available ... Yes." | tee -a configure.log
+ else
+ HAVE_POWER8=0
+ echo "Check whether POWER8 instructions are available ... No." | tee -a configure.log
+ fi
+esac
+
# Check whether sys/sdt.h is available
cat > $test.c << EOF
#include <sys/sdt.h>
@@ -1325,11 +1353,33 @@ case "${ARCH}" in
CFLAGS="-march=${ARCH} ${CFLAGS} -DUNALIGNED_OK"
SFLAGS="-march=${ARCH} ${SFLAGS} -DUNALIGNED_OK"
;;
- powerpc)
- [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc
- ;;
- powerpc64)
- [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64
+ powerpc*)
+ case "${ARCH}" in
+ powerpc)
+ [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc
+ ;;
+ powerpc64)
+ [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64
+ ;;
+ powerpc64le)
+ [ ! -z $CROSS_PREFIX ] && QEMU_ARCH=ppc64le
+ CFLAGS="${CFLAGS} -DUNALIGNED_OK"
+ SFLAGS="${SFLAGS} -DUNALIGNED_OK"
+ ;;
+ esac
+
+ ARCHDIR=arch/power
+
+ if test $without_optimizations -eq 0; then
+ if test $HAVE_POWER8 -eq 1; then
+ ARCH_STATIC_OBJS="${ARCH_STATIC_OBJS} power.o"
+ ARCH_SHARED_OBJS="${ARCH_SHARED_OBJS} power.lo"
+ POWERFLAGS="-DPOWER_FEATURES -DPOWER8"
+ fi
+ fi
+
+ CFLAGS="${CFLAGS} ${POWERFLAGS}"
+ SFLAGS="${SFLAGS} ${POWERFLAGS}"
;;
s390x)
[ ! -z $CROSS_PREFIX ] && QEMU_ARCH=s390x
diff --git a/functable.c b/functable.c
index 1a203e3..aad8766 100644
--- a/functable.c
+++ b/functable.c
@@ -29,6 +29,7 @@ extern Pos quick_insert_string_sse4(deflate_state *const s, const Pos str);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos quick_insert_string_acle(deflate_state *const s, const Pos str);
#endif
+
/* slide_hash */
#ifdef X86_SSE2
void slide_hash_sse2(deflate_state *s);
@@ -115,6 +116,8 @@ ZLIB_INTERNAL void cpu_check_features(void)
x86_check_features();
#elif ARM_CPUID
arm_check_features();
+#elif POWER_FEATURES
+ power_check_features();
#endif
features_checked = 1;
}
diff --git a/zutil.h b/zutil.h
index 0c9accb..b8bce16 100644
--- a/zutil.h
+++ b/zutil.h
@@ -250,6 +250,8 @@ void ZLIB_INTERNAL zng_cfree(void *opaque, void *ptr);
# include "arch/x86/x86.h"
#elif defined(ARM_CPUID)
# include "arch/arm/arm.h"
+#elif defined(POWER_FEATURES)
+# include "arch/power/power.h"
#endif
#endif /* ZUTIL_H_ */