From 518d81558c797486e125e37cb529d65b560a6ea0 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Tue, 12 Jan 2021 11:33:28 +0000 Subject: Cherry-pick Arm CLZ fixes from upstream Cherry-pick two patches from upstream that fix the Neon intrinsics Huffman encoding path and reduce the memory footprint on Windows on Arm: https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158 https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e Re-enable the Neon intrinsics Huffman encoding path for WoA compiled with clang-cl. Bug: 1160249 Change-Id: I0849ca54b8f4f8f38c9b293ea48c9de1c60be86f --- README.chromium | 7 +++---- jchuff.c | 8 ++++++-- jcphuff.c | 9 +++++++-- simd/arm/aarch64/jchuff-neon.c | 6 +++--- simd/arm/aarch64/jsimd.c | 3 --- simd/arm/jcphuff-neon.c | 4 ++-- simd/arm/neon-compat.h | 6 +++--- simd/arm/neon-compat.h.in | 6 +++--- 8 files changed, 27 insertions(+), 22 deletions(-) diff --git a/README.chromium b/README.chromium index b0a1623..e8b0bc6 100644 --- a/README.chromium +++ b/README.chromium @@ -14,11 +14,12 @@ This consists of the components: * An OWNERS file * A codereview.settings file * Patched header files used by Chromium -* Cherry-picked three additional patches from upstream master to fix bugs found - by fuzzers: +* Additional patches cherry-picked from upstream master to fix various bugs: https://github.com/libjpeg-turbo/libjpeg-turbo/commit/ccaba5d7894ecfb5a8f11e48d3f86e1f14d5a469 https://github.com/libjpeg-turbo/libjpeg-turbo/commit/c7ca521bc85b57d41d3ad4963c13fc0100481084 https://github.com/libjpeg-turbo/libjpeg-turbo/commit/110d8d6dcafaed517e8f77a6253169535ee3a20e + https://github.com/libjpeg-turbo/libjpeg-turbo/commit/d2c407995992be1f128704ae2479adfd7906c158 + https://github.com/libjpeg-turbo/libjpeg-turbo/commit/74e6ea45e3547ae85cd43efcdfc24a6907a2154e * Deleted unused directories: ci, cmakescripts, doc, java, release, sharedlib, simd/loongson, simd/mips, simd/powerpc, and win * Deleted unused files: appveyor.yml, CMakeLists.txt, doxygen.config, @@ -73,8 +74,6 @@ following changes which are not merged to upstream: - Refactor djpeg.c to provide test interface A new gtest directory contains GTest wrappers (and associated utilities) for each of tjunittest, tjbench, cjpeg, djpeg and jpegtran. -* Disable Neon SIMD path for Huffman encoding when compiling for Windows on Arm - using Clang-cl: http://crbug.com/1160249 Refer to working-with-nested-repos [1] for details of how to setup your git svn client to update the code (for making local changes, cherry picking from diff --git a/jchuff.c b/jchuff.c index 8ea48b8..e2d5772 100644 --- a/jchuff.c +++ b/jchuff.c @@ -44,15 +44,19 @@ * flags (this defines __thumb__). */ -/* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ + defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" diff --git a/jcphuff.c b/jcphuff.c index a8b94be..373d71f 100644 --- a/jcphuff.c +++ b/jcphuff.c @@ -6,6 +6,7 @@ * libjpeg-turbo Modifications: * Copyright (C) 2011, 2015, 2018, D. R. Commander. * Copyright (C) 2016, 2018, Matthieu Darbois. + * Copyright (C) 2020, Arm Limited. * For conditions of distribution and use, see the accompanying README.ijg * file. * @@ -51,15 +52,19 @@ * flags (this defines __thumb__). */ -/* NOTE: Both GCC and Clang define __GNUC__ */ -#if defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__)) +#if defined(__arm__) || defined(__aarch64__) || defined(_M_ARM) || \ + defined(_M_ARM64) #if !defined(__thumb__) || defined(__thumb2__) #define USE_CLZ_INTRINSIC #endif #endif #ifdef USE_CLZ_INTRINSIC +#if defined(_MSC_VER) && !defined(__clang__) +#define JPEG_NBITS_NONZERO(x) (32 - _CountLeadingZeros(x)) +#else #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) +#endif #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) #else #include "jpeg_nbits_table.h" diff --git a/simd/arm/aarch64/jchuff-neon.c b/simd/arm/aarch64/jchuff-neon.c index a0a57a6..f13fd1b 100644 --- a/simd/arm/aarch64/jchuff-neon.c +++ b/simd/arm/aarch64/jchuff-neon.c @@ -1,7 +1,7 @@ /* * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon) * - * Copyright (C) 2020, Arm Limited. All Rights Reserved. + * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * Copyright (C) 2020, D. R. Commander. All Rights Reserved. * * This software is provided 'as-is', without any express or implied @@ -331,7 +331,7 @@ JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer, vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff); while (bitmap != 0) { - r = BUILTIN_CLZL(bitmap); + r = BUILTIN_CLZLL(bitmap); i += r; bitmap <<= r; nbits = block_nbits[i]; @@ -370,7 +370,7 @@ JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer, /* Same as above but must mask diff bits and compute nbits on demand. */ while (bitmap != 0) { - r = BUILTIN_CLZL(bitmap); + r = BUILTIN_CLZLL(bitmap); i += r; bitmap <<= r; lz = BUILTIN_CLZ(block_abs[i]); diff --git a/simd/arm/aarch64/jsimd.c b/simd/arm/aarch64/jsimd.c index 4991bc0..8570b82 100644 --- a/simd/arm/aarch64/jsimd.c +++ b/simd/arm/aarch64/jsimd.c @@ -977,8 +977,6 @@ jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr, GLOBAL(int) jsimd_can_huff_encode_one_block(void) { -/* Disable for Windows on Arm compiled with Clang-cl: crbug.com/1160249 */ -#if !(defined(_MSC_VER) && defined(__clang__)) init_simd(); if (DCTSIZE != 8) @@ -988,7 +986,6 @@ jsimd_can_huff_encode_one_block(void) if (simd_support & JSIMD_NEON && simd_huffman) return 1; -#endif return 0; } diff --git a/simd/arm/jcphuff-neon.c b/simd/arm/jcphuff-neon.c index 8b6d53b..86a263f 100644 --- a/simd/arm/jcphuff-neon.c +++ b/simd/arm/jcphuff-neon.c @@ -1,7 +1,7 @@ /* * jcphuff-neon.c - prepare data for progressive Huffman encoding (Arm Neon) * - * Copyright (C) 2020, Arm Limited. All Rights Reserved. + * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -572,7 +572,7 @@ int jsimd_encode_mcu_AC_refine_prepare_neon /* EOB position is defined to be 0 if all coefficients != 1. */ return 0; } else { - return 63 - BUILTIN_CLZL(bitmap); + return 63 - BUILTIN_CLZLL(bitmap); } #else /* Move bitmap to two 32-bit scalar registers. */ diff --git a/simd/arm/neon-compat.h b/simd/arm/neon-compat.h index 3ce3bcb..543d860 100644 --- a/simd/arm/neon-compat.h +++ b/simd/arm/neon-compat.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2020, D. R. Commander. All Rights Reserved. - * Copyright (C) 2020, Arm Limited. All Rights Reserved. + * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -28,10 +28,10 @@ /* Define compiler-independent count-leading-zeros macros */ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) -#define BUILTIN_CLZL(x) _CountLeadingZeros64(x) +#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) -#define BUILTIN_CLZL(x) __builtin_clzl(x) +#define BUILTIN_CLZLL(x) __builtin_clzll(x) #else #error "Unknown compiler" #endif diff --git a/simd/arm/neon-compat.h.in b/simd/arm/neon-compat.h.in index e2347b9..23d6d28 100644 --- a/simd/arm/neon-compat.h.in +++ b/simd/arm/neon-compat.h.in @@ -1,6 +1,6 @@ /* * Copyright (C) 2020, D. R. Commander. All Rights Reserved. - * Copyright (C) 2020, Arm Limited. All Rights Reserved. + * Copyright (C) 2020-2021, Arm Limited. All Rights Reserved. * * This software is provided 'as-is', without any express or implied * warranty. In no event will the authors be held liable for any damages @@ -26,10 +26,10 @@ /* Define compiler-independent count-leading-zeros macros */ #if defined(_MSC_VER) && !defined(__clang__) #define BUILTIN_CLZ(x) _CountLeadingZeros(x) -#define BUILTIN_CLZL(x) _CountLeadingZeros64(x) +#define BUILTIN_CLZLL(x) _CountLeadingZeros64(x) #elif defined(__clang__) || defined(__GNUC__) #define BUILTIN_CLZ(x) __builtin_clz(x) -#define BUILTIN_CLZL(x) __builtin_clzl(x) +#define BUILTIN_CLZLL(x) __builtin_clzll(x) #else #error "Unknown compiler" #endif -- cgit v1.2.3 From f16a10d1d9e6cb24dba253cb8105c250b3b9d627 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Mon, 18 Jan 2021 11:19:10 +0000 Subject: Tidy up BUILD.gn file Simplify conditions to use Arm Neon sources and reduce duplication for AArch64 and AArch32 Neon builds. Change-Id: I8c1f8547741db3ccc6db20e6fef9a73ca49db6d3 --- BUILD.gn | 50 ++++++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 32 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index 80bdf9b..813357a 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -6,7 +6,7 @@ # implementation. Use the meta target //third_party:jpeg instead. import("//build/config/sanitizers/sanitizers.gni") -if (current_cpu == "arm") { +if (current_cpu == "arm" || current_cpu == "arm64") { import("//build/config/arm.gni") } @@ -150,36 +150,11 @@ static_library("simd") { sources = [ "simd/x86_64/jsimd.c", ] - } else if (current_cpu == "arm" && arm_version >= 7 && - (arm_use_neon || arm_optionally_use_neon)) { - include_dirs += [ "simd/arm/" ] - sources = [ - "simd/arm/aarch32/jchuff-neon.c", - "simd/arm/aarch32/jsimd.c", - "simd/arm/jccolor-neon.c", - "simd/arm/jcgray-neon.c", - "simd/arm/jcphuff-neon.c", - "simd/arm/jcsample-neon.c", - "simd/arm/jdcolor-neon.c", - "simd/arm/jdmerge-neon.c", - "simd/arm/jdsample-neon.c", - "simd/arm/jfdctfst-neon.c", - "simd/arm/jfdctint-neon.c", - "simd/arm/jidctfst-neon.c", - "simd/arm/jidctint-neon.c", - "simd/arm/jidctred-neon.c", - "simd/arm/jquanti-neon.c", - ] - defines = [ - "NEON_INTRINSICS" - ] - configs -= [ "//build/config/compiler:default_optimization" ] - configs += [ "//build/config/compiler:optimize_speed" ] - } else if (current_cpu == "arm64") { + } else if ((current_cpu == "arm" || current_cpu == "arm64") && + arm_use_neon) { include_dirs += [ "simd/arm/" ] + sources = [ - "simd/arm/aarch64/jchuff-neon.c", - "simd/arm/aarch64/jsimd.c", "simd/arm/jccolor-neon.c", "simd/arm/jcgray-neon.c", "simd/arm/jcphuff-neon.c", @@ -194,9 +169,22 @@ static_library("simd") { "simd/arm/jidctred-neon.c", "simd/arm/jquanti-neon.c", ] + if (current_cpu == "arm") { + sources += [ + "simd/arm/aarch32/jchuff-neon.c", + "simd/arm/aarch32/jsimd.c", + ] + } else if (current_cpu == "arm64"){ + sources += [ + "simd/arm/aarch64/jchuff-neon.c", + "simd/arm/aarch64/jsimd.c", + ] + } + defines = [ "NEON_INTRINSICS" ] + configs -= [ "//build/config/compiler:default_optimization" ] configs += [ "//build/config/compiler:optimize_speed" ] } else { @@ -286,9 +274,7 @@ static_library("libjpeg") { } else { public_deps += [ ":simd" ] - if ((current_cpu == "arm" && arm_version >= 7 && - (arm_use_neon || arm_optionally_use_neon)) || - current_cpu == "arm64") { + if ((current_cpu == "arm" || current_cpu == "arm64") && arm_use_neon) { defines += [ "NEON_INTRINSICS", ] } } -- cgit v1.2.3 From fa0de07678c9828cc57b3eb086c03771912ba527 Mon Sep 17 00:00:00 2001 From: Jonathan Wright Date: Mon, 18 Jan 2021 11:41:42 +0000 Subject: Add Arm Neon SIMD paths to MSan builds MSan does not support assembly code so all SIMD paths were disabled for MSan builds of libjpeg-turbo. Now that all Arm Neon SIMD paths are implemented using intrinsics (and all assembly code has been removed) we can enable these paths for MSan builds. Change-Id: Id244fa2f710d6647a20f2a0d1c6760813623b0cf --- BUILD.gn | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/BUILD.gn b/BUILD.gn index 813357a..5befffd 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -267,9 +267,9 @@ static_library("libjpeg") { ":libjpeg_headers", ] - # MemorySanitizer doesn't support assembly code, so keep it disabled in - # MSan builds for now. - if (is_msan) { + # MemorySanitizer doesn't support assembly code, so keep it disabled in x86 + # and x64 MSan builds for now. + if (is_msan && (current_cpu == "x86" || current_cpu == "x64")) { sources += [ "jsimd_none.c" ] } else { public_deps += [ ":simd" ] -- cgit v1.2.3