diff options
author | Adenilson Cavalcanti <cavalcantii@gmail.com> | 2017-04-22 02:41:47 -0700 |
---|---|---|
committer | Hans Kristian Rosbach <hk-github@circlestorm.org> | 2017-04-22 11:41:47 +0200 |
commit | ec02ecf104e1d3f1836a908a359f20aa93494df5 (patch) | |
tree | f4981f964863a2037cdce45243b99976b8a7971e /adler32.c | |
parent | 9487690991eff131112153813e557572100ae92d (diff) |
Implementing NEON-ized Adler32 checksum (#102)
The checksum is calculated over the uncompressed PNG data and can be
made much faster by using SIMD. Tests on ARMv8 yielded an improvement
of about 3x (e.g. wall time dropped from 350 ms to 125 ms for a
4096x4096-byte input executed 30 times).
This yields an improvement of around 18% in image decoding in Chromium
(see https://bugs.chromium.org/p/chromium/issues/detail?id=688601).
Diffstat (limited to 'adler32.c')
-rw-r--r-- | adler32.c | 8 |
1 files changed, 8 insertions, 0 deletions
@@ -7,6 +7,10 @@ #include "zutil.h" +#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) +extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len); +#endif + static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len2); #define BASE 65521U /* largest prime smaller than 65536 */ @@ -61,6 +65,10 @@ static uint32_t adler32_combine_(uint32_t adler1, uint32_t adler2, z_off64_t len /* ========================================================================= */ uint32_t ZEXPORT adler32_z(uint32_t adler, const unsigned char *buf, size_t len) { +#if (defined(__ARM_NEON__) || defined(__ARM_NEON)) + return adler32_neon(adler, buf, len); +#endif + uint32_t sum2; unsigned n; |