diff options
author | Richard Townsend <Richard.Townsend@arm.com> | 2018-01-25 19:03:04 +0000 |
---|---|---|
committer | Cosmin Truta <ctruta@gmail.com> | 2018-09-04 00:04:58 -0400 |
commit | 7734cda20cf1236aef60f3bbd2267c97bbb40869 (patch) | |
tree | 46bd39573775cfbec419527dcd3573e0bbfcf0e1 /pngrtran.c | |
parent | 2fc5ddff180f20ab8f7c689dc7a9562521c2f427 (diff) |
Optimize png_do_expand_palette for ARM
ARM-specific optimization processes 8 or 4 pixels at once.
Improves performance by around 10-22% on a recent ARM Chromebook.
Diffstat (limited to 'pngrtran.c')
-rw-r--r-- | pngrtran.c | 49 |
1 file changed, 41 insertions, 8 deletions
```diff
diff --git a/pngrtran.c b/pngrtran.c
index 6b9b7202f..292843d4f 100644
--- a/pngrtran.c
+++ b/pngrtran.c
@@ -18,6 +18,13 @@
 
 #include "pngpriv.h"
 
+#ifdef PNG_ARM_NEON_IMPLEMENTATION
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
+#include <arm_neon.h>
+#endif
+#endif
+
 #ifdef PNG_READ_SUPPORTED
 
 /* Set the action on getting a CRC error for an ancillary or critical chunk. */
@@ -4195,8 +4202,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
  * upon whether you supply trans and num_trans.
  */
 static void
-png_do_expand_palette(png_row_infop row_info, png_bytep row,
-    png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
+png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
+    png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
+    int num_trans)
 {
    int shift, value;
    png_bytep sp, dp;
@@ -4300,14 +4308,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
                sp = row + (size_t)row_width - 1;
                dp = row + ((size_t)row_width << 2) - 1;
 
-               for (i = 0; i < row_width; i++)
+               i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+               if (png_ptr->riffled_palette != NULL) {
+                  /* The RGBA optimization works with png_ptr->bit_depth == 8
+                     but sometimes row_info->bit_depth has been changed to 8.
+                     In these cases, the palette hasn't been riffled. */
+                  i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
+               }
+#endif
+
+               for (; i < row_width; i++)
                {
                   if ((int)(*sp) >= num_trans)
                      *dp-- = 0xff;
-
                   else
                      *dp-- = trans_alpha[*sp];
-
                   *dp-- = palette[*sp].blue;
                   *dp-- = palette[*sp].green;
                   *dp-- = palette[*sp].red;
@@ -4324,8 +4340,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
             {
                sp = row + (size_t)row_width - 1;
                dp = row + (size_t)(row_width * 3) - 1;
+               i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+               i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
+#endif
 
-               for (i = 0; i < row_width; i++)
+               for (; i < row_width; i++)
                {
                   *dp-- = palette[*sp].blue;
                   *dp-- = palette[*sp].green;
@@ -4739,8 +4759,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
    {
       if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
       {
-         png_do_expand_palette(row_info, png_ptr->row_buf + 1,
-             png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+         if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
+            /* Allocate space for the decompressed full palette. */
+            if (png_ptr->riffled_palette == NULL) {
+               png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
+               if (png_ptr->riffled_palette == NULL) {
+                  png_error(png_ptr, "NULL row buffer");
+               }
+               /* Build the RGBA palette. */
+               png_riffle_palette_rgba(png_ptr, row_info);
+            }
+         }
+#endif
+         png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
+             png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
       }
 
       else
```