summary | refs | log | tree | commit | diff
path: root/pngrtran.c
diff options
context:
space:
mode:
author: Richard Townsend <Richard.Townsend@arm.com> 2018-01-25 19:03:04 +0000
committer: Cosmin Truta <ctruta@gmail.com> 2018-09-04 00:04:58 -0400
commit: 7734cda20cf1236aef60f3bbd2267c97bbb40869 (patch)
tree: 46bd39573775cfbec419527dcd3573e0bbfcf0e1 /pngrtran.c
parent: 2fc5ddff180f20ab8f7c689dc7a9562521c2f427 (diff)
Optimize png_do_expand_palette for ARM
ARM-specific optimization processes 8 or 4 pixels at once. Improves performance by around 10-22% on a recent ARM Chromebook.
Diffstat (limited to 'pngrtran.c')
-rw-r--r-- pngrtran.c 49
1 files changed, 41 insertions, 8 deletions
diff --git a/pngrtran.c b/pngrtran.c
index 6b9b7202f..292843d4f 100644
--- a/pngrtran.c
+++ b/pngrtran.c
@@ -18,6 +18,13 @@
#include "pngpriv.h"
+#ifdef PNG_ARM_NEON_IMPLEMENTATION
+#if PNG_ARM_NEON_IMPLEMENTATION == 1
+#define PNG_ARM_NEON_INTRINSICS_AVAILABLE
+#include <arm_neon.h>
+#endif
+#endif
+
#ifdef PNG_READ_SUPPORTED
/* Set the action on getting a CRC error for an ancillary or critical chunk. */
@@ -4195,8 +4202,9 @@ png_do_encode_alpha(png_row_infop row_info, png_bytep row, png_structrp png_ptr)
* upon whether you supply trans and num_trans.
*/
static void
-png_do_expand_palette(png_row_infop row_info, png_bytep row,
- png_const_colorp palette, png_const_bytep trans_alpha, int num_trans)
+png_do_expand_palette(png_structrp png_ptr, png_row_infop row_info,
+ png_bytep row, png_const_colorp palette, png_const_bytep trans_alpha,
+ int num_trans)
{
int shift, value;
png_bytep sp, dp;
@@ -4300,14 +4308,22 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
sp = row + (size_t)row_width - 1;
dp = row + ((size_t)row_width << 2) - 1;
- for (i = 0; i < row_width; i++)
+ i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ if (png_ptr->riffled_palette != NULL) {
+ /* The RGBA optimization works with png_ptr->bit_depth == 8
+ but sometimes row_info->bit_depth has been changed to 8.
+ In these cases, the palette hasn't been riffled. */
+ i = png_do_expand_palette_neon_rgba(png_ptr, row_info, row, &sp, &dp);
+ }
+#endif
+
+ for (; i < row_width; i++)
{
if ((int)(*sp) >= num_trans)
*dp-- = 0xff;
-
else
*dp-- = trans_alpha[*sp];
-
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
*dp-- = palette[*sp].red;
@@ -4324,8 +4340,12 @@ png_do_expand_palette(png_row_infop row_info, png_bytep row,
{
sp = row + (size_t)row_width - 1;
dp = row + (size_t)(row_width * 3) - 1;
+ i = 0;
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ i = png_do_expand_palette_neon_rgb(png_ptr, row_info, row, &sp, &dp);
+#endif
- for (i = 0; i < row_width; i++)
+ for (; i < row_width; i++)
{
*dp-- = palette[*sp].blue;
*dp-- = palette[*sp].green;
@@ -4739,8 +4759,21 @@ png_do_read_transformations(png_structrp png_ptr, png_row_infop row_info)
{
if (row_info->color_type == PNG_COLOR_TYPE_PALETTE)
{
- png_do_expand_palette(row_info, png_ptr->row_buf + 1,
- png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
+#ifdef PNG_ARM_NEON_INTRINSICS_AVAILABLE
+ if ((png_ptr->num_trans > 0) && (png_ptr->bit_depth == 8)) {
+ /* Allocate space for the decompressed full palette. */
+ if (png_ptr->riffled_palette == NULL) {
+ png_ptr->riffled_palette = png_malloc(png_ptr, 256*4);
+ if (png_ptr->riffled_palette == NULL) {
+ png_error(png_ptr, "NULL row buffer");
+ }
+ /* Build the RGBA palette. */
+ png_riffle_palette_rgba(png_ptr, row_info);
+ }
+ }
+#endif
+ png_do_expand_palette(png_ptr, row_info, png_ptr->row_buf + 1,
+ png_ptr->palette, png_ptr->trans_alpha, png_ptr->num_trans);
}
else