summaryrefslogtreecommitdiff
path: root/contrib/intel/intel_init.c
diff options
context:
space:
mode:
author Glenn Randers-Pehrson <glennrp at users.sourceforge.net> 2016-02-18 21:20:28 -0600
committer Glenn Randers-Pehrson <glennrp at users.sourceforge.net> 2016-02-18 21:20:28 -0600
commit 4233766b1dfa0485edebe44da0668a24f93b3ffd (patch)
tree f5b705b216bd2c79b56ca5601fef8f378b0da95c /contrib/intel/intel_init.c
parent f78d1cd47ad2fb26f8435b82bedc2470dc6ae357 (diff)
parent c3a45944e2a5c2fdc7d651876bb65c982c67bd34 (diff)
[libpng16] Added Intel SSE support (Matt Sarrett, Google Inc.)
Diffstat (limited to 'contrib/intel/intel_init.c')
-rw-r--r-- contrib/intel/intel_init.c 45
1 files changed, 45 insertions, 0 deletions
diff --git a/contrib/intel/intel_init.c b/contrib/intel/intel_init.c
new file mode 100644
index 000000000..394984e06
--- /dev/null
+++ b/contrib/intel/intel_init.c
@@ -0,0 +1,45 @@
+
+/* intel_init.c - SSE2 optimized filter functions
+ *
+ * Copyright (c) 2016 Google, Inc.
+ *
+ * This code is released under the libpng license.
+ * For conditions of distribution and use, see the disclaimer
+ * and license in png.h
+ */
+
+#include "../pngpriv.h"
+
+#ifdef PNG_READ_SUPPORTED
+#if PNG_INTEL_SSE_IMPLEMENTATION > 0
+
+void
+png_init_filter_functions_sse2(png_structp pp, unsigned int bpp)
+{
+ // Install the SSE2 Sub, Avg and Paeth row filters into pp->read_filter
+ // when bpp is 3 or 4; for any other bpp the table is left untouched.
+ // The SSE techniques used here operate on one pixel at a time, so they
+ // generally speed up 3bpp images about 3x and 4bpp images about 4x.
+ // They can scale up to 6 and 8 bpp images and down to 2 bpp images,
+ // but they'd not likely have any benefit for 1bpp images. Most can be done
+ // with only MMX and 64-bit registers, but that is a bit slower than SSE2.
+ if (bpp == 3)
+ { // read_filter slots are indexed by PNG_FILTER_VALUE_* minus one
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth3_sse2;
+ }
+ else if (bpp == 4)
+ { // same three slots, filled with the 4-bpp variants
+ pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2;
+ pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2;
+ pp->read_filter[PNG_FILTER_VALUE_PAETH-1] =
+ png_read_filter_row_paeth4_sse2;
+ }
+
+ // PNG_FILTER_VALUE_UP is left alone: the compiler should autovectorize it.
+}
+
+#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */
+#endif /* PNG_READ_SUPPORTED */