diff options
author | Matt Sarett <msarett@google.com> | 2016-02-18 12:43:50 -0500 |
---|---|---|
committer | Matt Sarett <msarett@google.com> | 2016-02-18 12:43:50 -0500 |
commit | 342c4eab2a0565de456f1f3efcc41b635544160e (patch) | |
tree | 0bb70b189dfc193530668c007786c81fd118cb77 /contrib/intel/intel_init.c | |
parent | fb375429b5f8e2f1e53eecf3155d5172c59b02ef (diff) |
Move sse opts into contrib/intel
Diffstat (limited to 'contrib/intel/intel_init.c')
-rw-r--r-- | contrib/intel/intel_init.c | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/contrib/intel/intel_init.c b/contrib/intel/intel_init.c new file mode 100644 index 000000000..394984e06 --- /dev/null +++ b/contrib/intel/intel_init.c @@ -0,0 +1,45 @@ + +/* intel_init.c - SSE2 optimized filter functions + * + * Copyright (c) 2016 Google, Inc. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED +#if PNG_INTEL_SSE_IMPLEMENTATION > 0 + +void +png_init_filter_functions_sse2(png_structp pp, unsigned int bpp) +{ + // The techniques used to implement each of these filters in SSE operate on + // one pixel at a time. + // So they generally speed up 3bpp images about 3x, 4bpp images about 4x. + // They can scale up to 6 and 8 bpp images and down to 2 bpp images, + // but they'd not likely have any benefit for 1bpp images. + // Most of these can be implemented using only MMX and 64-bit registers, + // but they end up a bit slower than using the equally-ubiquitous SSE2. + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_sse2; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_sse2; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth3_sse2; + } + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_sse2; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_sse2; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = + png_read_filter_row_paeth4_sse2; + } + + // No need optimize PNG_FILTER_VALUE_UP. The compiler should autovectorize. +} + +#endif /* PNG_INTEL_SSE_IMPLEMENTATION > 0 */ +#endif /* PNG_READ_SUPPORTED */ |