diff options
author | Hans Kristian Rosbach <hk-git@circlestorm.org> | 2017-04-24 10:53:39 +0200 |
---|---|---|
committer | Hans Kristian Rosbach <hk-git@circlestorm.org> | 2017-04-24 11:02:56 +0200 |
commit | da5133848856cbef6a565736ba488b4b408922c8 (patch) | |
tree | 8fa52c8633678c6659836863cb2d8770a9b76540 /deflate_fast.c | |
parent | ec02ecf104e1d3f1836a908a359f20aa93494df5 (diff) |
Add a struct func_table and function functableInit.
The struct contains pointers to select functions to be used by the
rest of zlib, and the init function selects what functions will be
used depending on what optimizations have been compiled in and what
instruction-sets are available at runtime.
Tests done on a Haswell CPU running minigzip -6 compression of a
40M file show a 2.5% decrease in branches, and a 25-30% reduction
in iTLB-loads. The reduction in iTLB-loads is likely mostly due to
the inability to inline functions. This also causes a slight
performance regression of around 1%, but this might still be worth it
to make it much easier to implement new optimized functions for
various architectures and instruction sets.
The performance penalty will get smaller for functions that get more
alternative implementations to choose from, since there is no need
to add more branches to every call of the function.
Today insert_string has one branch to choose between insert_string_sse
and insert_string_c, but if we also added, for example, insert_string_sse4,
then that would need another branch, and it would probably
at some point hinder effective inlining too.
Diffstat (limited to 'deflate_fast.c')
-rw-r--r-- | deflate_fast.c | 13 |
1 files changed, 7 insertions, 6 deletions
diff --git a/deflate_fast.c b/deflate_fast.c index c169053..5b86e27 100644 --- a/deflate_fast.c +++ b/deflate_fast.c @@ -7,6 +7,7 @@ #include "deflate.h" #include "deflate_p.h" #include "match.h" +#include "functable.h" /* =========================================================================== * Compress as much as possible from the input stream, return the current @@ -26,7 +27,7 @@ block_state deflate_fast(deflate_state *s, int flush) { * string following the next match. */ if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); + functable.fill_window(s); if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { return need_more; } @@ -39,7 +40,7 @@ block_state deflate_fast(deflate_state *s, int flush) { */ hash_head = NIL; if (s->lookahead >= MIN_MATCH) { - hash_head = insert_string(s, s->strstart, 1); + hash_head = functable.insert_string(s, s->strstart, 1); } /* Find the longest match, discarding those <= prev_length. @@ -68,7 +69,7 @@ block_state deflate_fast(deflate_state *s, int flush) { s->strstart++; #ifdef NOT_TWEAK_COMPILER do { - insert_string(s, s->strstart, 1); + functable.insert_string(s, s->strstart, 1); s->strstart++; /* strstart never exceeds WSIZE-MAX_MATCH, so there are * always MIN_MATCH bytes ahead. 
@@ -76,7 +77,7 @@ block_state deflate_fast(deflate_state *s, int flush) { } while (--s->match_length != 0); #else { - insert_string(s, s->strstart, s->match_length); + functable.insert_string(s, s->strstart, s->match_length); s->strstart += s->match_length; s->match_length = 0; } @@ -86,9 +87,9 @@ block_state deflate_fast(deflate_state *s, int flush) { s->match_length = 0; s->ins_h = s->window[s->strstart]; #ifndef NOT_TWEAK_COMPILER - insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2); + functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2); #else - insert_string(s, s->strstart + 2 - MIN_MATCH, 1); + functable.insert_string(s, s->strstart + 2 - MIN_MATCH, 1); #if MIN_MATCH != 3 #warning Call insert_string() MIN_MATCH-3 more times #endif |