summaryrefslogtreecommitdiff
path: root/deflate_slow.c
diff options
context:
space:
mode:
author Hans Kristian Rosbach <hk-git@circlestorm.org> 2017-04-24 10:53:39 +0200
committer Hans Kristian Rosbach <hk-git@circlestorm.org> 2017-04-24 11:02:56 +0200
commit da5133848856cbef6a565736ba488b4b408922c8 (patch)
tree 8fa52c8633678c6659836863cb2d8770a9b76540 /deflate_slow.c
parent ec02ecf104e1d3f1836a908a359f20aa93494df5 (diff)
Add a struct func_table and function functableInit.
The struct contains pointers to select functions to be used by the rest of zlib, and the init function selects which functions will be used depending on what optimizations have been compiled in and what instruction sets are available at runtime. Tests done on a Haswell CPU running minigzip -6 compression of a 40M file show a 2.5% decrease in branches and a 25-30% reduction in iTLB-loads. The reduction in iTLB-loads is likely mostly due to the inability to inline functions. This also causes a slight performance regression of around 1%, but it might still be worth it because it makes it much easier to implement new optimized functions for various architectures and instruction sets. The performance penalty will get smaller for functions that get more alternative implementations to choose from, since there is no need to add more branches to every call of the function. Today insert_string has 1 branch to choose between insert_string_sse and insert_string_c, but if we also added, for example, insert_string_sse4, that would have needed another branch, and it would probably at some point hinder effective inlining too.
Diffstat (limited to 'deflate_slow.c')
-rw-r--r-- deflate_slow.c 9
1 files changed, 5 insertions, 4 deletions
diff --git a/deflate_slow.c b/deflate_slow.c
index c0be3ea..61c1888 100644
--- a/deflate_slow.c
+++ b/deflate_slow.c
@@ -7,6 +7,7 @@
#include "deflate.h"
#include "deflate_p.h"
#include "match.h"
+#include "functable.h"
/* ===========================================================================
* Local data
@@ -34,7 +35,7 @@ block_state deflate_slow(deflate_state *s, int flush) {
* string following the next match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
+ functable.fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
return need_more;
}
@@ -47,7 +48,7 @@ block_state deflate_slow(deflate_state *s, int flush) {
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- hash_head = insert_string(s, s->strstart, 1);
+ hash_head = functable.insert_string(s, s->strstart, 1);
}
/* Find the longest match, discarding those <= prev_length.
@@ -97,7 +98,7 @@ block_state deflate_slow(deflate_state *s, int flush) {
s->prev_length -= 2;
do {
if (++s->strstart <= max_insert) {
- insert_string(s, s->strstart, 1);
+ functable.insert_string(s, s->strstart, 1);
}
} while (--s->prev_length != 0);
s->match_available = 0;
@@ -110,7 +111,7 @@ block_state deflate_slow(deflate_state *s, int flush) {
if (unlikely(insert_cnt > max_insert - s->strstart))
insert_cnt = max_insert - s->strstart;
- insert_string(s, s->strstart + 1, insert_cnt);
+ functable.insert_string(s, s->strstart + 1, insert_cnt);
s->prev_length = 0;
s->match_available = 0;
s->match_length = MIN_MATCH-1;