summaryrefslogtreecommitdiff
path: root/deflate_fast.c
diff options
context:
space:
mode:
authorHans Kristian Rosbach <hk-git@circlestorm.org>2017-04-24 10:53:39 +0200
committerHans Kristian Rosbach <hk-git@circlestorm.org>2017-04-24 11:02:56 +0200
commitda5133848856cbef6a565736ba488b4b408922c8 (patch)
tree8fa52c8633678c6659836863cb2d8770a9b76540 /deflate_fast.c
parentec02ecf104e1d3f1836a908a359f20aa93494df5 (diff)
Add a struct func_table and function functableInit.
The struct contains pointers to select functions to be used by the rest of zlib, and the init function selects which functions will be used depending on what optimizations have been compiled in and what instruction sets are available at runtime. Tests done on a Haswell CPU running minigzip -6 compression of a 40M file show a 2.5% decrease in branches and a 25-30% reduction in iTLB-loads. The reduction in iTLB-loads is likely mostly due to the inability to inline functions. This also causes a slight performance regression of around 1%; this might still be worth it to make it much easier to implement new optimized functions for various architectures and instruction sets. The performance penalty will get smaller for functions that get more alternative implementations to choose from, since there is no need to add more branches to every call of the function. Today insert_string has 1 branch to choose insert_string_sse or insert_string_c, but if we also added, for example, insert_string_sse4, then that would have needed another branch, and it would probably at some point hinder effective inlining too.
Diffstat (limited to 'deflate_fast.c')
-rw-r--r--deflate_fast.c13
1 files changed, 7 insertions, 6 deletions
diff --git a/deflate_fast.c b/deflate_fast.c
index c169053..5b86e27 100644
--- a/deflate_fast.c
+++ b/deflate_fast.c
@@ -7,6 +7,7 @@
#include "deflate.h"
#include "deflate_p.h"
#include "match.h"
+#include "functable.h"
/* ===========================================================================
* Compress as much as possible from the input stream, return the current
@@ -26,7 +27,7 @@ block_state deflate_fast(deflate_state *s, int flush) {
* string following the next match.
*/
if (s->lookahead < MIN_LOOKAHEAD) {
- fill_window(s);
+ functable.fill_window(s);
if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
return need_more;
}
@@ -39,7 +40,7 @@ block_state deflate_fast(deflate_state *s, int flush) {
*/
hash_head = NIL;
if (s->lookahead >= MIN_MATCH) {
- hash_head = insert_string(s, s->strstart, 1);
+ hash_head = functable.insert_string(s, s->strstart, 1);
}
/* Find the longest match, discarding those <= prev_length.
@@ -68,7 +69,7 @@ block_state deflate_fast(deflate_state *s, int flush) {
s->strstart++;
#ifdef NOT_TWEAK_COMPILER
do {
- insert_string(s, s->strstart, 1);
+ functable.insert_string(s, s->strstart, 1);
s->strstart++;
/* strstart never exceeds WSIZE-MAX_MATCH, so there are
* always MIN_MATCH bytes ahead.
@@ -76,7 +77,7 @@ block_state deflate_fast(deflate_state *s, int flush) {
} while (--s->match_length != 0);
#else
{
- insert_string(s, s->strstart, s->match_length);
+ functable.insert_string(s, s->strstart, s->match_length);
s->strstart += s->match_length;
s->match_length = 0;
}
@@ -86,9 +87,9 @@ block_state deflate_fast(deflate_state *s, int flush) {
s->match_length = 0;
s->ins_h = s->window[s->strstart];
#ifndef NOT_TWEAK_COMPILER
- insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
+ functable.insert_string(s, s->strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
#else
- insert_string(s, s->strstart + 2 - MIN_MATCH, 1);
+ functable.insert_string(s, s->strstart + 2 - MIN_MATCH, 1);
#if MIN_MATCH != 3
#warning Call insert_string() MIN_MATCH-3 more times
#endif