summaryrefslogtreecommitdiff
path: root/inffast.c
diff options
context:
space:
mode:
authorNathan Moinvaziri <nathan@solidstatenetworks.com>2020-03-17 18:03:15 -0700
committerHans Kristian Rosbach <hk-github@circlestorm.org>2020-06-28 11:16:05 +0200
commite40d88adc9e8180969c6c56a0deb4ec69c3ec92b (patch)
treebdf66efd66337cf504c438dc54e620d5ce959c08 /inffast.c
parent0cab92e7faa5fb05bafd9c587918158cc933ed1d (diff)
Split memcopy by architecture.
Use uint8_t[8] struct on big-endian machines for speed.
Diffstat (limited to 'inffast.c')
-rw-r--r--inffast.c87
1 files changed, 21 insertions, 66 deletions
diff --git a/inffast.c b/inffast.c
index 1f74967..57d3ce4 100644
--- a/inffast.c
+++ b/inffast.c
@@ -9,8 +9,20 @@
#include "inflate.h"
#include "inffast.h"
#include "inflate_p.h"
-#include "memcopy.h"
+#include "functable.h"
+
+/* Load 64 bits from IN and place the bytes at offset BITS in the result. */
+static inline uint64_t load_64_bits(const unsigned char *in, unsigned bits) {
+ uint64_t chunk;
+ memcpy(&chunk, in, sizeof(chunk));
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+ return chunk << bits;
+#else
+ return ZSWAP64(chunk) << bits;
+#endif
+}
/*
Decode literal, length, and distance codes and write out the resulting
literal and match bytes until either not enough input or output is
@@ -58,9 +70,7 @@ void ZLIB_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start)
unsigned char *out; /* local strm->next_out */
unsigned char *beg; /* inflate()'s initial strm->next_out */
unsigned char *end; /* while out < end, enough space available */
-#ifdef INFFAST_CHUNKSIZE
unsigned char *safe; /* can use chunkcopy provided out < safe */
-#endif
#ifdef INFLATE_STRICT
unsigned dmax; /* maximum distance from zlib header */
#endif
@@ -126,10 +136,7 @@ void ZLIB_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start)
out = strm->next_out;
beg = out - (start - strm->avail_out);
end = out + (strm->avail_out - (INFLATE_FAST_MIN_LEFT - 1));
-
-#ifdef INFFAST_CHUNKSIZE
safe = out + strm->avail_out;
-#endif
#ifdef INFLATE_STRICT
dmax = state->dmax;
#endif
@@ -230,7 +237,6 @@ void ZLIB_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start)
}
#endif
}
-#ifdef INFFAST_CHUNKSIZE
from = window;
if (wnext == 0) { /* very common case */
from += wsize - op;
@@ -241,7 +247,7 @@ void ZLIB_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start)
from += wsize - op;
if (op < len) { /* some from end of window */
len -= op;
- out = chunkcopysafe(out, from, op, safe);
+ out = functable.chunkcopy_safe(out, from, op, safe);
from = window; /* more from start of window */
op = wnext;
/* This (rare) case can create a situation where
@@ -251,74 +257,23 @@ void ZLIB_INTERNAL zng_inflate_fast(PREFIX3(stream) *strm, unsigned long start)
}
if (op < len) { /* still need some from output */
len -= op;
- out = chunkcopysafe(out, from, op, safe);
- out = chunkunroll(out, &dist, &len);
- out = chunkcopysafe(out, out - dist, len, safe);
+ out = functable.chunkcopy_safe(out, from, op, safe);
+ out = functable.chunkunroll(out, &dist, &len);
+ out = functable.chunkcopy_safe(out, out - dist, len, safe);
} else {
- out = chunkcopysafe(out, from, len, safe);
+ out = functable.chunkcopy_safe(out, from, len, safe);
}
-#else
- from = window;
- if (wnext == 0) { /* very common case */
- from += wsize - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- *out++ = *from++;
- } while (--op);
- from = out - dist; /* rest from output */
- }
- } else if (wnext < op) { /* wrap around window */
- from += wsize + wnext - op;
- op -= wnext;
- if (op < len) { /* some from end of window */
- len -= op;
- do {
- *out++ = *from++;
- } while (--op);
- from = window;
- if (wnext < len) { /* some from start of window */
- op = wnext;
- len -= op;
- do {
- *out++ = *from++;
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
- } else { /* contiguous in window */
- from += wnext - op;
- if (op < len) { /* some from window */
- len -= op;
- do {
- *out++ = *from++;
- } while (--op);
- from = out - dist; /* rest from output */
- }
- }
-
- out = chunk_copy(out, from, (int) (out - from), len);
-#endif
} else {
-#ifdef INFFAST_CHUNKSIZE
/* Whole reference is in range of current output. No
range checks are necessary because we start with room
for at least 258 bytes of output, so unroll and roundoff
operations can write beyond `out+len` so long as they
stay within 258 bytes of `out`.
*/
- if (dist >= len || dist >= INFFAST_CHUNKSIZE)
- out = chunkcopy(out, out - dist, len);
- else
- out = chunkmemset(out, dist, len);
-#else
- if (len < sizeof(uint64_t))
- out = set_bytes(out, out - dist, dist, len);
- else if (dist == 1)
- out = byte_memset(out, len);
+ if (dist >= len || dist >= functable.chunksize())
+ out = functable.chunkcopy(out, out - dist, len);
else
- out = chunk_memset(out, out - dist, dist, len);
-#endif
+ out = functable.chunkmemset(out, dist, len);
}
} else if ((op & 64) == 0) { /* 2nd level distance code */
here = dcode + here->val + BITS(op);