summaryrefslogtreecommitdiff
path: root/deflate_p.h
diff options
context:
space:
mode:
authorSebastian Pop <s.pop@samsung.com>2018-12-06 13:23:17 -0600
committerHans Kristian Rosbach <hk-github@circlestorm.org>2018-12-13 09:03:25 +0100
commit13619fd2b6d0d5e2c2b5d8e8c08bc97097415c11 (patch)
tree18f6f448389c38585a5f6f9de24341b703b523b0 /deflate_p.h
parent2aebdb5c7023de0b9ac7c19301216616997f961a (diff)
return an index for hash map collisions in insert_string
The current version of insert_string_c and variations for sse2, arm, and aarch64 in zlib-ng has changed semantics from the original code of INSERT_STRING macro in zlib: #define INSERT_STRING(s, str, match_head) \ (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ s->head[s->ins_h] = (Pos)(str)) The code of INSERT_STRING assigns match_head with the content of s->head[s->ins_h]. In zlib-ng, the assignment to match_head happens in the caller of insert_string(). zlib-ng's insert_string_*() functions return 0 instead of str+idx in case of collision, i.e., when if (s->head[s->ins_h] == str+idx). The effect of returning 0 instead of the content of s->head[s->ins_h] is that the search for a longest_match through s->prev[] chains will be cut short when arriving at 0. This leads to a shorter compression time at the expense of a worse compression rate: returning 0 cuts out the search space. With this patch: Performance counter stats for './minigzip -9 llvm.tar': 13422.379017 task-clock (msec) # 1.000 CPUs utilized 20 context-switches # 0.001 K/sec 0 cpu-migrations # 0.000 K/sec 130 page-faults # 0.010 K/sec 58,926,104,511 cycles # 4.390 GHz <not supported> stalled-cycles-frontend <not supported> stalled-cycles-backend 77,543,740,646 instructions # 1.32 insns per cycle 17,158,892,214 branches # 1278.379 M/sec 198,433,680 branch-misses # 1.16% of all branches 13.423365095 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 11:47 llvm.tar.gz Without this patch the compressed file is larger: Performance counter stats for './minigzip -9 llvm.tar': 13459.342312 task-clock (msec) # 1.000 CPUs utilized 25 context-switches # 0.002 K/sec 0 cpu-migrations # 0.000 K/sec 129 page-faults # 0.010 K/sec 59,088,391,808 cycles # 4.390 GHz <not supported> stalled-cycles-frontend <not supported> stalled-cycles-backend 77,600,766,958 instructions # 1.31 insns per cycle 17,486,130,785 branches # 1299.182 M/sec 196,281,761 branch-misses # 1.12% of all branches 13.463512830 seconds time elapsed 45408 -rw-rw-r-- 1 spop spop 46493896 Dec 11 11:48 llvm.tar.gz
Diffstat (limited to 'deflate_p.h')
-rw-r--r--deflate_p.h15
1 files changed, 9 insertions, 6 deletions
diff --git a/deflate_p.h b/deflate_p.h
index 02fbf66..6008a97 100644
--- a/deflate_p.h
+++ b/deflate_p.h
@@ -37,12 +37,15 @@ static inline Pos insert_string_c(deflate_state *const s, const Pos str, unsigne
for (idx = 0; idx < count; idx++) {
UPDATE_HASH(s, s->ins_h, str+idx);
- if (s->head[s->ins_h] != str+idx) {
- s->prev[(str+idx) & s->w_mask] = s->head[s->ins_h];
- s->head[s->ins_h] = str+idx;
- if (idx == count-1) {
- ret = s->prev[(str+idx) & s->w_mask];
- }
+
+ Pos head = s->head[s->ins_h];
+ if (head != str+idx) {
+ s->prev[(str+idx) & s->w_mask] = head;
+ s->head[s->ins_h] = str+idx;
+ if (idx == count - 1)
+ ret = head;
+ } else if (idx == count - 1) {
+ ret = str + idx;
}
}
return ret;