summary refs log tree commit diff
path: root/libc/malloc_hooks/malloc_hooks.cpp
diff options
context:
space:
mode:
author: Adhemerval Zanella <adhemerval.zanella@linaro.org> 2018-06-22 13:31:34 -0300
committer: Haibo Huang <hhb@google.com> 2018-08-21 17:48:12 +0000
commit: 4ab56af82df228b63e723b21b92ede82e6f698dd (patch)
tree: b55eb94017754d9da21b908733a762bc34e6038f /libc/malloc_hooks/malloc_hooks.cpp
parent: 3fd45bba4857fdbf320b6e89d2ae0569d9463bf5 (diff)
[AArch64] Optimize memcmp for medium to large sizes
This patch was originally written by Siddhesh Poyarekar and pushed on cortex-strings [1]. This improved memcmp provides a fast path for compares up to 16 bytes and then compares 16 bytes at a time, thus optimizing loads from both sources. Comparison on the default bionic and proposed optimized routines shows the following performance improvements on A72 (using the new proposed memcmp input data from test_memcmp.xml): Benchmark Time CPU Time Old Time New CPU Old CPU New -------------------------------------------------------------------------------------------------------------------- BM_string_memcmp/1/0/0 -0.2074 -0.2074 15 12 15 12 BM_string_memcmp/2/0/0 -0.5193 -0.5193 31 15 31 15 BM_string_memcmp/3/0/0 -0.1291 -0.1291 19 17 19 17 BM_string_memcmp/4/0/0 -0.2889 -0.2889 17 12 17 12 BM_string_memcmp/5/0/0 -0.2606 -0.2606 15 11 15 11 BM_string_memcmp/6/0/0 -0.1656 -0.1655 17 14 17 14 BM_string_memcmp/7/0/0 -0.1721 -0.1721 19 15 19 15 BM_string_memcmp/8/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/9/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/10/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/11/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/12/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/13/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/14/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/15/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/16/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/24/0/0 -0.1209 -0.1209 15 13 15 13 BM_string_memcmp/32/0/0 -0.3228 -0.3228 20 13 20 13 BM_string_memcmp/40/0/0 -0.2937 -0.2937 22 15 22 15 BM_string_memcmp/48/0/0 -0.3299 -0.3299 23 15 23 15 BM_string_memcmp/56/0/0 -0.1845 -0.1845 24 20 24 20 BM_string_memcmp/64/0/0 -0.2247 -0.2247 26 20 26 20 BM_string_memcmp/72/0/0 -0.1947 -0.1947 27 22 27 22 BM_string_memcmp/80/0/0 -0.2275 -0.2275 28 22 28 22 BM_string_memcmp/88/0/0 -0.2360 -0.2360 29 22 29 22 BM_string_memcmp/96/0/0 -0.2675 -0.2675 31 22 31 22 BM_string_memcmp/104/0/0 -0.2559 -0.2559 32 24 32 24 
BM_string_memcmp/112/0/0 -0.2787 -0.2786 33 24 33 24 BM_string_memcmp/120/0/0 -0.2599 -0.2599 34 25 34 25 BM_string_memcmp/128/0/0 -0.2860 -0.2860 35 25 35 25 BM_string_memcmp/136/0/0 -0.4708 -0.4708 53 28 53 28 BM_string_memcmp/144/0/0 -0.4719 -0.4719 53 28 53 28 BM_string_memcmp/160/0/0 -0.4680 -0.4680 56 30 56 30 BM_string_memcmp/176/0/0 -0.4645 -0.4645 60 32 60 32 BM_string_memcmp/192/0/0 -0.4641 -0.4641 63 34 63 34 BM_string_memcmp/208/0/0 -0.4555 -0.4555 66 36 66 36 BM_string_memcmp/224/0/0 -0.4558 -0.4557 69 38 69 38 BM_string_memcmp/240/0/0 -0.4534 -0.4534 72 40 72 40 BM_string_memcmp/256/0/0 -0.4463 -0.4463 75 42 75 42 BM_string_memcmp/512/0/0 -0.3077 -0.3077 126 88 126 88 BM_string_memcmp/1024/0/0 -0.3493 -0.3493 229 149 229 149 BM_string_memcmp/8192/0/0 -0.4173 -0.4173 1729 1007 1729 1007 BM_string_memcmp/16384/0/0 -0.3855 -0.3855 3377 2076 3377 2075 BM_string_memcmp/32768/0/0 -0.2968 -0.2968 6847 4815 6847 4814 BM_string_memcmp/65536/0/0 -0.2496 -0.2496 13715 10292 13714 10291 BM_string_memcmp/131072/0/0 -0.2676 -0.2676 27354 20033 27351 20031 BM_string_memcmp/262144/0/0 -0.2319 -0.2319 54604 41943 54598 41939 BM_string_memcmp/524288/0/0 -0.2359 -0.2359 109225 83460 109212 83449 BM_string_memcmp/1048576/0/0 -0.0439 -0.0439 423367 404791 423251 404686 BM_string_memcmp/2097152/0/0 -0.0023 -0.0024 762470 760701 761956 760122 BM_string_memcmp/512/4/4 -0.2853 -0.2853 125 89 125 89 BM_string_memcmp/1024/4/4 -0.3377 -0.3377 228 151 227 151 BM_string_memcmp/8192/4/4 -0.4083 -0.4083 1706 1009 1706 1009 BM_string_memcmp/16384/4/4 -0.3853 -0.3853 3376 2075 3376 2075 BM_string_memcmp/32768/4/4 -0.2974 -0.2974 6846 4810 6845 4810 BM_string_memcmp/65536/4/4 -0.2485 -0.2485 13619 10235 13618 10234 BM_string_memcmp/131072/4/4 -0.2387 -0.2387 27056 20597 27054 20595 BM_string_memcmp/512/4/0 -0.2898 -0.2898 123 88 123 88 BM_string_memcmp/1024/4/0 -0.3401 -0.3401 225 149 225 149 BM_string_memcmp/8192/4/0 -0.4167 -0.4167 1727 1007 1727 1007 BM_string_memcmp/16384/4/0 
-0.3820 -0.3820 3384 2092 3384 2091 BM_string_memcmp/32768/4/0 -0.2535 -0.2535 6886 5141 6886 5140 BM_string_memcmp/65536/4/0 -0.1897 -0.1897 13850 11223 13849 11223 BM_string_memcmp/131072/4/0 -0.1972 -0.1972 27536 22106 27533 22104 BM_string_memcmp/512/0/4 -0.2854 -0.2854 125 89 125 89 BM_string_memcmp/1024/0/4 -0.3332 -0.3333 226 151 226 151 BM_string_memcmp/8192/0/4 -0.4199 -0.4199 1740 1009 1740 1009 BM_string_memcmp/16384/0/4 -0.3811 -0.3811 3383 2094 3383 2094 BM_string_memcmp/32768/0/4 -0.2409 -0.2409 6900 5238 6899 5237 BM_string_memcmp/65536/0/4 -0.1920 -0.1920 13922 11250 13921 11248 BM_string_memcmp/131072/0/4 -0.2029 -0.2029 27699 22079 27697 22077 I see similar improvements on A54 as well: Benchmark Time CPU Time Old Time New CPU Old CPU New -------------------------------------------------------------------------------------------------------------------- BM_string_memcmp/1/0/0 -0.2074 -0.2074 15 12 15 12 BM_string_memcmp/2/0/0 -0.5193 -0.5193 31 15 31 15 BM_string_memcmp/3/0/0 -0.1291 -0.1291 19 17 19 17 BM_string_memcmp/4/0/0 -0.2889 -0.2889 17 12 17 12 BM_string_memcmp/5/0/0 -0.2606 -0.2606 15 11 15 11 BM_string_memcmp/6/0/0 -0.1656 -0.1655 17 14 17 14 BM_string_memcmp/7/0/0 -0.1721 -0.1721 19 15 19 15 BM_string_memcmp/8/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/9/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/10/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/11/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/12/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/13/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/14/0/0 -0.3048 -0.3048 15 10 15 10 BM_string_memcmp/15/0/0 -0.3040 -0.3040 15 10 15 10 BM_string_memcmp/16/0/0 -0.3041 -0.3041 15 10 15 10 BM_string_memcmp/24/0/0 -0.1209 -0.1209 15 13 15 13 BM_string_memcmp/32/0/0 -0.3228 -0.3228 20 13 20 13 BM_string_memcmp/40/0/0 -0.2937 -0.2937 22 15 22 15 BM_string_memcmp/48/0/0 -0.3299 -0.3299 23 15 23 15 BM_string_memcmp/56/0/0 -0.1845 -0.1845 24 20 24 20 BM_string_memcmp/64/0/0 -0.2247 
-0.2247 26 20 26 20 BM_string_memcmp/72/0/0 -0.1947 -0.1947 27 22 27 22 BM_string_memcmp/80/0/0 -0.2275 -0.2275 28 22 28 22 BM_string_memcmp/88/0/0 -0.2360 -0.2360 29 22 29 22 BM_string_memcmp/96/0/0 -0.2675 -0.2675 31 22 31 22 BM_string_memcmp/104/0/0 -0.2559 -0.2559 32 24 32 24 BM_string_memcmp/112/0/0 -0.2787 -0.2786 33 24 33 24 BM_string_memcmp/120/0/0 -0.2599 -0.2599 34 25 34 25 BM_string_memcmp/128/0/0 -0.2860 -0.2860 35 25 35 25 BM_string_memcmp/136/0/0 -0.4708 -0.4708 53 28 53 28 BM_string_memcmp/144/0/0 -0.4719 -0.4719 53 28 53 28 BM_string_memcmp/160/0/0 -0.4680 -0.4680 56 30 56 30 BM_string_memcmp/176/0/0 -0.4645 -0.4645 60 32 60 32 BM_string_memcmp/192/0/0 -0.4641 -0.4641 63 34 63 34 BM_string_memcmp/208/0/0 -0.4555 -0.4555 66 36 66 36 BM_string_memcmp/224/0/0 -0.4558 -0.4557 69 38 69 38 BM_string_memcmp/240/0/0 -0.4534 -0.4534 72 40 72 40 BM_string_memcmp/256/0/0 -0.4463 -0.4463 75 42 75 42 BM_string_memcmp/512/0/0 -0.3077 -0.3077 126 88 126 88 BM_string_memcmp/1024/0/0 -0.3493 -0.3493 229 149 229 149 BM_string_memcmp/8192/0/0 -0.4173 -0.4173 1729 1007 1729 1007 BM_string_memcmp/16384/0/0 -0.3855 -0.3855 3377 2076 3377 2075 BM_string_memcmp/32768/0/0 -0.2968 -0.2968 6847 4815 6847 4814 BM_string_memcmp/65536/0/0 -0.2496 -0.2496 13715 10292 13714 10291 BM_string_memcmp/131072/0/0 -0.2676 -0.2676 27354 20033 27351 20031 BM_string_memcmp/262144/0/0 -0.2319 -0.2319 54604 41943 54598 41939 BM_string_memcmp/524288/0/0 -0.2359 -0.2359 109225 83460 109212 83449 BM_string_memcmp/1048576/0/0 -0.0439 -0.0439 423367 404791 423251 404686 BM_string_memcmp/2097152/0/0 -0.0023 -0.0024 762470 760701 761956 760122 BM_string_memcmp/512/4/4 -0.2853 -0.2853 125 89 125 89 BM_string_memcmp/1024/4/4 -0.3377 -0.3377 228 151 227 151 BM_string_memcmp/8192/4/4 -0.4083 -0.4083 1706 1009 1706 1009 BM_string_memcmp/16384/4/4 -0.3853 -0.3853 3376 2075 3376 2075 BM_string_memcmp/32768/4/4 -0.2974 -0.2974 6846 4810 6845 4810 BM_string_memcmp/65536/4/4 -0.2485 -0.2485 13619 10235 13618 
10234 BM_string_memcmp/131072/4/4 -0.2387 -0.2387 27056 20597 27054 20595 BM_string_memcmp/512/4/0 -0.2898 -0.2898 123 88 123 88 BM_string_memcmp/1024/4/0 -0.3401 -0.3401 225 149 225 149 BM_string_memcmp/8192/4/0 -0.4167 -0.4167 1727 1007 1727 1007 BM_string_memcmp/16384/4/0 -0.3820 -0.3820 3384 2092 3384 2091 BM_string_memcmp/32768/4/0 -0.2535 -0.2535 6886 5141 6886 5140 BM_string_memcmp/65536/4/0 -0.1897 -0.1897 13850 11223 13849 11223 BM_string_memcmp/131072/4/0 -0.1972 -0.1972 27536 22106 27533 22104 BM_string_memcmp/512/0/4 -0.2854 -0.2854 125 89 125 89 BM_string_memcmp/1024/0/4 -0.3332 -0.3333 226 151 226 151 BM_string_memcmp/8192/0/4 -0.4199 -0.4199 1740 1009 1740 1009 BM_string_memcmp/16384/0/4 -0.3811 -0.3811 3383 2094 3383 2094 BM_string_memcmp/32768/0/4 -0.2409 -0.2409 6900 5238 6899 5237 BM_string_memcmp/65536/0/4 -0.1920 -0.1920 13922 11250 13921 11248 BM_string_memcmp/131072/0/4 -0.2029 -0.2029 27699 22079 27697 22077 [1] Commit id: f77e4c932b4fd65177b57dd5e220bd17fb4037d6 Test: bionic tests and benchmarks on aarch64. Change-Id: I2791e2b20d1c0ad429e8e5a41d3e47b1ac02c921
Diffstat (limited to 'libc/malloc_hooks/malloc_hooks.cpp')
0 files changed, 0 insertions, 0 deletions