author     Daan Leijen <daan@microsoft.com>  2022-04-19 18:32:35 -0700
committer  Daan Leijen <daan@microsoft.com>  2022-04-19 18:32:35 -0700
commit     d69d4c861ff21a411e30e6c6914a1ae705962aee (patch)
tree       b7e1ce52f1b6064e8cb628dd0b717b2b3e1f0373
parent     9d69e3ed06617b2621eb550222eca35f58ee1850 (diff)
add zero parameter to primitive allocation to improve codegen for calloc etc
-rw-r--r--  include/mimalloc-internal.h  | 35
-rw-r--r--  src/alloc-aligned.c          |  3
-rw-r--r--  src/alloc.c                  | 85
-rw-r--r--  src/page.c                   | 15
-rw-r--r--  test/test-stress.c           |  2
5 files changed, 77 insertions, 63 deletions
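
As the commit message says, the point of threading a zero flag through the primitive allocators is better code generation for calloc and friends: the allocation and the zeroing become one call, and blocks on pages the OS already returned zero-initialized need almost no extra work. Illustrative usage only (not part of this patch), against the public mimalloc API:

#include <mimalloc.h>
#include <assert.h>

int main(void) {
  // mi_calloc/mi_zalloc now bottom out in _mi_heap_malloc_zero(..., true),
  // so the block comes back zeroed without a separate memset pass.
  int* a = (int*)mi_calloc(8, sizeof(int));
  assert(a != NULL && a[0] == 0 && a[7] == 0);
  mi_free(a);
  return 0;
}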
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 787efb0..9a14105 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -106,7 +106,7 @@ void _mi_abandoned_await_readers(void);
// "page.c"
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
void _mi_page_unfull(mi_page_t* page);
@@ -138,12 +138,11 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start);
mi_msecs_t _mi_clock_start(void);
// "alloc.c"
-void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
+void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
#if MI_DEBUG>1
bool _mi_page_is_valid(mi_page_t* page);
@@ -267,8 +266,8 @@ static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t))) // sqrt(SIZE_MAX)
*total = count * size;
- return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
- && size > 0 && (SIZE_MAX / size) < count);
+ // note: gcc/clang optimize this to directly check the overflow flag
+ return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
}
#endif
@@ -279,7 +278,7 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
return false;
}
else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
- #if !defined(NDEBUG)
+ #if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
#endif
*total = SIZE_MAX;
@@ -925,7 +924,15 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
__movsb((unsigned char*)dst, (const unsigned char*)src, n);
}
else {
- memcpy(dst, src, n); // todo: use noinline?
+ memcpy(dst, src, n);
+ }
+}
+static inline void _mi_memzero(void* dst, size_t n) {
+ if (_mi_cpu_has_fsrm) {
+ __stosb((unsigned char*)dst, 0, n);
+ }
+ else {
+ memset(dst, 0, n);
}
}
#else
@@ -933,6 +940,9 @@ static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
memcpy(dst, src, n);
}
+static inline void _mi_memzero(void* dst, size_t n) {
+ memset(dst, 0, n);
+}
#endif
@@ -950,12 +960,23 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
_mi_memcpy(adst, asrc, n);
}
+
+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+ mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+ void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
+ _mi_memzero(adst, n);
+}
#else
// Default fallback on `_mi_memcpy`
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
_mi_memcpy(dst, src, n);
}
+
+static inline void _mi_memzero_aligned(void* dst, size_t n) {
+ mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0);
+ _mi_memzero(dst, n);
+}
#endif
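
The note added to mi_mul_overflow above ("gcc/clang optimize this to directly check the overflow flag") refers to the same codegen that the compiler builtins request explicitly. A minimal sketch, assuming a GCC/Clang toolchain with __builtin_mul_overflow (mimalloc itself prefers the width-specific __builtin_umul*_overflow variants when they are available):

#include <stddef.h>
#include <stdbool.h>

// Sketch only: the builtin compiles to a multiply followed by a branch on the
// hardware overflow flag, matching what the portable
// (SIZE_MAX / size) < count fallback in the hunk above optimizes down to.
static inline bool mul_overflow_sketch(size_t count, size_t size, size_t* total) {
  return __builtin_mul_overflow(count, size, total);  // true on overflow
}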
diff --git a/src/alloc-aligned.c b/src/alloc-aligned.c
index f7eaae4..1578135 100644
--- a/src/alloc-aligned.c
+++ b/src/alloc-aligned.c
@@ -79,10 +79,9 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
#if MI_STAT>1
mi_heap_stat_increase(heap, malloc, size);
#endif
- void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc
+ void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc
mi_assert_internal(p != NULL);
mi_assert_internal(((uintptr_t)p + offset) % alignment == 0);
- if (zero) { _mi_block_zero_init(page, p, size); }
return p;
}
}
diff --git a/src/alloc.c b/src/alloc.c
index 73b2ea1..1148ec7 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -25,11 +25,11 @@ terms of the MIT license. A copy of the license can be found in the file
// Fast allocation in a page: just pop from the free list.
// Fall back to generic allocation only if the list is empty.
-extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept {
+extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept {
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
if (mi_unlikely(block == NULL)) {
- return _mi_malloc_generic(heap, size);
+ return _mi_malloc_generic(heap, size, zero);
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
// pop from the free list
@@ -37,10 +37,17 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
page->free = mi_block_next(page, block);
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
+ // zero the block?
+ if (mi_unlikely(zero)) {
+ mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks
+ const size_t zsize = (mi_unlikely(page->is_zero) ? sizeof(block->next) : page->xblock_size);
+ _mi_memzero_aligned(block, zsize);
+ }
+
#if (MI_DEBUG>0)
- if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); }
+ if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, size); }
#elif (MI_SECURE!=0)
- block->next = 0; // don't leak internal data
+ if (!zero) { block->next = 0; } // don't leak internal data
#endif
#if (MI_STAT>0)
@@ -69,41 +76,45 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
return block;
}
-// allocate a small block
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
- mi_assert(heap!=NULL);
+static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
+ mi_assert(heap != NULL);
mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
mi_assert(size <= MI_SMALL_SIZE_MAX);
- #if (MI_PADDING)
+#if (MI_PADDING)
if (size == 0) {
size = sizeof(void*);
}
- #endif
- mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
- void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);
- mi_assert_internal(p==NULL || mi_usable_size(p) >= size);
- #if MI_STAT>1
+#endif
+ mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
+ void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
+ mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
+#if MI_STAT>1
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
}
- #endif
+#endif
return p;
}
+// allocate a small block
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+ return mi_heap_malloc_small_zero(heap, size, false);
+}
+
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept {
return mi_heap_malloc_small(mi_get_default_heap(), size);
}
// The main allocation function
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+mi_decl_nodiscard extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
if (mi_likely(size <= MI_SMALL_SIZE_MAX)) {
- return mi_heap_malloc_small(heap, size);
+ return mi_heap_malloc_small_zero(heap, size, zero);
}
else {
mi_assert(heap!=NULL);
- mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
- void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic
+ mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
+ void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic
mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
#if MI_STAT>1
if (p != NULL) {
@@ -115,44 +126,17 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t*
}
}
-mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
- return mi_heap_malloc(mi_get_default_heap(), size);
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+ return _mi_heap_malloc_zero(heap, size, false);
}
-
-void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) {
- // note: we need to initialize the whole usable block size to zero, not just the requested size,
- // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63)
- MI_UNUSED(size);
- mi_assert_internal(p != NULL);
- mi_assert_internal(mi_usable_size(p) >= size); // size can be zero
- mi_assert_internal(_mi_ptr_page(p)==page);
- if (page->is_zero && size > sizeof(mi_block_t)) {
- // already zero initialized memory
- ((mi_block_t*)p)->next = 0; // clear the free list pointer
- mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p)));
- }
- else {
- // otherwise memset
- memset(p, 0, mi_usable_size(p));
- }
+mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept {
+ return mi_heap_malloc(mi_get_default_heap(), size);
}
// zero initialized small block
mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
- void* p = mi_malloc_small(size);
- if (p != NULL) {
- _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again?
- }
- return p;
-}
-
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
- void* p = mi_heap_malloc(heap,size);
- if (zero && p != NULL) {
- _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
- }
- return p;
+ return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true);
}
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
@@ -564,6 +548,7 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
#ifdef __cplusplus
void* _mi_externs[] = {
(void*)&_mi_page_malloc,
+ (void*)&_mi_heap_malloc_zero,
(void*)&mi_malloc,
(void*)&mi_malloc_small,
(void*)&mi_zalloc_small,
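
The fast-path change in _mi_page_malloc above avoids re-zeroing blocks on pages that are already known to be zero: only the encoded free-list link that was just popped has to be cleared. A minimal sketch of that decision, assuming the internal types from mimalloc-internal.h; the helper name is hypothetical and not in the source:

#include "mimalloc.h"
#include "mimalloc-internal.h"

// Hypothetical helper, for illustration only: mirrors the zeroing that
// _mi_page_malloc now does inline when called with zero == true.
static void zero_fresh_block_sketch(mi_page_t* page, mi_block_t* block) {
  // On an is_zero page the memory is already zero except for the free-list
  // pointer stored at the start of the block; otherwise clear the whole block.
  const size_t zsize = (page->is_zero ? sizeof(block->next) : page->xblock_size);
  _mi_memzero_aligned(block, zsize);
}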
diff --git a/src/page.c b/src/page.c
index 386898f..fdcf5d0 100644
--- a/src/page.c
+++ b/src/page.c
@@ -815,7 +815,7 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
@@ -849,6 +849,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept
mi_assert_internal(mi_page_immediate_available(page));
mi_assert_internal(mi_page_block_size(page) >= size);
- // and try again, this time succeeding! (i.e. this should never recurse)
- return _mi_page_malloc(heap, page, size);
+ // and try again, this time succeeding! (i.e. this should never recurse through _mi_page_malloc)
+ if (mi_unlikely(zero && page->xblock_size == 0)) {
+ // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case.
+ void* p = _mi_page_malloc(heap, page, size, false);
+ mi_assert_internal(p != NULL);
+ _mi_memzero_aligned(p, mi_page_usable_block_size(page));
+ return p;
+ }
+ else {
+ return _mi_page_malloc(heap, page, size, zero);
+ }
}
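
For huge blocks the page's xblock_size is 0, so the generic path above cannot pass zero down into _mi_page_malloc and instead zeroes the usable block size after the allocation. Illustrative usage only; whether a given size is served as a huge block is an assumption about configuration, not something this patch guarantees:

#include <mimalloc.h>
#include <assert.h>
#include <stddef.h>

int main(void) {
  // A zeroed allocation well above the small-object limit goes through
  // _mi_malloc_generic; if it lands on a huge page (xblock_size == 0) the
  // zeroing happens after allocation, as in the hunk above.
  size_t n = 32 * 1024 * 1024;
  char* p = (char*)mi_zalloc(n);
  assert(p != NULL && p[0] == 0 && p[n - 1] == 0);
  mi_free(p);
  return 0;
}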
diff --git a/test/test-stress.c b/test/test-stress.c
index 498b7ec..9033e22 100644
--- a/test/test-stress.c
+++ b/test/test-stress.c
@@ -256,7 +256,7 @@ int main(int argc, char** argv) {
mi_collect(true);
#endif
mi_stats_print(NULL);
-#endif
+#endif
//bench_end_program();
return 0;
}