-rw-r--r--  include/mimalloc-internal.h |  6
-rw-r--r--  src/alloc.c                 | 59
-rw-r--r--  src/init.c                  | 86
3 files changed, 110 insertions, 41 deletions
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 16be125..79adc23 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -138,8 +138,8 @@ mi_msecs_t _mi_clock_start(void);
 // "alloc.c"
 void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;  // called from `_mi_malloc_generic`
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool _mi_free_delayed_block(mi_block_t* block);
 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
@@ -945,7 +945,7 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
   mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
   void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
   const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
-  memcpy(adst, asrc, n);
+  _mi_memcpy(adst, asrc, n);
 }
 #else
 // Default fallback on `_mi_memcpy`
diff --git a/src/alloc.c b/src/alloc.c
index 5f150f2..62e76e2 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -147,7 +147,7 @@ mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
   return p;
 }
 
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
   void* p = mi_heap_malloc(heap,size);
   if (zero && p != NULL) {
     _mi_block_zero_init(_mi_ptr_page(p),p,size);  // todo: can we avoid getting the page again?
@@ -530,20 +530,25 @@ bool _mi_free_delayed_block(mi_block_t* block) {
 }
 
 // Bytes available in a block
-static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
-  const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg);
-  if (segment==NULL) return 0;
-  const mi_page_t* const page = _mi_segment_page_of(segment, p);
-  const mi_block_t* block = (const mi_block_t*)p;
-  if (mi_unlikely(mi_page_has_aligned(page))) {
-    block = _mi_page_ptr_unalign(segment, page, p);
-    size_t size = mi_page_usable_size_of(page, block);
-    ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block;
-    mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
-    return (size - adjust);
+mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
+  const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
+  const size_t size = mi_page_usable_size_of(page, block);
+  const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
+  mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
+  return (size - adjust);
+}
+
+static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+  const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
+  if (segment==NULL) return 0;  // also returns 0 if `p == NULL`
+  const mi_page_t* const page = _mi_segment_page_of(segment, p);
+  if (mi_likely(!mi_page_has_aligned(page))) {
+    const mi_block_t* block = (const mi_block_t*)p;
+    return mi_page_usable_size_of(page, block);
   }
   else {
-    return mi_page_usable_size_of(page, block);
+    // split out to separate routine for improved code generation
+    return mi_page_usable_aligned_size_of(segment, page, p);
   }
 }
@@ -612,35 +617,43 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
   return mi_heap_mallocn(mi_get_default_heap(),count,size);
 }
 
-// Expand in place or fail
+// Expand (or shrink) in place (or fail)
 void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
+  #if MI_PADDING
+  // we do not shrink/expand with padding enabled
+  MI_UNUSED(p); MI_UNUSED(newsize);
+  return NULL;
+  #else
   if (p == NULL) return NULL;
-  size_t size = _mi_usable_size(p,"mi_expand");
+  const size_t size = _mi_usable_size(p,"mi_expand");
   if (newsize > size) return NULL;
   return p;  // it fits
+  #endif
 }
 
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) {
-  if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero);
-  size_t size = _mi_usable_size(p,"mi_realloc");
-  if (newsize <= size && newsize >= (size / 2)) {
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept {
+  const size_t size = _mi_usable_size(p,"mi_realloc");  // also works if p == NULL
+  if (mi_unlikely(newsize <= size && newsize >= (size / 2))) {
+    // todo: adjust potential padding to reflect the new size?
     return p;  // reallocation still fits and not more than 50% waste
   }
   void* newp = mi_heap_malloc(heap,newsize);
   if (mi_likely(newp != NULL)) {
     if (zero && newsize > size) {
       // also set last word in the previous allocation to zero to ensure any padding is zero-initialized
-      size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
+      const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
       memset((uint8_t*)newp + start, 0, newsize - start);
     }
-    _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
-    mi_free(p);  // only free if successful
+    if (mi_likely(p != NULL)) {
+      _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
+      mi_free(p);  // only free the original pointer if successful
+    }
   }
   return newp;
 }
 
 void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
-  return _mi_heap_realloc_zero(heap, p, newsize, false);
+  return _mi_heap_realloc_zero(heap, p, newsize, false);
 }
 
 void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
diff --git a/src/init.c b/src/init.c
--- a/src/init.c
+++ b/src/init.c
@@ -165,6 +165,68 @@ typedef struct mi_thread_data_s {
   mi_tld_t  tld;
 } mi_thread_data_t;
 
+
+// Thread meta-data is allocated directly from the OS. For
+// some programs that do not use thread pools and allocate and
+// destroy many OS threads, this may cause too much overhead
+// per thread so we maintain a small cache of recently freed metadata.
+
+#define TD_CACHE_SIZE (8)
+static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
+
+static mi_thread_data_t* mi_thread_data_alloc(void) {
+  // try to find thread metadata in the cache
+  mi_thread_data_t* td;
+  for (int i = 0; i < TD_CACHE_SIZE; i++) {
+    td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+    if (td != NULL) {
+      td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+      if (td != NULL) {
+        return td;
+      }
+    }
+  }
+  // if that fails, allocate directly from the OS
+  td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+  if (td == NULL) {
+    // if this fails, try once more. (issue #257)
+    td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+    if (td == NULL) {
+      // really out of memory
+      _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
+    }
+  }
+  return td;
+}
+
+static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
+  // try to add the thread metadata to the cache
+  for (int i = 0; i < TD_CACHE_SIZE; i++) {
+    mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+    if (td == NULL) {
+      mi_thread_data_t* expected = NULL;
+      if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
+        return;
+      }
+    }
+  }
+  // if that fails, just free it directly
+  _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main);
+}
+
+static void mi_thread_data_collect(void) {
+  // free all thread metadata from the cache
+  for (int i = 0; i < TD_CACHE_SIZE; i++) {
+    mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+    if (td != NULL) {
+      td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+      if (td != NULL) {
+        _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main );
+      }
+    }
+  }
+}
+
 // Initialize the thread local default heap, called from `mi_thread_init`
 static bool _mi_heap_init(void) {
   if (mi_heap_is_initialized(mi_get_default_heap())) return true;
@@ -177,16 +239,9 @@ static bool _mi_heap_init(void) {
   }
   else {
     // use `_mi_os_alloc` to allocate directly from the OS
-    mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation?
-    if (td == NULL) {
-      // if this fails, try once more. (issue #257)
-      td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
-      if (td == NULL) {
-        // really out of memory
-        _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
-        return false;
-      }
-    }
+    mi_thread_data_t* td = mi_thread_data_alloc();
+    if (td == NULL) return false;
+    // OS allocated so already zero initialized
     mi_tld_t* tld = &td->tld;
     mi_heap_t* heap = &td->heap;
@@ -242,16 +297,17 @@ static bool _mi_heap_done(mi_heap_t* heap) {
   // free if not the main thread
   if (heap != &_mi_heap_main) {
     mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
-    _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
+    mi_thread_data_free((mi_thread_data_t*)heap);
   }
-#if 0
-  // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
-  // there may still be delete/free calls after the mi_fls_done is called. Issue #207
   else {
+    mi_thread_data_collect();  // free cached thread metadata
+    #if 0
+    // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
+    // there may still be delete/free calls after the mi_fls_done is called. Issue #207
     _mi_heap_destroy_pages(heap);
     mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
+    #endif
   }
-#endif
   return false;
 }
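
Note: the core of this change is the small lock-free cache of thread metadata in src/init.c: a fixed-size array of atomic pointers, a cheap relaxed peek before paying for an exchange on pop, and a weak compare-and-swap on push. A minimal standalone sketch of that same pattern in portable C11 follows; the names here (slot_cache, cache_pop, cache_push) and the malloc/free fallback are illustrative stand-ins, not mimalloc's API.

#include <stdatomic.h>
#include <stddef.h>
#include <stdlib.h>

#define CACHE_SIZE 8
static _Atomic(void*) slot_cache[CACHE_SIZE];  // hypothetical cache, zero-initialized

// Pop: claim any non-empty slot, else fall back to a real allocation.
static void* cache_pop(size_t size) {
  for (int i = 0; i < CACHE_SIZE; i++) {
    // cheap relaxed peek first (mirrors mi_atomic_load_ptr_relaxed); only
    // pay for the atomic exchange if the slot looks non-empty
    if (atomic_load_explicit(&slot_cache[i], memory_order_relaxed) != NULL) {
      void* p = atomic_exchange_explicit(&slot_cache[i], NULL, memory_order_acq_rel);
      if (p != NULL) return p;  // the exchange can lose a race and see NULL
    }
  }
  return malloc(size);  // cache empty: allocate fresh
}

// Push: stash into the first empty slot, else really free.
static void cache_push(void* p) {
  for (int i = 0; i < CACHE_SIZE; i++) {
    void* expected = NULL;
    // weak CAS succeeds only if the slot is still empty; on a (possibly
    // spurious) failure we simply try the next slot
    if (atomic_compare_exchange_weak_explicit(&slot_cache[i], &expected, p,
            memory_order_acq_rel, memory_order_relaxed)) {
      return;
    }
  }
  free(p);  // cache full: release for real
}

int main(void) {
  void* a = cache_pop(64);
  cache_push(a);            // parked in the cache instead of freed
  void* b = cache_pop(64);  // single-threaded, so this gets `a` back
  cache_push(b);
  return 0;
}

Because a thread exiting races with other threads exiting (and with new threads starting), every slot access is a single atomic operation; there is no lock, and a lost race simply falls through to the next slot or to the OS.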
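
A second reusable trick appears in the rewritten _mi_usable_size: the rare aligned-pointer case is moved into a mi_decl_noinline helper so the common path stays small enough to inline at every call site. A hedged illustration of the same hot/cold split, with invented names and a deliberately simplified page structure:

#include <stddef.h>
#include <stdint.h>

// simplified stand-in for mi_page_t: just a block size
typedef struct page_s { size_t block_size; } page_t;

// cold path: rare and larger, deliberately kept out of line so its code
// (and register spills) do not bloat every caller of usable_size()
#if defined(__GNUC__) || defined(__clang__)
__attribute__((noinline))
#endif
static size_t usable_size_aligned(const page_t* page, const void* p, const void* block_start) {
  // p points somewhere inside the block; subtract its offset from the start
  const ptrdiff_t adjust = (const uint8_t*)p - (const uint8_t*)block_start;
  return page->block_size - (size_t)adjust;
}

// hot path: trivially small, so compilers inline it at each call site and
// only emit an out-of-line call for the unlikely aligned case
static inline size_t usable_size(const page_t* page, const void* p,
                                 const void* block_start, int has_aligned) {
  if (!has_aligned) return page->block_size;  // common case: p is the block start
  return usable_size_aligned(page, p, block_start);
}

int main(void) {
  page_t page = { 64 };
  char block[64];
  return (int)(usable_size(&page, block, block, 0)       // 64
             - usable_size(&page, block + 16, block, 1)  // 48
             - 16);                                      // exits with 0
}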