-rw-r--r--   include/mimalloc-internal.h |  6
-rw-r--r--   src/alloc.c                 | 59
-rw-r--r--   src/init.c                  | 86
3 files changed, 110 insertions(+), 41 deletions(-)
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 16be125..79adc23 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -138,8 +138,8 @@ mi_msecs_t _mi_clock_start(void);
// "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
@@ -945,7 +945,7 @@ static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
- memcpy(adst, asrc, n);
+ _mi_memcpy(adst, asrc, n);
}
#else
// Default fallback on `_mi_memcpy`
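
For context, `_mi_memcpy_aligned` above asserts word alignment of both pointers and then uses `__builtin_assume_aligned` (GCC/Clang) so the compiler can emit aligned copies; the change only swaps the final `memcpy` for mimalloc's `_mi_memcpy` wrapper. A minimal standalone sketch of the same pattern, with illustrative names that are not part of mimalloc:

#include <assert.h>
#include <stdint.h>
#include <string.h>

// Hypothetical word-aligned copy: assert alignment, then tell the compiler
// to assume it so the copy can be lowered to aligned loads/stores.
static inline void copy_aligned(void* dst, const void* src, size_t n) {
  assert(((uintptr_t)dst % sizeof(uintptr_t)) == 0 &&
         ((uintptr_t)src % sizeof(uintptr_t)) == 0);
  void* adst = __builtin_assume_aligned(dst, sizeof(uintptr_t));
  const void* asrc = __builtin_assume_aligned(src, sizeof(uintptr_t));
  memcpy(adst, asrc, n);  // compiler may now vectorize with aligned moves
}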
diff --git a/src/alloc.c b/src/alloc.c
index 5f150f2..62e76e2 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -147,7 +147,7 @@ mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept {
return p;
}
-void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) {
+void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
void* p = mi_heap_malloc(heap,size);
if (zero && p != NULL) {
_mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again?
@@ -530,20 +530,25 @@ bool _mi_free_delayed_block(mi_block_t* block) {
}
// Bytes available in a block
-static size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
- const mi_segment_t* const segment = mi_checked_ptr_segment(p,msg);
- if (segment==NULL) return 0;
- const mi_page_t* const page = _mi_segment_page_of(segment, p);
- const mi_block_t* block = (const mi_block_t*)p;
- if (mi_unlikely(mi_page_has_aligned(page))) {
- block = _mi_page_ptr_unalign(segment, page, p);
- size_t size = mi_page_usable_size_of(page, block);
- ptrdiff_t const adjust = (uint8_t*)p - (uint8_t*)block;
- mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
- return (size - adjust);
+mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t* segment, const mi_page_t* page, const void* p) mi_attr_noexcept {
+ const mi_block_t* block = _mi_page_ptr_unalign(segment, page, p);
+ const size_t size = mi_page_usable_size_of(page, block);
+ const ptrdiff_t adjust = (uint8_t*)p - (uint8_t*)block;
+ mi_assert_internal(adjust >= 0 && (size_t)adjust <= size);
+ return (size - adjust);
+}
+
+static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+ const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
+ if (segment==NULL) return 0; // also returns 0 if `p == NULL`
+ const mi_page_t* const page = _mi_segment_page_of(segment, p);
+ if (mi_likely(!mi_page_has_aligned(page))) {
+ const mi_block_t* block = (const mi_block_t*)p;
+ return mi_page_usable_size_of(page, block);
}
else {
- return mi_page_usable_size_of(page, block);
+ // split out to separate routine for improved code generation
+ return mi_page_usable_aligned_size_of(segment, page, p);
}
}
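
The "split out to separate routine for improved code generation" comment above describes a common hot/cold split: the frequent case stays in a small inlinable function, while the rare aligned-block case is forced out of line so it does not bloat the fast path. A generic sketch of the pattern, using plain GCC/Clang builtins instead of mimalloc's `mi_likely`/`mi_decl_noinline` macros (names and return values here are illustrative only):

// Rare case in its own non-inlined function keeps the hot caller small.
__attribute__((noinline)) static size_t usable_size_slow(const void* p) {
  // ... unalign the pointer and adjust the size (uncommon case) ...
  return 0;
}

static inline size_t usable_size_fast(const void* p, int is_plain_block) {
  if (__builtin_expect(is_plain_block, 1)) {
    return 64;                    // cheap answer on the common path
  }
  return usable_size_slow(p);     // out-of-line call on the rare path
}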
@@ -612,35 +617,43 @@ mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept {
return mi_heap_mallocn(mi_get_default_heap(),count,size);
}
-// Expand in place or fail
+// Expand (or shrink) in place (or fail)
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
+ #if MI_PADDING
+ // we do not shrink/expand with padding enabled
+ MI_UNUSED(p); MI_UNUSED(newsize);
+ return NULL;
+ #else
if (p == NULL) return NULL;
- size_t size = _mi_usable_size(p,"mi_expand");
+ const size_t size = _mi_usable_size(p,"mi_expand");
if (newsize > size) return NULL;
return p; // it fits
+ #endif
}
-void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) {
- if (p == NULL) return _mi_heap_malloc_zero(heap,newsize,zero);
- size_t size = _mi_usable_size(p,"mi_realloc");
- if (newsize <= size && newsize >= (size / 2)) {
+void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept {
+ const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL
+ if (mi_unlikely(newsize <= size && newsize >= (size / 2))) {
+ // todo: adjust potential padding to reflect the new size?
return p; // reallocation still fits and not more than 50% waste
}
void* newp = mi_heap_malloc(heap,newsize);
if (mi_likely(newp != NULL)) {
if (zero && newsize > size) {
// also set last word in the previous allocation to zero to ensure any padding is zero-initialized
- size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
+ const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0);
memset((uint8_t*)newp + start, 0, newsize - start);
}
- _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
- mi_free(p); // only free if successful
+ if (mi_likely(p != NULL)) {
+ _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize));
+ mi_free(p); // only free the original pointer if successful
+ }
}
return newp;
}
void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
- return _mi_heap_realloc_zero(heap, p, newsize, false);
+ return _mi_heap_realloc_zero(heap, p, newsize, false);
}
void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
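
With the change above, `_mi_heap_realloc_zero` obtains the usable size even when `p` is NULL (where it is 0), so `mi_realloc(NULL, n)` behaves like `mi_malloc(n)`, and a resize that still fits the existing block with at most 50% waste returns the original pointer. A small usage sketch against the public `mimalloc.h` API; the exact in-place behavior is an implementation detail and, as `mi_expand` shows, differs when debug padding is enabled:

#include <mimalloc.h>
#include <stdio.h>

int main(void) {
  void* p = mi_realloc(NULL, 100);                 // acts like mi_malloc(100)
  printf("usable: %zu\n", mi_usable_size(p));      // at least 100

  void* q = mi_realloc(p, 80);                     // small shrink: may return p unchanged

  void* r = mi_realloc(q, 4096);                   // grow: allocate, copy, free the old block
  mi_free(r);
  return 0;
}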
diff --git a/src/init.c b/src/init.c
index ad1e4d4..ce43e3a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -165,6 +165,68 @@ typedef struct mi_thread_data_s {
mi_tld_t tld;
} mi_thread_data_t;
+
+// Thread meta-data is allocated directly from the OS. For
+// some programs that do not use thread pools and allocate and
+// destroy many OS threads, this may cause too much overhead
+// per thread so we maintain a small cache of recently freed metadata.
+
+#define TD_CACHE_SIZE (8)
+static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE];
+
+static mi_thread_data_t* mi_thread_data_alloc(void) {
+ // try to find thread metadata in the cache
+ mi_thread_data_t* td;
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td != NULL) {
+ td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+ if (td != NULL) {
+ return td;
+ }
+ }
+ }
+ // if that fails, allocate directly from the OS
+ td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+ if (td == NULL) {
+ // if this fails, try once more. (issue #257)
+ td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
+ if (td == NULL) {
+ // really out of memory
+ _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
+ }
+ }
+ return td;
+}
+
+static void mi_thread_data_free( mi_thread_data_t* tdfree ) {
+ // try to add the thread metadata to the cache
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td == NULL) {
+ mi_thread_data_t* expected = NULL;
+ if (mi_atomic_cas_ptr_weak_acq_rel(mi_thread_data_t, &td_cache[i], &expected, tdfree)) {
+ return;
+ }
+ }
+ }
+ // if that fails, just free it directly
+ _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main);
+}
+
+static void mi_thread_data_collect(void) {
+ // free all thread metadata from the cache
+ for (int i = 0; i < TD_CACHE_SIZE; i++) {
+ mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
+ if (td != NULL) {
+ td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+ if (td != NULL) {
+ _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main );
+ }
+ }
+ }
+}
+
// Initialize the thread local default heap, called from `mi_thread_init`
static bool _mi_heap_init(void) {
if (mi_heap_is_initialized(mi_get_default_heap())) return true;
@@ -177,16 +239,9 @@ static bool _mi_heap_init(void) {
}
else {
// use `_mi_os_alloc` to allocate directly from the OS
- mi_thread_data_t* td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); // Todo: more efficient allocation?
- if (td == NULL) {
- // if this fails, try once more. (issue #257)
- td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main);
- if (td == NULL) {
- // really out of memory
- _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t));
- return false;
- }
- }
+ mi_thread_data_t* td = mi_thread_data_alloc();
+ if (td == NULL) return false;
+
// OS allocated so already zero initialized
mi_tld_t* tld = &td->tld;
mi_heap_t* heap = &td->heap;
@@ -242,16 +297,17 @@ static bool _mi_heap_done(mi_heap_t* heap) {
// free if not the main thread
if (heap != &_mi_heap_main) {
mi_assert_internal(heap->tld->segments.count == 0 || heap->thread_id != _mi_thread_id());
- _mi_os_free(heap, sizeof(mi_thread_data_t), &_mi_stats_main);
+ mi_thread_data_free((mi_thread_data_t*)heap);
}
-#if 0
- // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
- // there may still be delete/free calls after the mi_fls_done is called. Issue #207
else {
+ mi_thread_data_collect(); // free cached thread metadata
+ #if 0
+ // never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
+ // there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
mi_assert_internal(heap->tld->heap_backing == &_mi_heap_main);
+ #endif
}
-#endif
return false;
}
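
The `td_cache` added above is a small fixed-size, lock-free cache: freeing tries to CAS the metadata block into an empty slot, allocation scans the slots and claims one with an atomic exchange, and anything that does not fit falls back to the OS allocator. A self-contained sketch of the same pattern using C11 atomics instead of mimalloc's internal `mi_atomic_*` wrappers; the names and the `malloc`/`free` fallback are illustrative only:

#include <stdatomic.h>
#include <stdlib.h>

#define CACHE_SIZE 8
typedef struct blob_s { char data[256]; } blob_t;

static _Atomic(blob_t*) cache[CACHE_SIZE];

// Allocate: claim any cached block with an atomic exchange, else fall back.
static blob_t* blob_alloc(void) {
  for (int i = 0; i < CACHE_SIZE; i++) {
    if (atomic_load_explicit(&cache[i], memory_order_relaxed) != NULL) {
      blob_t* b = atomic_exchange_explicit(&cache[i], NULL, memory_order_acq_rel);
      if (b != NULL) return b;   // another thread may have raced us; keep scanning
    }
  }
  return (blob_t*)malloc(sizeof(blob_t));
}

// Free: try to park the block in an empty slot with a weak CAS, else really free it.
static void blob_free(blob_t* b) {
  for (int i = 0; i < CACHE_SIZE; i++) {
    blob_t* expected = NULL;
    if (atomic_compare_exchange_weak_explicit(&cache[i], &expected, b,
                                              memory_order_acq_rel, memory_order_relaxed)) {
      return;
    }
  }
  free(b);
}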