Diffstat (limited to 'src/segment-cache.c')
-rw-r--r--  src/segment-cache.c  310
1 file changed, 310 insertions, 0 deletions
diff --git a/src/segment-cache.c b/src/segment-cache.c
new file mode 100644
index 0000000..569e878
--- /dev/null
+++ b/src/segment-cache.c
@@ -0,0 +1,310 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2020, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+
+/* ----------------------------------------------------------------------------
+ Implements a cache of segments to avoid expensive OS calls, as well as
+ a full memory map of all segments so that the segment containing any
+ pointer can be found reliably.
+-----------------------------------------------------------------------------*/
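+
+/* Illustrative sketch (not part of the original file): how a caller might pair
+   the two cache entry points defined below. The wrapper `segment_os_alloc_cached`
+   and its OS fallback are hypothetical; only the _mi_segment_cache_pop and
+   _mi_segment_cache_push signatures come from this file.
+
+   static void* segment_os_alloc_cached(mi_commit_mask_t* commit_mask, bool* large,
+                                        bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+   {
+     // try to reuse a cached MI_SEGMENT_SIZE block first (avoids an OS call)
+     void* p = _mi_segment_cache_pop(MI_SEGMENT_SIZE, commit_mask, large, is_zero, memid, tld);
+     if (p != NULL) return p;
+     // ...otherwise allocate fresh memory from the OS (elided)...
+     return NULL;
+   }
+
+   On segment free, a caller can offer the block back to the cache before
+   returning it to the OS:
+
+     if (!_mi_segment_cache_push(start, MI_SEGMENT_SIZE, memid, commit_mask, is_large, tld)) {
+       // cache is full (or the block was unsuitable): free `start` to the OS instead
+     }
+*/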
+#include "mimalloc.h"
+#include "mimalloc-internal.h"
+#include "mimalloc-atomic.h"
+
+#include "bitmap.h" // atomic bitmap
+
+#define MI_CACHE_FIELDS (16)
+#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit
+
+#define BITS_SET() ATOMIC_VAR_INIT(UINTPTR_MAX)
+#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET)
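+// Note: 16 fields of MI_BITMAP_FIELD_BITS (64 bits each on 64-bit) give the 1024
+// slots above; with one MI_SEGMENT_SIZE block (64MiB, see the segment map comments
+// below) per slot, the cache holds at most 64GiB of segments.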
+
+typedef struct mi_cache_slot_s {
+ void* p;
+ size_t memid;
+ mi_commit_mask_t commit_mask;
+ _Atomic(mi_msecs_t) expire;
+} mi_cache_slot_t;
+
+static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0
+
+static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available!
+static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET };
+static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free
+
+
+mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, bool* large, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+{
+ // only segment blocks
+ if (size != MI_SEGMENT_SIZE) return NULL;
+
+ // numa node determines start field
+ const int numa_node = _mi_os_numa_node(tld);
+ size_t start_field = 0;
+ if (numa_node > 0) {
+ start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
+ if (start_field >= MI_CACHE_FIELDS) start_field = 0;
+ }
+
+ // find an available slot
+ mi_bitmap_index_t bitidx = 0;
+ bool claimed = false;
+ if (*large) { // large allowed?
+ claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+ if (claimed) *large = true;
+ }
+ if (!claimed) {
+ claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+ if (claimed) *large = false;
+ }
+
+ if (!claimed) return NULL;
+
+ // found a slot
+ mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
+ void* p = slot->p;
+ *memid = slot->memid;
+ *is_zero = false;
+ mi_commit_mask_t cmask = slot->commit_mask; // copy
+ slot->p = NULL;
+ mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0);
+ *commit_mask = cmask;
+
+ // mark the slot as free again
+ mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx));
+ _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx);
+ return p;
+}
+
+static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
+{
+ if (mi_commit_mask_is_empty(*cmask)) {
+ // nothing
+ }
+ else if (mi_commit_mask_is_full(*cmask)) {
+ _mi_os_decommit(p, total, stats);
+ }
+ else {
+ // todo: one call to decommit the whole at once?
+ mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0);
+ size_t part = total/MI_COMMIT_MASK_BITS;
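+    // e.g. assuming MI_COMMIT_MASK_BITS is 64 and total is MI_SEGMENT_SIZE (64MiB):
+    // part is 1MiB, so a run of `count` set bits starting at `idx` decommits
+    // count MiB at offset idx MiB into the segment.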
+ uintptr_t idx;
+ uintptr_t count;
+ mi_commit_mask_t mask = *cmask;
+ mi_commit_mask_foreach(mask, idx, count) {
+ void* start = (uint8_t*)p + (idx*part);
+ size_t size = count*part;
+ _mi_os_decommit(start, size, stats);
+ }
+ mi_commit_mask_foreach_end()
+ }
+ *cmask = mi_commit_mask_empty();
+}
+
+#define MI_MAX_PURGE_PER_PUSH (4)
+
+static mi_decl_noinline void mi_segment_cache_purge(mi_os_tld_t* tld)
+{
+ UNUSED(tld);
+ mi_msecs_t now = _mi_clock_now();
+ size_t idx = (_mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX); // random start
+ size_t purged = 0;
+ for (size_t visited = 0; visited < MI_CACHE_FIELDS; visited++,idx++) { // probe just N slots
+ if (idx >= MI_CACHE_MAX) idx = 0; // wrap
+ mi_cache_slot_t* slot = &cache[idx];
+ mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire);
+ if (expire != 0 && now >= expire) { // racy read
+ // seems expired, first claim it from available
+ purged++;
+ mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx);
+ if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) {
+ // was available, we claimed it
+ expire = mi_atomic_loadi64_acquire(&slot->expire);
+ if (expire != 0 && now >= expire) { // safe read
+ // still expired, decommit it
+ mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
+ mi_assert_internal(!mi_commit_mask_is_empty(slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
+ _mi_abandoned_await_readers(); // wait until safe to decommit
+ // decommit committed parts
+ // TODO: instead of decommit, we could also free to the OS?
+ mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats);
+ }
+ _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop
+ }
+ if (purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
+ }
+ }
+}
+
+mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, mi_commit_mask_t commit_mask, bool is_large, mi_os_tld_t* tld)
+{
+ // only for normal segment blocks
+ if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false;
+
+ // numa node determines start field
+ int numa_node = _mi_os_numa_node(NULL);
+ size_t start_field = 0;
+ if (numa_node > 0) {
+ start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node;
+ if (start_field >= MI_CACHE_FIELDS) start_field = 0;
+ }
+
+ // purge expired entries
+ mi_segment_cache_purge(tld);
+
+ // find an available slot
+ mi_bitmap_index_t bitidx;
+ bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx);
+ if (!claimed) return false;
+
+ mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx));
+ mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx));
+
+ // set the slot
+ mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)];
+ slot->p = start;
+ slot->memid = memid;
+ mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0);
+ slot->commit_mask = commit_mask;
+ if (!mi_commit_mask_is_empty(commit_mask) && !is_large) {
+ long delay = mi_option_get(mi_option_arena_reset_delay);
+ if (delay == 0) {
+ _mi_abandoned_await_readers(); // wait until safe to decommit
+ mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats);
+ }
+ else {
+ mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay);
+ }
+ }
+
+ // make it available
+ _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx);
+ return true;
+}
+
+
+/* -----------------------------------------------------------
+ The following functions are used to reliably find the segment or
+ block that encompasses any pointer p (or return NULL if it is not
+ in any of our segments).
+ We maintain a bitmap over all memory with 1 bit per MI_SEGMENT_SIZE (64MiB),
+ set to 1 if that range contains the segment meta data.
+----------------------------------------------------------- */
+
+
+#if (MI_INTPTR_SIZE==8)
+#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TiB
+#else
+#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2GiB
+#endif
+
+#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE)
+#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8)
+#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE)
+
+static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE]; // 2KiB per TiB with 64MiB segments
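+// Sizes implied by the constants above (64-bit, 64MiB segments; a worked sketch):
+//   MI_SEGMENT_MAP_BITS  = 20TiB / 64MiB = 327680 bits
+//   MI_SEGMENT_MAP_SIZE  = 327680 / 8    = 40KiB
+//   MI_SEGMENT_MAP_WSIZE = 40KiB / 8     = 5120 words
+// i.e. about 2KiB of map per TiB of address space, as noted above.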
+
+static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
+ mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE?
+ uintptr_t segindex = ((uintptr_t)segment % MI_MAX_ADDRESS) / MI_SEGMENT_SIZE;
+ *bitidx = segindex % (8*MI_INTPTR_SIZE);
+ return (segindex / (8*MI_INTPTR_SIZE));
+}
+
+void _mi_segment_map_allocated_at(const mi_segment_t* segment) {
+ size_t bitidx;
+ size_t index = mi_segment_map_index_of(segment, &bitidx);
+ mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+ if (index==0) return;
+ uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+ uintptr_t newmask;
+ do {
+ newmask = (mask | ((uintptr_t)1 << bitidx));
+ } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+void _mi_segment_map_freed_at(const mi_segment_t* segment) {
+ size_t bitidx;
+ size_t index = mi_segment_map_index_of(segment, &bitidx);
+ mi_assert_internal(index < MI_SEGMENT_MAP_WSIZE);
+ if (index == 0) return;
+ uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+ uintptr_t newmask;
+ do {
+ newmask = (mask & ~((uintptr_t)1 << bitidx));
+ } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask));
+}
+
+// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
+static mi_segment_t* _mi_segment_of(const void* p) {
+ mi_segment_t* segment = _mi_ptr_segment(p);
+ size_t bitidx;
+ size_t index = mi_segment_map_index_of(segment, &bitidx);
+ // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
+ const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]);
+ if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) {
+ return segment; // yes, allocated by us
+ }
+ if (index==0) return NULL;
+  // search downwards for the first segment in case it is an interior pointer;
+  // this can be slow, but it skips one bitmap word per step, i.e. 8*MI_INTPTR_SIZE
+  // segments (4GiB with 64MiB segments), through valid huge objects
+  // note: we could maintain a lowest index to speed up the path for invalid pointers?
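+  // example (with 64MiB segments): for an interior pointer 130MiB into a 192MiB huge
+  // segment at address A, _mi_ptr_segment yields A+128MiB whose map bit is 0; the scan
+  // below finds the set bit for A two positions lower, so diff = 2*MI_SEGMENT_SIZE and
+  // we recover A (subject to the cookie and size checks at the end).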
+ size_t lobitidx;
+ size_t loindex;
+ uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1);
+ if (lobits != 0) {
+ loindex = index;
+ lobitidx = mi_bsr(lobits); // lobits != 0
+ }
+ else {
+ uintptr_t lomask = mask;
+ loindex = index - 1;
+ while (loindex > 0 && (lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex])) == 0) loindex--;
+ if (loindex==0) return NULL;
+ lobitidx = mi_bsr(lomask); // lomask != 0
+ }
+ // take difference as the addresses could be larger than the MAX_ADDRESS space.
+ size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE;
+ segment = (mi_segment_t*)((uint8_t*)segment - diff);
+
+ if (segment == NULL) return NULL;
+ mi_assert_internal((void*)segment < p);
+ bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie);
+ mi_assert_internal(cookie_ok);
+ if (mi_unlikely(!cookie_ok)) return NULL;
+ if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range
+ mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment));
+ return segment;
+}
+
+// Is this a valid pointer in our heap?
+static bool mi_is_valid_pointer(const void* p) {
+ return (_mi_segment_of(p) != NULL);
+}
+
+mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
+ return mi_is_valid_pointer(p);
+}
+
+/*
+// Return the full segment range belonging to a pointer
+static void* mi_segment_range_of(const void* p, size_t* size) {
+ mi_segment_t* segment = _mi_segment_of(p);
+ if (segment == NULL) {
+ if (size != NULL) *size = 0;
+ return NULL;
+ }
+ else {
+ if (size != NULL) *size = segment->segment_size;
+ return segment;
+ }
+}
+*/