summaryrefslogtreecommitdiff
path: root/include/mimalloc-internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/mimalloc-internal.h')
-rw-r--r--include/mimalloc-internal.h66
1 files changed, 51 insertions, 15 deletions
diff --git a/include/mimalloc-internal.h b/include/mimalloc-internal.h
index 574e5bd..06d31db 100644
--- a/include/mimalloc-internal.h
+++ b/include/mimalloc-internal.h
@@ -180,21 +180,6 @@ bool _mi_page_is_valid(mi_page_t* page);
#endif
-// -----------------------------------------------------------------------------------
-// On windows x86/x64 with msvc/clang-cl, use `rep movsb` for `memcpy` (issue #201)
-// -----------------------------------------------------------------------------------
-
-#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
-#include <intrin.h>
-static inline void _mi_memcpy_rep_movsb(void* d, const void* s, size_t n) {
- __movsb((unsigned char*)d, (const unsigned char*)s, n);
-}
-#define _mi_memcpy(d,s,n) _mi_memcpy_rep_movsb(d,s,n)
-#else
-#define _mi_memcpy(d,s,n) memcpy(d,s,n)
-#endif
-
-
/* -----------------------------------------------------------
Inlined definitions
----------------------------------------------------------- */
@@ -997,4 +982,55 @@ static inline size_t mi_bsr(uintptr_t x) {
}
+// ---------------------------------------------------------------------------------
+// Provide our own `_mi_memcpy` for potential performance optimizations.
+//
+// For now, only on Windows with msvc/clang-cl, we optimize to `rep movsb` if
+// we happen to run on x86/x64 CPUs that have "fast short rep movsb" (FSRM) support
+// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
+// ---------------------------------------------------------------------------------
+
+#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
+#include <intrin.h>
+#include <string.h>
+extern bool _mi_cpu_has_fsrm;
+static inline void _mi_memcpy(void* dst, const void* src, size_t n) {  // copy `n` bytes from `src` to `dst`
+ if (_mi_cpu_has_fsrm) {  // flag is only declared `extern` in this header; its definition lives elsewhere
+ __movsb((unsigned char*)dst, (const unsigned char*)src, n);  // `rep movsb` through the <intrin.h> intrinsic
+ }
+ else {
+ memcpy(dst, src, n); // flag is false: use the C library `memcpy`; todo: use noinline?
+ }
+}
+#else
+#include <string.h>
+static inline void _mi_memcpy(void* dst, const void* src, size_t n) {  // variant for all other targets: thin wrapper
+ memcpy(dst, src, n);  // forwards the arguments unchanged to the C library `memcpy`
+}
+#endif
+
+
+// -------------------------------------------------------------------------------
+// `_mi_memcpy_aligned` can be used if both pointers are machine-word aligned.
+// It is used, for example, in `mi_realloc`.
+// -------------------------------------------------------------------------------
+
+#if (__GNUC__ >= 4) || defined(__clang__)
+// On GCC/Clang we provide a hint that the pointers are word aligned.
+#include <string.h>
+static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {  // copy `n` bytes between word-aligned pointers
+ mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));  // both addresses must be multiples of MI_INTPTR_SIZE
+ void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);  // alignment hint to the compiler for `dst`
+ const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);  // same hint for `src`
+ memcpy(adst, asrc, n);  // NOTE(review): calls `memcpy` directly, not `_mi_memcpy`, so the `__movsb` branch is never taken here
+}
+#else
+// Default fallback on `_mi_memcpy`
+static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {  // variant without a compiler alignment hint
+ mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));  // both addresses must be multiples of MI_INTPTR_SIZE
+ _mi_memcpy(dst, src, n);  // delegate the copy to `_mi_memcpy`
+}
+#endif
+
+
#endif