Added TCMalloc and JEMalloc projects

2025-06-28 20:46:00 +02:00 · 2022-06-02 17:55:17 -07:00 · 2022-06-02 17:55:17 -07:00 · 652142e189
commit 652142e189
parent 53376f3861
242 changed files with 67746 additions and 6 deletions
--- a/BeefRT/JEMalloc/include/jemalloc/internal/atomic_gcc_sync.h
+++ b/BeefRT/JEMalloc/include/jemalloc/internal/atomic_gcc_sync.h
@ -0,0 +1,195 @@
+#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
+
+/*
+ * Wraps its arguments in braces, yielding a brace-enclosed initializer.
+ * Presumably meant for initializing the atomic_*_t structs generated below,
+ * whose only member is repr.
+ */
+#define ATOMIC_INIT(...) {__VA_ARGS__}
+
+/*
+ * Memory-ordering tags accepted by atomic_fence() and by the atomic_*
+ * accessors generated in this header.  The enumerators are named after the
+ * C11 memory orderings of the same names; there is no "consume" entry.
+ */
+typedef enum {
+	atomic_memory_order_relaxed,
+	atomic_memory_order_acquire,
+	atomic_memory_order_release,
+	atomic_memory_order_acq_rel,
+	atomic_memory_order_seq_cst
+} atomic_memory_order_t;
+
+/*
+ * Issue a memory fence for the ordering mo.
+ *
+ * Every path wraps whatever hardware fence it emits between empty asm
+ * statements that carry a "memory" clobber; those act as compiler-level
+ * barriers and emit no instruction themselves.
+ *   - relaxed: the compiler barrier only.
+ *   - seq_cst: __sync_synchronize().
+ *   - acquire / release / acq_rel: an instruction chosen per architecture by
+ *     the preprocessor ladder below, with __sync_synchronize() as the
+ *     fallback for targets that are not listed.
+ */
+ATOMIC_INLINE void
+atomic_fence(atomic_memory_order_t mo) {
+	/* Easy cases first: no barrier, and full barrier. */
+	if (mo == atomic_memory_order_relaxed) {
+		asm volatile("" ::: "memory");
+		return;
+	}
+	if (mo == atomic_memory_order_seq_cst) {
+		asm volatile("" ::: "memory");
+		__sync_synchronize();
+		asm volatile("" ::: "memory");
+		return;
+	}
+	/* Only acquire, release and acq_rel reach this point. */
+	asm volatile("" ::: "memory");
+#  if defined(__i386__) || defined(__x86_64__)
+	/* This is implicit on x86. */
+#  elif defined(__ppc64__)
+	asm volatile("lwsync");
+#  elif defined(__ppc__)
+	asm volatile("sync");
+#  elif defined(__sparc__) && defined(__arch64__)
+	if (mo == atomic_memory_order_acquire) {
+		asm volatile("membar #LoadLoad | #LoadStore");
+	} else if (mo == atomic_memory_order_release) {
+		asm volatile("membar #LoadStore | #StoreStore");
+	} else {
+		/* acq_rel: the union of the acquire and release masks. */
+		asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
+	}
+#  else
+	__sync_synchronize();
+#  endif
+	asm volatile("" ::: "memory");
+}
+
+/*
+ * A correct implementation of seq_cst loads and stores on weakly ordered
+ * architectures could do either of the following:
+ *   1. store() is weak-fence -> store -> strong fence, load() is load ->
+ *      strong-fence.
+ *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
+ *      weak-fence.
+ * The tricky thing is, load() and store() above can be the load or store
+ * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
+ * means going with strategy 2.
+ * On strongly ordered architectures, the natural strategy is to stick a strong
+ * fence after seq_cst stores, and have naked loads.  So we want the strong
+ * fences in different places on different architectures.
+ * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
+ * accomplish this.
+ */
+
+/*
+ * Fence placed in front of the load half of a seq_cst atomic load.
+ *
+ * x86 and 64-bit SPARC request only a relaxed fence here; on those targets
+ * the seq_cst fence is requested after seq_cst stores instead (see
+ * atomic_post_sc_store_fence).  Every other target requests a seq_cst fence.
+ *
+ * Declared with (void) so the definition is a real prototype in C; an empty
+ * parameter list would leave the parameters unspecified.
+ */
+ATOMIC_INLINE void
+atomic_pre_sc_load_fence(void) {
+#  if defined(__i386__) || defined(__x86_64__) ||			\
+    (defined(__sparc__) && defined(__arch64__))
+	atomic_fence(atomic_memory_order_relaxed);
+#  else
+	atomic_fence(atomic_memory_order_seq_cst);
+#  endif
+}
+
+/*
+ * Fence placed after the store half of a seq_cst atomic store.
+ *
+ * This is the mirror image of atomic_pre_sc_load_fence: x86 and 64-bit SPARC
+ * request a seq_cst fence here, while every other target requests only a
+ * relaxed fence because its seq_cst fence is requested before seq_cst loads.
+ *
+ * Declared with (void) so the definition is a real prototype in C; an empty
+ * parameter list would leave the parameters unspecified.
+ */
+ATOMIC_INLINE void
+atomic_post_sc_store_fence(void) {
+#  if defined(__i386__) || defined(__x86_64__) ||			\
+    (defined(__sparc__) && defined(__arch64__))
+	atomic_fence(atomic_memory_order_seq_cst);
+#  else
+	atomic_fence(atomic_memory_order_relaxed);
+#  endif
+}
+
+/*
+ * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) expands to:
+ *   - atomic_<short_type>_t: a struct whose single member is
+ *     "type volatile repr".
+ *   - atomic_load_<short_type>(a, mo): for seq_cst, calls
+ *     atomic_pre_sc_load_fence() first; reads repr; for anything other than
+ *     relaxed, follows the read with an acquire fence.
+ *   - atomic_store_<short_type>(a, val, mo): for anything other than relaxed,
+ *     issues a release fence first; writes repr; for seq_cst, calls
+ *     atomic_post_sc_store_fence() afterwards.
+ *   - atomic_exchange_<short_type>(a, val, mo): retries
+ *     __sync_bool_compare_and_swap() until it succeeds and returns the value
+ *     that was replaced.  mo is not consulted.
+ *   - atomic_compare_exchange_{weak,strong}_<short_type>(a, expected,
+ *     desired, success_mo, failure_mo): both have the same body, a single
+ *     __sync_val_compare_and_swap().  They return true when the value seen
+ *     equals *expected; otherwise they store the value seen into *expected
+ *     and return false.  Neither memory-order argument is consulted.
+ *
+ * The read-modify-write operations rely on whatever ordering the __sync
+ * builtins provide (GCC documents them as full barriers in most cases).
+ * lg_size is accepted but unused.
+ */
+#define JEMALLOC_GENERATE_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+typedef struct {							\
+	type volatile repr;						\
+} atomic_##short_type##_t;						\
+									\
+ATOMIC_INLINE type							\
+atomic_load_##short_type(const atomic_##short_type##_t *a,		\
+    atomic_memory_order_t mo) {						\
+	if (mo == atomic_memory_order_seq_cst) {			\
+		atomic_pre_sc_load_fence();				\
+	}								\
+	type result = a->repr;						\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_acquire);		\
+	}								\
+	return result;							\
+}									\
+									\
+ATOMIC_INLINE void							\
+atomic_store_##short_type(atomic_##short_type##_t *a,			\
+    type val, atomic_memory_order_t mo) {				\
+	if (mo != atomic_memory_order_relaxed) {			\
+		atomic_fence(atomic_memory_order_release);		\
+	}								\
+	a->repr = val;							\
+	if (mo == atomic_memory_order_seq_cst) {			\
+		atomic_post_sc_store_fence();				\
+	}								\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \
+    atomic_memory_order_t mo) {                  					 \
+	/*								\
+	 * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
+	 * an atomic exchange builtin.  We fake it with a CAS loop.	\
+	 */								\
+	while (true) {							\
+		type old = a->repr;					\
+		if (__sync_bool_compare_and_swap(&a->repr, old, val)) {	\
+			return old;					\
+		}							\
+	}								\
+}									\
+									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired,                                     \
+    atomic_memory_order_t success_mo,                          \
+    atomic_memory_order_t failure_mo) {				                \
+	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
+	    desired);							\
+	if (prev == *expected) {					\
+		return true;						\
+	} else {							\
+		*expected = prev;					\
+		return false;						\
+	}								\
+}									\
+ATOMIC_INLINE bool							\
+atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a,	\
+    type *expected, type desired,                                       \
+    atomic_memory_order_t success_mo,                            \
+    atomic_memory_order_t failure_mo) {                          \
+	type prev = __sync_val_compare_and_swap(&a->repr, *expected,	\
+	    desired);							\
+	if (prev == *expected) {					\
+		return true;						\
+	} else {							\
+		*expected = prev;					\
+		return false;						\
+	}								\
+}
+
+/*
+ * JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) expands to
+ * everything JEMALLOC_GENERATE_ATOMICS produces for the same arguments, plus
+ * atomic_fetch_{add,sub,and,or,xor}_<short_type>(a, val, mo).
+ *
+ * Each fetch_* function is a direct call to the matching
+ * __sync_fetch_and_<op>() builtin on a->repr and returns that builtin's
+ * result (per GCC's documentation, the value held before the operation).
+ * The mo argument is not consulted.  lg_size is accepted but unused.
+ */
+#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,			\
+    /* unused */ lg_size)						\
+JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)	\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_add(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_sub(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_and(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_or(&a->repr, val);			\
+}									\
+									\
+ATOMIC_INLINE type							\
+atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,	\
+    atomic_memory_order_t mo) {						\
+	return __sync_fetch_and_xor(&a->repr, val);			\
+}
+
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */