Boxify superslab registry, add bench profile, and document C7 hotpath experiments

Commit: fda6cd2e67
Parent: 18faa6a1c4
Author: Moe Charm (CI)
Date: 2025-12-07 03:12:27 +09:00
71 changed files with 2052 additions and 286 deletions
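Only one of the 71 changed files is excerpted below (apparently tiny_remote.c, judging by the includes): the remote-side table moves from a fixed 2^20-entry static array to a table owned by a new box/remote_side_box.h. That header is not part of this excerpt, so the following is a minimal sketch of the interface the call sites imply; the function names come from the diff, while the signatures (and the guess that rem_side_entry now lives in the box header) are assumptions.

#include <stdatomic.h>
#include <stdint.h>

// Sketch (assumed) of box/remote_side_box.h, inferred from this diff's call
// sites; signatures are guesses, not copied from the real header.
typedef struct {
    _Atomic(uintptr_t) key; // node pointer
    _Atomic(uintptr_t) val; // next pointer
} rem_side_entry;

void remote_side_init(void* a, void* b);    // the diff only shows remote_side_init(NULL, NULL);
                                            // parameter names/types are placeholders
rem_side_entry* remote_side_table(void);    // NULL until the box is initialized
uint32_t remote_side_effective_size(void);  // table size; a power of two, per the probing
uint32_t remote_side_effective_mask(void);  // effective_size - 1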

@@ -10,18 +10,11 @@
 #endif
 #include <string.h>
 #include "tiny_remote.h"
+#include "box/remote_side_box.h"
 #include "hakmem_tiny_superslab.h"
 #include "tiny_debug_ring.h"
-#define REM_SIDE_LOG2 20
-#define REM_SIDE_SIZE (1u<<REM_SIDE_LOG2)
-typedef struct {
-    _Atomic(uintptr_t) key; // node pointer
-    _Atomic(uintptr_t) val; // next pointer
-} rem_side_entry;
-static rem_side_entry g_rem_side[REM_SIDE_SIZE];
+static rem_side_entry* g_rem_side = NULL;
 int g_remote_side_enable = 1; // default ON; can be disabled via env or 1T hint
 extern int g_debug_remote_guard;
 static _Atomic int g_remote_scribble_once = 0;
@@ -32,6 +25,21 @@ static inline uint32_t hmix(uintptr_t v);
 static inline uint32_t tiny_remote_stage_hash(const char* stage);
 static void tiny_remote_dump_backtrace(void);
+static inline uint32_t rem_side_mask(void) {
+    return remote_side_effective_mask();
+}
+static inline uint32_t rem_side_size(void) {
+    return remote_side_effective_size();
+}
+static inline rem_side_entry* rem_side_table_local(void) {
+    if (__builtin_expect(g_rem_side == NULL, 0)) {
+        g_rem_side = remote_side_table();
+    }
+    return g_rem_side;
+}
 #if !HAKMEM_BUILD_RELEASE
 #define REM_TRACK_TABLE_LOG2 20
 #define REM_TRACK_TABLE_SIZE (1u << REM_TRACK_TABLE_LOG2)
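The probe loops in the hunks below rely on remote_side_effective_size() being a power of two with remote_side_effective_mask() == size - 1; otherwise "(i + 1) & mask" would not visit every slot. A hypothetical sanity check one could drop into the init path of this file (rem_side_check_geometry is not in the diff; it is illustration only):

#include <assert.h>

// Hypothetical: verify the geometry the linear probe depends on.
static inline void rem_side_check_geometry(void) {
    uint32_t size = rem_side_size();
    uint32_t mask = rem_side_mask();
    assert(size != 0 && (size & (size - 1)) == 0); // power of two
    assert(mask == size - 1);                      // mask spans the whole table
}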
@@ -536,14 +544,18 @@ uint32_t tiny_remote_drain_threshold(void) {
 void tiny_remote_side_set(struct SuperSlab* ss, int slab_idx, void* node, uintptr_t next) {
     (void)ss; (void)slab_idx;
     if (!g_remote_side_enable) return;
+    rem_side_entry* table = rem_side_table_local();
+    if (!table) return;
     uintptr_t k = (uintptr_t)node;
     uintptr_t base = (uintptr_t)ss;
     size_t ss_size = (size_t)1ULL << ss->lg_size;
-    uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
-    for (uint32_t n=0; n<REM_SIDE_SIZE; n++, i=(i+1)&(REM_SIDE_SIZE-1)) {
+    uint32_t mask = rem_side_mask();
+    uint32_t size = rem_side_size();
+    uint32_t i = hmix(k) & mask;
+    for (uint32_t n=0; n<size; n++, i=(i+1)&mask) {
         uintptr_t expect = 0;
-        if (atomic_compare_exchange_weak_explicit(&g_rem_side[i].key, &expect, k, memory_order_acq_rel, memory_order_relaxed)) {
-            atomic_store_explicit(&g_rem_side[i].val, next, memory_order_release);
+        if (atomic_compare_exchange_weak_explicit(&table[i].key, &expect, k, memory_order_acq_rel, memory_order_relaxed)) {
+            atomic_store_explicit(&table[i].val, next, memory_order_release);
             tiny_remote_sentinel_set(node);
             tiny_remote_watch_note("side_set", ss, slab_idx, node, 0xA233u, 0, 0);
             return;
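The slot protocol in tiny_remote_side_set is claim-then-publish: a weak CAS reserves an empty key, and a release store publishes val. A condensed, self-contained restatement with the ordering annotated (the pairing described in the comments is an inference from the orderings used; a reader can still observe key == k before val lands, a window the callers presumably tolerate or serialize):

#include <stdatomic.h>
#include <stdint.h>

typedef struct { _Atomic(uintptr_t) key; _Atomic(uintptr_t) val; } entry;

// Writer (mirrors tiny_remote_side_set): reserve the slot, then publish val.
static int publish(entry* slot, uintptr_t k, uintptr_t next) {
    uintptr_t expect = 0;
    if (atomic_compare_exchange_weak_explicit(&slot->key, &expect, k,
            memory_order_acq_rel,     // success: slot ownership is taken here
            memory_order_relaxed)) {  // failure: occupied or spurious; probe on
        atomic_store_explicit(&slot->val, next, memory_order_release);
        return 1;
    }
    return 0;
}

// Reader (mirrors tiny_remote_side_get): the acquire load of val pairs with
// the writer's release store; it can still return 0 if it races that store.
static uintptr_t lookup(entry* slot, uintptr_t k) {
    uintptr_t key = atomic_load_explicit(&slot->key, memory_order_acquire);
    if (key == k) return atomic_load_explicit(&slot->val, memory_order_acquire);
    return 0;
}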
@@ -583,12 +595,16 @@ void tiny_remote_side_set(struct SuperSlab* ss, int slab_idx, void* node, uintptr_t next) {
 uintptr_t tiny_remote_side_get(struct SuperSlab* ss, int slab_idx, void* node) {
     (void)ss; (void)slab_idx;
     (void)g_remote_side_enable; // always true in caller
+    rem_side_entry* table = rem_side_table_local();
+    if (!table) return 0;
     uintptr_t k = (uintptr_t)node;
-    uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
-    for (uint32_t n=0; n<REM_SIDE_SIZE; n++, i=(i+1)&(REM_SIDE_SIZE-1)) {
-        uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
+    uint32_t mask = rem_side_mask();
+    uint32_t size = rem_side_size();
+    uint32_t i = hmix(k) & mask;
+    for (uint32_t n=0; n<size; n++, i=(i+1)&mask) {
+        uintptr_t key = atomic_load_explicit(&table[i].key, memory_order_acquire);
         if (key == k) {
-            return atomic_load_explicit(&g_rem_side[i].val, memory_order_acquire);
+            return atomic_load_explicit(&table[i].val, memory_order_acquire);
         }
         if (key == 0) break;
     }
@@ -606,13 +622,17 @@ uintptr_t tiny_remote_side_get(struct SuperSlab* ss, int slab_idx, void* node) {
 void tiny_remote_side_clear(struct SuperSlab* ss, int slab_idx, void* node) {
     (void)ss; (void)slab_idx;
     if (!g_remote_side_enable) return;
+    rem_side_entry* table = rem_side_table_local();
+    if (!table) return;
     uintptr_t k = (uintptr_t)node;
-    uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
-    for (uint32_t n = 0; n < REM_SIDE_SIZE; n++, i = (i + 1) & (REM_SIDE_SIZE - 1)) {
-        uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
+    uint32_t mask = rem_side_mask();
+    uint32_t size = rem_side_size();
+    uint32_t i = hmix(k) & mask;
+    for (uint32_t n = 0; n < size; n++, i = (i + 1) & mask) {
+        uintptr_t key = atomic_load_explicit(&table[i].key, memory_order_acquire);
         if (key == k) {
-            atomic_store_explicit(&g_rem_side[i].val, 0, memory_order_relaxed);
-            atomic_store_explicit(&g_rem_side[i].key, 0, memory_order_release);
+            atomic_store_explicit(&table[i].val, 0, memory_order_relaxed);
+            atomic_store_explicit(&table[i].key, 0, memory_order_release);
             tiny_remote_watch_clear(node);
             return;
         }
@@ -623,10 +643,14 @@ void tiny_remote_side_clear(struct SuperSlab* ss, int slab_idx, void* node) {
 int tiny_remote_side_contains(struct SuperSlab* ss, int slab_idx, void* node) {
     (void)ss; (void)slab_idx;
     if (!g_remote_side_enable) return 0;
+    rem_side_entry* table = rem_side_table_local();
+    if (!table) return 0;
     uintptr_t k = (uintptr_t)node;
-    uint32_t i = hmix(k) & (REM_SIDE_SIZE - 1);
-    for (uint32_t n = 0; n < REM_SIDE_SIZE; n++, i = (i + 1) & (REM_SIDE_SIZE - 1)) {
-        uintptr_t key = atomic_load_explicit(&g_rem_side[i].key, memory_order_acquire);
+    uint32_t mask = rem_side_mask();
+    uint32_t size = rem_side_size();
+    uint32_t i = hmix(k) & mask;
+    for (uint32_t n = 0; n < size; n++, i = (i + 1) & mask) {
+        uintptr_t key = atomic_load_explicit(&table[i].key, memory_order_acquire);
         if (key == k) {
             return 1;
         }
@@ -639,6 +663,7 @@ void tiny_remote_side_init_from_env(void) {
     static int g_side_init_once = 0;
     if (__builtin_expect(g_side_init_once, 0)) return;
     g_side_init_once = 1;
+    remote_side_init(NULL, NULL);
     const char* side_env = getenv("HAKMEM_TINY_REMOTE_SIDE");
     int enable = 1;
     if (side_env && *side_env) {
@@ -658,8 +683,12 @@
         fprintf(stderr, "[REMOTE_SIDE_INIT] enable=%d\n", enable);
     }
     if (!enable) return;
-    for (uint32_t i = 0; i < REM_SIDE_SIZE; i++) {
-        atomic_store_explicit(&g_rem_side[i].key, 0, memory_order_relaxed);
-        atomic_store_explicit(&g_rem_side[i].val, 0, memory_order_relaxed);
+    g_rem_side = remote_side_table();
+    rem_side_entry* table = rem_side_table_local();
+    if (!table) return;
+    uint32_t size = rem_side_size();
+    for (uint32_t i = 0; i < size; i++) {
+        atomic_store_explicit(&table[i].key, 0, memory_order_relaxed);
+        atomic_store_explicit(&table[i].val, 0, memory_order_relaxed);
     }
 }
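For reference, a hedged end-to-end sketch of how the entry points in this file compose. The tiny_remote_side_* functions are the ones shown above; struct SuperSlab is treated as opaque here (its real definition lives in hakmem_tiny_superslab.h), and the assumption that tiny_remote.h declares these prototypes is just that, an assumption:

#include <stdint.h>
#include "tiny_remote.h" // assumed to declare the tiny_remote_side_* prototypes

struct SuperSlab; // opaque in this sketch

void remote_side_example(struct SuperSlab* ss, void* node, uintptr_t next) {
    tiny_remote_side_init_from_env();  // binds g_rem_side via the box; honors
                                       // HAKMEM_TINY_REMOTE_SIDE
    tiny_remote_side_set(ss, /*slab_idx=*/0, node, next);
    uintptr_t got = tiny_remote_side_get(ss, 0, node); // next on a hit
                                                       // (0 presumably signals a miss)
    if (tiny_remote_side_contains(ss, 0, node)) {
        tiny_remote_side_clear(ss, 0, node);           // empties the slot (key -> 0)
    }
    (void)got;
}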