Boxify superslab registry, add bench profile, and document C7 hotpath experiments
This commit is contained in:
@ -4,17 +4,20 @@
|
||||
#include "box/ss_addr_map_box.h" // Phase 9-1: SuperSlab address map
|
||||
#include "box/ss_cold_start_box.inc.h" // Phase 11+: Cold Start prewarm defaults
|
||||
#include "hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/mman.h> // munmap for incompatible SuperSlab eviction
|
||||
|
||||
// Global registry storage
|
||||
SuperRegEntry g_super_reg[SUPER_REG_SIZE];
|
||||
// Global registry storage (allocated via SuperRegBox)
|
||||
// Accessor for the hash-table entry array: forwards to super_reg_entries().
// Callers in this file check the returned pointer for NULL before using it.
static SuperRegEntry* reg_entries(void) {
|
||||
return super_reg_entries();
|
||||
}
|
||||
|
||||
pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
int g_super_reg_initialized = 0;
|
||||
|
||||
// Per-class registry storage (Phase 6: Registry Optimization)
|
||||
SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
|
||||
int g_super_reg_class_size[TINY_NUM_CLASSES];
|
||||
|
||||
// Phase 9: Lazy Deallocation - LRU Cache Storage
|
||||
@ -28,11 +31,23 @@ static _Atomic int g_ss_prewarm_bypass = 0;
|
||||
void hak_super_registry_init(void) {
|
||||
if (g_super_reg_initialized) return;
|
||||
|
||||
super_reg_init(NULL, NULL);
|
||||
|
||||
SuperRegEntry* entries = reg_entries();
|
||||
int reg_cap = super_reg_effective_size();
|
||||
if (!entries) {
|
||||
fprintf(stderr, "[SUPER_REG] init failed: no registry entries\n");
|
||||
abort();
|
||||
}
|
||||
// Zero-initialize all entries (hash table)
|
||||
memset(g_super_reg, 0, sizeof(g_super_reg));
|
||||
memset(entries, 0, (size_t)reg_cap * sizeof(SuperRegEntry));
|
||||
|
||||
// Zero-initialize per-class registry (Phase 6: Registry Optimization)
|
||||
memset(g_super_reg_by_class, 0, sizeof(g_super_reg_by_class));
|
||||
SuperSlab** by_class = super_reg_by_class_slots();
|
||||
int stride = super_reg_by_class_stride();
|
||||
if (by_class && stride > 0) {
|
||||
memset(by_class, 0, (size_t)TINY_NUM_CLASSES * (size_t)stride * sizeof(SuperSlab*));
|
||||
}
|
||||
memset(g_super_reg_class_size, 0, sizeof(g_super_reg_class_size));
|
||||
|
||||
// Memory fence to ensure initialization is visible to all threads
|
||||
@ -62,12 +77,22 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
|
||||
const int dbg = 0;
|
||||
#endif
|
||||
|
||||
SuperRegEntry* entries = reg_entries();
|
||||
if (!entries) {
|
||||
pthread_mutex_unlock(&g_super_reg_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int h = hak_super_hash(base, lg);
|
||||
const int mask = super_reg_effective_mask();
|
||||
const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
|
||||
? SUPER_MAX_PROBE
|
||||
: super_reg_effective_size();
|
||||
|
||||
// Step 1: Register in hash table (for address → SuperSlab lookup)
|
||||
int hash_registered = 0;
|
||||
for (int i = 0; i < SUPER_MAX_PROBE; i++) {
|
||||
SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
|
||||
for (int i = 0; i < probe_limit; i++) {
|
||||
SuperRegEntry* e = &entries[(h + i) & mask];
|
||||
|
||||
if (atomic_load_explicit(&e->base, memory_order_acquire) == 0) {
|
||||
// Found empty slot
|
||||
@ -84,7 +109,7 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
|
||||
hash_registered = 1;
|
||||
if (dbg == 1) {
|
||||
fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d magic=%llx\n",
|
||||
(void*)base, lg, (h + i) & SUPER_REG_MASK,
|
||||
(void*)base, lg, (h + i) & mask,
|
||||
(unsigned long long)ss->magic);
|
||||
}
|
||||
break;
|
||||
@ -131,12 +156,22 @@ void hak_super_unregister(uintptr_t base) {
|
||||
|
||||
// Step 1: Find and remove from hash table
|
||||
SuperSlab* ss = NULL; // Save SuperSlab pointer for per-class removal
|
||||
SuperRegEntry* entries = reg_entries();
|
||||
if (!entries) {
|
||||
pthread_mutex_unlock(&g_super_reg_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
for (int lg = 20; lg <= 21; lg++) {
|
||||
int h = hak_super_hash(base, lg);
|
||||
const int mask = super_reg_effective_mask();
|
||||
const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
|
||||
? SUPER_MAX_PROBE
|
||||
: super_reg_effective_size();
|
||||
|
||||
// Linear probing to find matching entry
|
||||
for (int i = 0; i < SUPER_MAX_PROBE; i++) {
|
||||
SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
|
||||
for (int i = 0; i < probe_limit; i++) {
|
||||
SuperRegEntry* e = &entries[(h + i) & mask];
|
||||
|
||||
if (atomic_load_explicit(&e->base, memory_order_acquire) == base && e->lg_size == lg) {
|
||||
// Found entry to remove
|
||||
@ -775,30 +810,37 @@ void hak_ss_prewarm_init(void) {
|
||||
void hak_super_registry_stats(SuperRegStats* stats) {
|
||||
if (!stats) return;
|
||||
|
||||
stats->total_slots = SUPER_REG_SIZE;
|
||||
int eff_size = super_reg_effective_size();
|
||||
int eff_mask = super_reg_effective_mask();
|
||||
SuperRegEntry* reg = reg_entries();
|
||||
|
||||
stats->total_slots = eff_size;
|
||||
stats->used_slots = 0;
|
||||
stats->max_probe_depth = 0;
|
||||
if (!reg || eff_size <= 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
pthread_mutex_lock(&g_super_reg_lock);
|
||||
|
||||
// Count used slots
|
||||
for (int i = 0; i < SUPER_REG_SIZE; i++) {
|
||||
if (atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire) != 0) {
|
||||
for (int i = 0; i < eff_size; i++) {
|
||||
if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
|
||||
stats->used_slots++;
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate max probe depth
|
||||
for (int i = 0; i < SUPER_REG_SIZE; i++) {
|
||||
if (atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire) != 0) {
|
||||
uintptr_t base = atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire);
|
||||
int lg = g_super_reg[i].lg_size; // Phase 8.3: Use stored lg_size
|
||||
for (int i = 0; i < eff_size; i++) {
|
||||
if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
|
||||
uintptr_t base = atomic_load_explicit(&reg[i].base, memory_order_acquire);
|
||||
int lg = reg[i].lg_size; // Phase 8.3: Use stored lg_size
|
||||
int h = hak_super_hash(base, lg);
|
||||
|
||||
// Find actual probe depth for this entry
|
||||
for (int j = 0; j < SUPER_MAX_PROBE; j++) {
|
||||
int idx = (h + j) & SUPER_REG_MASK;
|
||||
if (atomic_load_explicit(&g_super_reg[idx].base, memory_order_acquire) == base && g_super_reg[idx].lg_size == lg) {
|
||||
int idx = (h + j) & eff_mask;
|
||||
if (atomic_load_explicit(&reg[idx].base, memory_order_acquire) == base && reg[idx].lg_size == lg) {
|
||||
if (j > stats->max_probe_depth) {
|
||||
stats->max_probe_depth = j;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user