Boxify superslab registry, add bench profile, and document C7 hotpath experiments

This commit is contained in:
Moe Charm (CI)
2025-12-07 03:12:27 +09:00
parent 18faa6a1c4
commit fda6cd2e67
71 changed files with 2052 additions and 286 deletions

View File

@ -4,17 +4,20 @@
#include "box/ss_addr_map_box.h" // Phase 9-1: SuperSlab address map
#include "box/ss_cold_start_box.inc.h" // Phase 11+: Cold Start prewarm defaults
#include "hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <sys/mman.h> // munmap for incompatible SuperSlab eviction
// Global registry storage
SuperRegEntry g_super_reg[SUPER_REG_SIZE];
// Global registry storage (allocated via SuperRegBox).
// File-local accessor: forwards to super_reg_entries() and hands back its
// result unchanged. Callers in this file NULL-check the returned pointer
// before touching the table.
static SuperRegEntry* reg_entries(void) {
    SuperRegEntry* table = super_reg_entries();
    return table;
}
pthread_mutex_t g_super_reg_lock = PTHREAD_MUTEX_INITIALIZER;
int g_super_reg_initialized = 0;
// Per-class registry storage (Phase 6: Registry Optimization)
SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
int g_super_reg_class_size[TINY_NUM_CLASSES];
// Phase 9: Lazy Deallocation - LRU Cache Storage
@ -28,11 +31,23 @@ static _Atomic int g_ss_prewarm_bypass = 0;
void hak_super_registry_init(void) {
if (g_super_reg_initialized) return;
super_reg_init(NULL, NULL);
SuperRegEntry* entries = reg_entries();
int reg_cap = super_reg_effective_size();
if (!entries) {
fprintf(stderr, "[SUPER_REG] init failed: no registry entries\n");
abort();
}
// Zero-initialize all entries (hash table)
memset(g_super_reg, 0, sizeof(g_super_reg));
memset(entries, 0, (size_t)reg_cap * sizeof(SuperRegEntry));
// Zero-initialize per-class registry (Phase 6: Registry Optimization)
memset(g_super_reg_by_class, 0, sizeof(g_super_reg_by_class));
SuperSlab** by_class = super_reg_by_class_slots();
int stride = super_reg_by_class_stride();
if (by_class && stride > 0) {
memset(by_class, 0, (size_t)TINY_NUM_CLASSES * (size_t)stride * sizeof(SuperSlab*));
}
memset(g_super_reg_class_size, 0, sizeof(g_super_reg_class_size));
// Memory fence to ensure initialization is visible to all threads
@ -62,12 +77,22 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
const int dbg = 0;
#endif
SuperRegEntry* entries = reg_entries();
if (!entries) {
pthread_mutex_unlock(&g_super_reg_lock);
return 0;
}
int h = hak_super_hash(base, lg);
const int mask = super_reg_effective_mask();
const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
? SUPER_MAX_PROBE
: super_reg_effective_size();
// Step 1: Register in hash table (for address → SuperSlab lookup)
int hash_registered = 0;
for (int i = 0; i < SUPER_MAX_PROBE; i++) {
SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
for (int i = 0; i < probe_limit; i++) {
SuperRegEntry* e = &entries[(h + i) & mask];
if (atomic_load_explicit(&e->base, memory_order_acquire) == 0) {
// Found empty slot
@ -84,7 +109,7 @@ int hak_super_register(uintptr_t base, SuperSlab* ss) {
hash_registered = 1;
if (dbg == 1) {
fprintf(stderr, "[SUPER_REG] register base=%p lg=%d slot=%d magic=%llx\n",
(void*)base, lg, (h + i) & SUPER_REG_MASK,
(void*)base, lg, (h + i) & mask,
(unsigned long long)ss->magic);
}
break;
@ -131,12 +156,22 @@ void hak_super_unregister(uintptr_t base) {
// Step 1: Find and remove from hash table
SuperSlab* ss = NULL; // Save SuperSlab pointer for per-class removal
SuperRegEntry* entries = reg_entries();
if (!entries) {
pthread_mutex_unlock(&g_super_reg_lock);
return;
}
for (int lg = 20; lg <= 21; lg++) {
int h = hak_super_hash(base, lg);
const int mask = super_reg_effective_mask();
const int probe_limit = super_reg_effective_size() > SUPER_MAX_PROBE
? SUPER_MAX_PROBE
: super_reg_effective_size();
// Linear probing to find matching entry
for (int i = 0; i < SUPER_MAX_PROBE; i++) {
SuperRegEntry* e = &g_super_reg[(h + i) & SUPER_REG_MASK];
for (int i = 0; i < probe_limit; i++) {
SuperRegEntry* e = &entries[(h + i) & mask];
if (atomic_load_explicit(&e->base, memory_order_acquire) == base && e->lg_size == lg) {
// Found entry to remove
@ -775,30 +810,37 @@ void hak_ss_prewarm_init(void) {
void hak_super_registry_stats(SuperRegStats* stats) {
if (!stats) return;
stats->total_slots = SUPER_REG_SIZE;
int eff_size = super_reg_effective_size();
int eff_mask = super_reg_effective_mask();
SuperRegEntry* reg = reg_entries();
stats->total_slots = eff_size;
stats->used_slots = 0;
stats->max_probe_depth = 0;
if (!reg || eff_size <= 0) {
return;
}
pthread_mutex_lock(&g_super_reg_lock);
// Count used slots
for (int i = 0; i < SUPER_REG_SIZE; i++) {
if (atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire) != 0) {
for (int i = 0; i < eff_size; i++) {
if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
stats->used_slots++;
}
}
// Calculate max probe depth
for (int i = 0; i < SUPER_REG_SIZE; i++) {
if (atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire) != 0) {
uintptr_t base = atomic_load_explicit(&g_super_reg[i].base, memory_order_acquire);
int lg = g_super_reg[i].lg_size; // Phase 8.3: Use stored lg_size
for (int i = 0; i < eff_size; i++) {
if (atomic_load_explicit(&reg[i].base, memory_order_acquire) != 0) {
uintptr_t base = atomic_load_explicit(&reg[i].base, memory_order_acquire);
int lg = reg[i].lg_size; // Phase 8.3: Use stored lg_size
int h = hak_super_hash(base, lg);
// Find actual probe depth for this entry
for (int j = 0; j < SUPER_MAX_PROBE; j++) {
int idx = (h + j) & SUPER_REG_MASK;
if (atomic_load_explicit(&g_super_reg[idx].base, memory_order_acquire) == base && g_super_reg[idx].lg_size == lg) {
int idx = (h + j) & eff_mask;
if (atomic_load_explicit(&reg[idx].base, memory_order_acquire) == base && reg[idx].lg_size == lg) {
if (j > stats->max_probe_depth) {
stats->max_probe_depth = j;
}