hakmem/core/hakmem_tiny_superslab.c
Moe Charm (CI) 6b86c60a20 P1.3: Add meta->active for TLS SLL tracking
Add active field to TinySlabMeta to track blocks currently held by
users (not in TLS SLL or freelist caches). This enables accurate
empty slab detection that accounts for TLS SLL cached blocks.

Changes:
- superslab_types.h: Add _Atomic uint16_t active field
- ss_allocation_box.c, hakmem_tiny_superslab.c: Initialize active=0
- tiny_free_fast_v2.inc.h: Decrement active on TLS SLL push
- tiny_alloc_fast.inc.h: Add tiny_active_track_alloc() helper,
  increment active on TLS SLL pop (all code paths)
- ss_hot_cold_box.h: ss_is_slab_empty() uses active when enabled

All tracking is ENV-gated: HAKMEM_TINY_ACTIVE_TRACK=1 to enable.
Default is off for zero performance impact.

Invariant: active = used - tls_cached (active <= used)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 13:53:45 +09:00
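
A minimal sketch of the ENV-gated active tracking described above, for orientation only: TinySlabMeta.active, tiny_active_track_alloc() and HAKMEM_TINY_ACTIVE_TRACK are named in the commit message, but the bodies below, the cached-getenv pattern, and the decrement helper's name are illustrative assumptions, not the actual implementation in tiny_alloc_fast.inc.h / tiny_free_fast_v2.inc.h.

#include <stdatomic.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct {
    _Atomic uint16_t active;  /* blocks currently held by users (P1.3) */
    /* ... used, capacity, freelist, etc. omitted in this sketch ... */
} TinySlabMeta;

/* Parse HAKMEM_TINY_ACTIVE_TRACK once; default off => no hot-path work. */
static inline int tiny_active_track_enabled(void) {
    static int g_en = -1;
    if (g_en == -1) {
        const char* e = getenv("HAKMEM_TINY_ACTIVE_TRACK");
        g_en = (e && *e && *e != '0') ? 1 : 0;
    }
    return g_en;
}

/* Alloc path: a block leaves the TLS SLL / freelist and goes to the user. */
static inline void tiny_active_track_alloc(TinySlabMeta* meta) {
    if (tiny_active_track_enabled())
        atomic_fetch_add_explicit(&meta->active, 1, memory_order_relaxed);
}

/* Free path: the user's block is pushed onto the TLS SLL cache.
 * Invariant maintained: active = used - tls_cached, so active <= used. */
static inline void tiny_active_track_dec(TinySlabMeta* meta) {
    if (tiny_active_track_enabled())
        atomic_fetch_sub_explicit(&meta->active, 1, memory_order_relaxed);
}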

// hakmem_tiny_superslab.c - SuperSlab allocator implementation (Phase 6.22)
// Purpose: 2MB aligned slab allocation with fast pointer→slab lookup
// License: MIT
// Date: 2025-10-24
#include "hakmem_tiny_superslab.h"
#include "box/ss_hot_cold_box.h" // Phase 3d-C: Hot/Cold Split
#include "hakmem_super_registry.h" // Phase 1: Registry integration
#include "hakmem_tiny.h" // For tiny_self_u32
#include "hakmem_tiny_config.h" // For extern g_tiny_class_sizes
#include "hakmem_shared_pool.h" // Phase 12: Shared SuperSlab pool backend (skeleton)
#include <sys/mman.h>
#include <sys/resource.h> // getrlimit for OOM diagnostics
#include <errno.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h> // getenv, atoi
#include <pthread.h>
#include <unistd.h>
#include "hakmem_internal.h" // HAKMEM_LOG for release-silent logging
#include "tiny_region_id.h" // For HEADER_MAGIC / HEADER_CLASS_MASK (restore header on remote-drain)
#include "hakmem_tiny_integrity.h" // HAK_CHECK_CLASS_IDX
#include "box/tiny_next_ptr_box.h" // For tiny_next_write
#include "box/slab_freelist_atomic.h" // Phase 1: Atomic freelist accessor
static int g_ss_force_lg = -1;
static _Atomic int g_ss_populate_once = 0;
// Forward: decide next SuperSlab lg for a class (ACE-aware, clamped)
static inline uint8_t hak_tiny_superslab_next_lg(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return SUPERSLAB_LG_DEFAULT;
}
// Prefer ACE target if within allowed range
uint8_t t = atomic_load_explicit((_Atomic uint8_t*)&g_ss_ace[class_idx].target_lg,
memory_order_relaxed);
if (t < SUPERSLAB_LG_MIN || t > SUPERSLAB_LG_MAX) {
return SUPERSLAB_LG_DEFAULT;
}
return t;
}
// ============================================================================
// Global Statistics
// ============================================================================
static pthread_mutex_t g_superslab_lock = PTHREAD_MUTEX_INITIALIZER;
uint64_t g_superslabs_allocated = 0; // Non-static for debugging
uint64_t g_superslabs_freed = 0; // Phase 7.6: Non-static for test access
uint64_t g_bytes_allocated = 0; // Non-static for debugging
// ============================================================================
// Phase 2a: Dynamic Expansion - Global per-class SuperSlabHeads
// ============================================================================
SuperSlabHead* g_superslab_heads[TINY_NUM_CLASSES_SS] = {NULL};
// Debug counters
_Atomic uint64_t g_ss_active_dec_calls = 0;
_Atomic uint64_t g_hak_tiny_free_calls = 0;
_Atomic uint64_t g_ss_remote_push_calls = 0;
// Free path instrumentation (lightweight, for OOM/route diagnosis)
_Atomic uint64_t g_free_ss_enter = 0; // hak_tiny_free_superslab() entries
_Atomic uint64_t g_free_local_box_calls = 0; // same-thread freelist pushes
_Atomic uint64_t g_free_remote_box_calls = 0; // cross-thread remote pushes
// Per-class counters for gating/metrics (Tiny classes = 8)
uint64_t g_ss_alloc_by_class[8] = {0};
uint64_t g_ss_freed_by_class[8] = {0};
typedef struct SuperslabCacheEntry {
struct SuperslabCacheEntry* next;
} SuperslabCacheEntry;
static SuperslabCacheEntry* g_ss_cache_head[8] = {0};
static size_t g_ss_cache_count[8] = {0};
static size_t g_ss_cache_cap[8] = {0};
static size_t g_ss_precharge_target[8] = {0};
static _Atomic int g_ss_precharge_done[8] = {0};
static int g_ss_cache_enabled = 0;
static pthread_once_t g_ss_cache_once = PTHREAD_ONCE_INIT;
static pthread_mutex_t g_ss_cache_lock[8];
uint64_t g_ss_cache_hits[8] = {0};
uint64_t g_ss_cache_misses[8] = {0};
uint64_t g_ss_cache_puts[8] = {0};
uint64_t g_ss_cache_drops[8] = {0};
uint64_t g_ss_cache_precharged[8] = {0};
uint64_t g_superslabs_reused = 0;
uint64_t g_superslabs_cached = 0;
static void ss_cache_global_init(void) {
for (int i = 0; i < 8; i++) {
pthread_mutex_init(&g_ss_cache_lock[i], NULL);
}
}
static inline void ss_cache_ensure_init(void) {
pthread_once(&g_ss_cache_once, ss_cache_global_init);
}
static void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate);
static void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask);
static SuperslabCacheEntry* ss_cache_pop(uint8_t size_class);
static int ss_cache_push(uint8_t size_class, SuperSlab* ss);
// Drain remote MPSC stack into freelist (ownership already verified by caller)
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
{
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;
static _Atomic uint32_t g_remote_drain_diag_once = 0;
static int g_remote_drain_diag_en = -1;
// Atomically take the whole remote list
uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
memory_order_acq_rel);
if (head == 0) return;
// Convert remote stack (offset 0 next) into freelist encoding via Box API
// and splice in front of current freelist preserving relative order.
void* prev = meta->freelist;
int cls = (int)meta->class_idx;
HAK_CHECK_CLASS_IDX(cls, "_ss_remote_drain_to_freelist_unsafe");
if (__builtin_expect(cls < 0 || cls >= TINY_NUM_CLASSES, 0)) {
static _Atomic int g_remote_drain_cls_oob = 0;
if (atomic_fetch_add_explicit(&g_remote_drain_cls_oob, 1, memory_order_relaxed) == 0) {
fprintf(stderr,
"[REMOTE_DRAIN_CLASS_OOB] ss=%p slab_idx=%d meta=%p cls=%d head=%#lx\n",
(void*)ss, slab_idx, (void*)meta, cls, (unsigned long)head);
}
return;
}
uintptr_t cur = head;
while (cur != 0) {
uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0
#if !HAKMEM_BUILD_RELEASE
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SLL_DIAG");
g_remote_drain_diag_en = (e && *e && *e != '0') ? 1 : 0;
}
#else
if (__builtin_expect(g_remote_drain_diag_en == -1, 0)) {
g_remote_drain_diag_en = 0;
}
#endif
if (__builtin_expect(g_remote_drain_diag_en, 0)) {
uintptr_t addr = (uintptr_t)next;
if (addr != 0 && (addr < 4096 || addr > 0x00007fffffffffffULL)) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_NEXT_INVALID] cls=%d slab=%d cur=%p next=%p head=%#lx prev=%p count=%u\n",
cls,
slab_idx,
(void*)cur,
(void*)next,
(unsigned long)head,
prev,
(unsigned)meta->used);
}
}
#if HAKMEM_TINY_HEADER_CLASSIDX
int hdr_cls = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls >= 0 && hdr_cls != cls) {
uint32_t shot = atomic_fetch_add_explicit(&g_remote_drain_diag_once, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d head=%#lx\n",
cls, slab_idx, (void*)cur, hdr_cls, (int)meta->class_idx, (unsigned long)head);
}
}
#endif
}
#if HAKMEM_TINY_HEADER_CLASSIDX
// Cross-check header vs meta before writing next (even if diag is off)
{
int hdr_cls_pre = tiny_region_id_read_header((uint8_t*)cur + 1);
if (hdr_cls_pre >= 0 && hdr_cls_pre != cls) {
static _Atomic uint32_t g_hdr_meta_mismatch_rd = 0;
uint32_t n = atomic_fetch_add_explicit(&g_hdr_meta_mismatch_rd, 1, memory_order_relaxed);
if (n < 16) {
fprintf(stderr,
"[REMOTE_DRAIN_HDR_META_MISMATCH] cls=%d slab=%d cur=%p hdr_cls=%d meta_cls=%d\n",
cls, slab_idx, (void*)cur, hdr_cls_pre, (int)meta->class_idx);
}
}
}
#endif
// Restore header for header-classes (class 1-6) which were clobbered by remote push
#if HAKMEM_TINY_HEADER_CLASSIDX
if (cls != 0) {
uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
*(uint8_t*)(uintptr_t)cur = expected;
}
#endif
// Rewrite next pointer to Box representation for this class
tiny_next_write(cls, (void*)cur, prev);
prev = (void*)cur;
cur = next;
}
meta->freelist = prev;
// Reset remote count after full drain
atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
// Update freelist/nonempty visibility bits
uint32_t bit = (1u << slab_idx);
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
}
static inline void ss_stats_os_alloc(uint8_t size_class, size_t ss_size) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_allocated++;
if (size_class < 8) {
g_ss_alloc_by_class[size_class]++;
}
g_bytes_allocated += ss_size;
pthread_mutex_unlock(&g_superslab_lock);
}
static inline void ss_stats_cache_reuse(void) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_reused++;
pthread_mutex_unlock(&g_superslab_lock);
}
static inline void ss_stats_cache_store(void) {
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_cached++;
pthread_mutex_unlock(&g_superslab_lock);
}
// ============================================================================
// Phase 8.3: ACE (Adaptive Cache Engine) State
// ============================================================================
SuperSlabACEState g_ss_ace[TINY_NUM_CLASSES_SS] = {{0}};
// Phase 8.3: hak_now_ns() is now defined in hakmem_tiny_superslab.h as static inline
// ============================================================================
// Diagnostics
// ============================================================================
static void log_superslab_oom_once(size_t ss_size, size_t alloc_size, int err) {
static int logged = 0;
if (logged) return;
logged = 1;
// CRITICAL FIX: Increment lock depth FIRST before any LIBC calls
// fopen/fclose/getrlimit/fprintf all may call malloc internally
// Must bypass HAKMEM wrapper to avoid header mismatch crash
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++; // Force wrapper to use __libc_malloc
struct rlimit rl = {0};
if (getrlimit(RLIMIT_AS, &rl) != 0) {
rl.rlim_cur = RLIM_INFINITY;
rl.rlim_max = RLIM_INFINITY;
}
unsigned long vm_size_kb = 0;
unsigned long vm_rss_kb = 0;
FILE* status = fopen("/proc/self/status", "r");
if (status) {
char line[256];
while (fgets(line, sizeof(line), status)) {
if (strncmp(line, "VmSize:", 7) == 0) {
(void)sscanf(line + 7, "%lu", &vm_size_kb);
} else if (strncmp(line, "VmRSS:", 6) == 0) {
(void)sscanf(line + 6, "%lu", &vm_rss_kb);
}
}
fclose(status);
}
// CRITICAL FIX: Do NOT decrement lock_depth yet!
// fprintf() below may call malloc for buffering
char rl_cur_buf[32];
char rl_max_buf[32];
if (rl.rlim_cur == RLIM_INFINITY) {
strcpy(rl_cur_buf, "inf");
} else {
snprintf(rl_cur_buf, sizeof(rl_cur_buf), "%llu", (unsigned long long)rl.rlim_cur);
}
if (rl.rlim_max == RLIM_INFINITY) {
strcpy(rl_max_buf, "inf");
} else {
snprintf(rl_max_buf, sizeof(rl_max_buf), "%llu", (unsigned long long)rl.rlim_max);
}
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[SS OOM] mmap failed: err=%d ss_size=%zu alloc_size=%zu "
"alloc=%llu freed=%llu bytes=%llu "
"RLIMIT_AS(cur=%s max=%s) VmSize=%lu kB VmRSS=%lu kB\n",
err,
ss_size,
alloc_size,
(unsigned long long)g_superslabs_allocated,
(unsigned long long)g_superslabs_freed,
(unsigned long long)g_bytes_allocated,
rl_cur_buf,
rl_max_buf,
vm_size_kb,
vm_rss_kb);
#endif
g_hakmem_lock_depth--; // Now safe to restore (all libc calls complete)
}
// Global counters for debugging (non-static for external access)
_Atomic uint64_t g_ss_mmap_count = 0;
_Atomic uint64_t g_final_fallback_mmap_count = 0;
static void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int populate) {
void* ptr = NULL;
static int log_count = 0;
#ifdef MAP_ALIGNED_SUPER
int map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#ifdef MAP_POPULATE
if (populate) {
map_flags |= MAP_POPULATE;
}
#endif
ptr = mmap(NULL, ss_size,
PROT_READ | PROT_WRITE,
map_flags,
-1, 0);
if (ptr != MAP_FAILED) {
atomic_fetch_add(&g_ss_mmap_count, 1);
if (((uintptr_t)ptr & ss_mask) == 0) {
ss_stats_os_alloc(size_class, ss_size);
return ptr;
}
munmap(ptr, ss_size);
ptr = NULL;
} else {
log_superslab_oom_once(ss_size, ss_size, errno);
}
#endif
size_t alloc_size = ss_size * 2;
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef MAP_POPULATE
if (populate) {
flags |= MAP_POPULATE;
}
#endif
void* raw = mmap(NULL, alloc_size,
PROT_READ | PROT_WRITE,
flags,
-1, 0);
if (raw != MAP_FAILED) {
uint64_t count = atomic_fetch_add(&g_ss_mmap_count, 1) + 1;
#if !HAKMEM_BUILD_RELEASE
if (log_count < 10) {
fprintf(stderr, "[SUPERSLAB_MMAP] #%lu: class=%d size=%zu (total SuperSlab mmaps so far)\n",
(unsigned long)count, size_class, ss_size);
log_count++;
}
#endif
}
if (raw == MAP_FAILED) {
log_superslab_oom_once(ss_size, alloc_size, errno);
return NULL;
}
uintptr_t raw_addr = (uintptr_t)raw;
uintptr_t aligned_addr = (raw_addr + ss_mask) & ~ss_mask;
ptr = (void*)aligned_addr;
size_t prefix_size = aligned_addr - raw_addr;
if (prefix_size > 0) {
munmap(raw, prefix_size);
}
size_t suffix_size = alloc_size - prefix_size - ss_size;
if (suffix_size > 0) {
if (populate) {
#ifdef MADV_DONTNEED
madvise((char*)ptr + ss_size, suffix_size, MADV_DONTNEED);
#endif
} else {
munmap((char*)ptr + ss_size, suffix_size);
}
}
ss_stats_os_alloc(size_class, ss_size);
return ptr;
}
static void ss_cache_precharge(uint8_t size_class, size_t ss_size, uintptr_t ss_mask) {
if (!g_ss_cache_enabled) return;
if (size_class >= 8) return;
if (g_ss_precharge_target[size_class] == 0) return;
if (atomic_load_explicit(&g_ss_precharge_done[size_class], memory_order_acquire)) return;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
size_t target = g_ss_precharge_target[size_class];
size_t cap = g_ss_cache_cap[size_class];
size_t desired = target;
if (cap != 0 && desired > cap) {
desired = cap;
}
while (g_ss_cache_count[size_class] < desired) {
void* raw = ss_os_acquire(size_class, ss_size, ss_mask, 1);
if (!raw) {
break;
}
SuperslabCacheEntry* entry = (SuperslabCacheEntry*)raw;
entry->next = g_ss_cache_head[size_class];
g_ss_cache_head[size_class] = entry;
g_ss_cache_count[size_class]++;
g_ss_cache_precharged[size_class]++;
}
atomic_store_explicit(&g_ss_precharge_done[size_class], 1, memory_order_release);
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
}
static SuperslabCacheEntry* ss_cache_pop(uint8_t size_class) {
if (!g_ss_cache_enabled) return NULL;
if (size_class >= 8) return NULL;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
SuperslabCacheEntry* entry = g_ss_cache_head[size_class];
if (entry) {
g_ss_cache_head[size_class] = entry->next;
if (g_ss_cache_count[size_class] > 0) {
g_ss_cache_count[size_class]--;
}
entry->next = NULL;
g_ss_cache_hits[size_class]++;
} else {
g_ss_cache_misses[size_class]++;
}
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return entry;
}
static int ss_cache_push(uint8_t size_class, SuperSlab* ss) {
if (!g_ss_cache_enabled) return 0;
if (size_class >= 8) return 0;
ss_cache_ensure_init();
pthread_mutex_lock(&g_ss_cache_lock[size_class]);
size_t cap = g_ss_cache_cap[size_class];
if (cap != 0 && g_ss_cache_count[size_class] >= cap) {
g_ss_cache_drops[size_class]++;
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return 0;
}
SuperslabCacheEntry* entry = (SuperslabCacheEntry*)ss;
entry->next = g_ss_cache_head[size_class];
g_ss_cache_head[size_class] = entry;
g_ss_cache_count[size_class]++;
g_ss_cache_puts[size_class]++;
pthread_mutex_unlock(&g_ss_cache_lock[size_class]);
return 1;
}
/*
* Legacy backend for hak_tiny_alloc_superslab_box().
*
* Phase 12 Stage A/B:
* - Uses per-class SuperSlabHead (g_superslab_heads) as the implementation.
* - Callers MUST use hak_tiny_alloc_superslab_box() and never touch this directly.
* - Later Stage C: this function will be replaced by a shared_pool backend.
*/
static SuperSlabHead* init_superslab_head(int class_idx);
static int expand_superslab_head(SuperSlabHead* head);
static void* hak_tiny_alloc_superslab_backend_legacy(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlabHead* head = g_superslab_heads[class_idx];
if (!head) {
head = init_superslab_head(class_idx);
if (!head) {
return NULL;
}
g_superslab_heads[class_idx] = head;
}
SuperSlab* chunk = head->current_chunk ? head->current_chunk : head->first_chunk;
while (chunk) {
int cap = ss_slabs_capacity(chunk);
for (int slab_idx = 0; slab_idx < cap; slab_idx++) {
TinySlabMeta* meta = &chunk->slabs[slab_idx];
// Skip slabs that belong to a different class (or are uninitialized).
if (meta->class_idx != (uint8_t)class_idx && meta->class_idx != 255) {
continue;
}
// P1.2 FIX: Initialize slab on first use (like shared backend does)
// This ensures class_map is populated for all slabs, not just slab 0
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(chunk, slab_idx, block_size, owner_tid);
meta = &chunk->slabs[slab_idx]; // Refresh pointer after init
meta->class_idx = (uint8_t)class_idx;
// P1.2: Update class_map for dynamic slab initialization
chunk->class_map[slab_idx] = (uint8_t)class_idx;
}
if (meta->used < meta->capacity) {
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
uint8_t* base = (uint8_t*)chunk
+ SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE
+ offset;
meta->used++;
atomic_fetch_add_explicit(&chunk->total_active_blocks, 1, memory_order_relaxed);
return (void*)base;
}
}
chunk = chunk->next_chunk;
}
if (expand_superslab_head(head) < 0) {
return NULL;
}
SuperSlab* new_chunk = head->current_chunk;
if (!new_chunk) {
return NULL;
}
int cap2 = ss_slabs_capacity(new_chunk);
for (int slab_idx = 0; slab_idx < cap2; slab_idx++) {
TinySlabMeta* meta = &new_chunk->slabs[slab_idx];
// P1.2 FIX: Initialize slab on first use (like shared backend does)
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(new_chunk, slab_idx, block_size, owner_tid);
meta = &new_chunk->slabs[slab_idx]; // Refresh pointer after init
meta->class_idx = (uint8_t)class_idx;
// P1.2: Update class_map for dynamic slab initialization
new_chunk->class_map[slab_idx] = (uint8_t)class_idx;
}
if (meta->used < meta->capacity) {
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
uint8_t* base = (uint8_t*)new_chunk
+ SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE
+ offset;
meta->used++;
atomic_fetch_add_explicit(&new_chunk->total_active_blocks, 1, memory_order_relaxed);
return (void*)base;
}
}
return NULL;
}
/*
* Shared pool backend for hak_tiny_alloc_superslab_box().
*
* Phase 12-2:
* - Uses SharedSuperSlabPool (g_shared_pool) to obtain a SuperSlab/slab
* for the requested class_idx.
* - This backend EXPRESSLY owns only:
* - choosing (ss, slab_idx) via shared_pool_acquire_slab()
* - initializing that slab's TinySlabMeta via superslab_init_slab()
* and nothing else; all callers must go through hak_tiny_alloc_superslab_box().
*
* - For now this is a minimal, conservative implementation:
* - One linear bump-run is carved from the acquired slab using tiny_block_stride_for_class().
* - No complex per-slab freelist or refill policy yet (Phase 12-3+).
* - If shared_pool_acquire_slab() fails, we fall back to legacy backend.
*/
static void* hak_tiny_alloc_superslab_backend_shared(int class_idx)
{
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlab* ss = NULL;
int slab_idx = -1;
if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0 || !ss) {
// Shared pool could not provide a slab; caller may choose to fall back.
return NULL;
}
TinySlabMeta* meta = &ss->slabs[slab_idx];
// Defensive: shared_pool must either hand us an UNASSIGNED slab or one
// already bound to this class. Anything else is a hard bug.
if (meta->class_idx != 255 && meta->class_idx != (uint8_t)class_idx) {
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[HAKMEM][SS_SHARED] BUG: acquire_slab mismatch: cls=%d meta->class_idx=%u slab_idx=%d ss=%p\n",
class_idx, (unsigned)meta->class_idx, slab_idx, (void*)ss);
#endif
return NULL;
}
// Initialize slab geometry once for this class.
if (meta->capacity == 0) {
size_t block_size = g_tiny_class_sizes[class_idx];
// LARSON FIX: Pass actual thread ID for cross-thread free detection
uint32_t my_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(ss, slab_idx, block_size, my_tid);
meta = &ss->slabs[slab_idx];
// CRITICAL FIX: Always set class_idx after init to avoid C0/C7 confusion.
// New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
// Must explicitly set to requested class, not just when class_idx==255.
meta->class_idx = (uint8_t)class_idx;
// P1.1: Update class_map in shared acquire path
ss->class_map[slab_idx] = (uint8_t)class_idx;
}
// Final contract check before computing addresses.
if (meta->class_idx != (uint8_t)class_idx ||
meta->capacity == 0 ||
meta->used > meta->capacity) {
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr,
"[HAKMEM][SS_SHARED] BUG: invalid slab meta before alloc: "
"cls=%d slab_idx=%d meta_cls=%u used=%u cap=%u ss=%p\n",
class_idx, slab_idx,
(unsigned)meta->class_idx,
(unsigned)meta->used,
(unsigned)meta->capacity,
(void*)ss);
#endif
return NULL;
}
// Simple bump allocation within this slab.
if (meta->used >= meta->capacity) {
// Slab exhausted: in minimal Phase12-2 backend we do not loop;
// caller or future logic must acquire another slab.
return NULL;
}
size_t stride = tiny_block_stride_for_class(class_idx);
size_t offset = (size_t)meta->used * stride;
// Phase 12-2 minimal geometry:
// - slab 0 data offset via SUPERSLAB_SLAB0_DATA_OFFSET
// - subsequent slabs at fixed SUPERSLAB_SLAB_USABLE_SIZE strides.
size_t slab_base_off = SUPERSLAB_SLAB0_DATA_OFFSET
+ (size_t)slab_idx * SUPERSLAB_SLAB_USABLE_SIZE;
uint8_t* base = (uint8_t*)ss + slab_base_off + offset;
meta->used++;
atomic_fetch_add_explicit(&ss->total_active_blocks, 1, memory_order_relaxed);
return (void*)base;
}
/*
* Box API entry:
* - Single front-door for tiny-side Superslab allocations.
*
* Phase 12 policy:
* - HAKMEM_TINY_SS_SHARED=0 → legacy backend only (for regression checks)
* - HAKMEM_TINY_SS_SHARED=1 → prefer the shared backend, falling back to legacy only on failure
*/
void* hak_tiny_alloc_superslab_box(int class_idx)
{
static int g_ss_shared_mode = -1;
static _Atomic uint32_t g_ss_backend_log = 0;
if (__builtin_expect(g_ss_shared_mode == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_SHARED");
if (!e || !*e) {
g_ss_shared_mode = 1; // default: shared backend enabled
} else {
int v = atoi(e);
g_ss_shared_mode = (v != 0) ? 1 : 0;
}
}
if (g_ss_shared_mode == 1) {
void* p = hak_tiny_alloc_superslab_backend_shared(class_idx);
if (p != NULL) {
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] shared cls=%d ptr=%p\n", class_idx, p);
}
return p;
}
// If the shared backend fails, fall back to the legacy backend on the safe side
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] shared_fail→legacy cls=%d\n", class_idx);
}
return hak_tiny_alloc_superslab_backend_legacy(class_idx);
}
// With shared mode OFF, use the legacy backend only
uint32_t n = atomic_fetch_add_explicit(&g_ss_backend_log, 1, memory_order_relaxed);
if (n < 4) {
fprintf(stderr, "[SS_BACKEND] legacy cls=%d\n", class_idx);
}
return hak_tiny_alloc_superslab_backend_legacy(class_idx);
}
// ============================================================================
// SuperSlab Allocation (2MB aligned)
// ============================================================================
SuperSlab* superslab_allocate(uint8_t size_class) {
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → fail 1 in every N allocations
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
static __thread unsigned long fault_tick = 0;
if (__builtin_expect(fault_rate == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
if (e && *e) {
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
} else {
fault_rate = 0;
}
}
if (fault_rate > 0) {
unsigned long t = ++fault_tick;
if ((t % (unsigned long)fault_rate) == 0ul) {
return NULL; // simulate OOM
}
}
// Optional env clamp for SuperSlab size
static int env_parsed = 0;
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_DEFAULT; // Start with default (2MB)
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
if (!env_parsed) {
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
if (maxmb) {
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
}
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
if (minmb) {
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
}
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
if (force_lg_env && *force_lg_env) {
int v = atoi(force_lg_env);
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
g_ss_force_lg = v;
g_ss_min_lg_env = g_ss_max_lg_env = v;
}
}
size_t precharge_default = 0;
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
if (precharge_env && *precharge_env) {
long v = atol(precharge_env);
if (v < 0) v = 0;
precharge_default = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
size_t cache_default = 0;
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
if (cache_env && *cache_env) {
long v = atol(cache_env);
if (v < 0) v = 0;
cache_default = (size_t)v;
}
for (int i = 0; i < 8; i++) {
g_ss_cache_cap[i] = cache_default;
g_ss_precharge_target[i] = precharge_default;
}
for (int i = 0; i < 8; i++) {
char name[64];
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
char* cap_env = getenv(name);
if (cap_env && *cap_env) {
long v = atol(cap_env);
if (v < 0) v = 0;
g_ss_cache_cap[i] = (size_t)v;
}
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
char* pre_env = getenv(name);
if (pre_env && *pre_env) {
long v = atol(pre_env);
if (v < 0) v = 0;
g_ss_precharge_target[i] = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
if (g_ss_cache_cap[i] > 0 || g_ss_precharge_target[i] > 0) {
g_ss_cache_enabled = 1;
}
}
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
if (populate_env && atoi(populate_env) != 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
env_parsed = 1;
}
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
uintptr_t ss_mask = ss_size - 1;
int from_cache = 0;
void* ptr = NULL;
// Debug logging flag (lazy init)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
// Phase 9: Try LRU cache first (lazy deallocation)
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
if (cached_ss) {
ptr = (void*)cached_ss;
from_cache = 1;
// Debug logging for REFILL from LRU
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
size_class, (void*)cached_ss);
}
// Skip old cache path - LRU cache takes priority
} else if (g_ss_cache_enabled && size_class < 8) {
// Fallback to old cache (will be deprecated)
ss_cache_precharge(size_class, ss_size, ss_mask);
SuperslabCacheEntry* old_cached = ss_cache_pop(size_class);
if (old_cached) {
ptr = (void*)old_cached;
from_cache = 1;
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
size_class, (void*)old_cached);
}
}
}
if (!ptr) {
int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
if (!ptr) {
return NULL;
}
// Debug logging for REFILL with new allocation
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
size_class, (void*)ptr);
}
}
// Initialize SuperSlab header (Phase 12: no global size_class field)
SuperSlab* ss = (SuperSlab*)ptr;
ss->magic = SUPERSLAB_MAGIC;
ss->active_slabs = 0;
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
ss->slab_bitmap = 0;
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
ss->partial_epoch = 0;
ss->publish_hint = 0xFF;
// Initialize atomics explicitly
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
ss->partial_next = NULL;
// Phase 9: Initialize LRU fields
ss->last_used_ns = 0;
ss->generation = 0;
ss->lru_prev = NULL;
ss->lru_next = NULL;
// Phase 3d-C: Initialize Hot/Cold Split fields
ss->hot_count = 0;
ss->cold_count = 0;
for (int i = 0; i < 16; i++) {
ss->hot_indices[i] = 0;
ss->cold_indices[i] = 0;
}
// Initialize all slab metadata (only up to max slabs for this size)
int max_slabs = (int)(ss_size / SLAB_SIZE);
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
for (int i = 0; i < max_slabs; i++) {
// Phase 1: Atomic initialization (freelist + used are now _Atomic)
slab_freelist_store_relaxed(&ss->slabs[i], NULL); // Explicit NULL (redundant after memset, but clear intent)
atomic_store_explicit(&ss->slabs[i].used, 0, memory_order_relaxed);
ss->slabs[i].capacity = 0;
ss->slabs[i].owner_tid_low = 0;
// Initialize remote queue atomics (memset already zeroed, but use proper atomic init)
atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed);
atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed);
}
if (from_cache) {
ss_stats_cache_reuse();
}
// Phase 8.3: Update ACE current_lg to match allocated size
g_ss_ace[size_class].current_lg = lg;
// Phase 1: Register SuperSlab in global registry for fast lookup
// CRITICAL: Register AFTER full initialization (ss structure is ready)
uintptr_t base = (uintptr_t)ss;
if (!hak_super_register(base, ss)) {
// Registry full - this is a fatal error
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
// Still return ss to avoid memory leak, but lookups may fail
}
return ss;
}
// ============================================================================
// Phase 2a: Dynamic Expansion - Chunk Management Functions
// ============================================================================
// Initialize SuperSlabHead for a class
SuperSlabHead* init_superslab_head(int class_idx) {
if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
// Allocate SuperSlabHead structure
SuperSlabHead* head = (SuperSlabHead*)calloc(1, sizeof(SuperSlabHead));
if (!head) {
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate SuperSlabHead for class %d\n", class_idx);
g_hakmem_lock_depth--;
return NULL;
}
head->class_idx = (uint8_t)class_idx;
atomic_store_explicit(&head->total_chunks, 0, memory_order_relaxed);
head->first_chunk = NULL;
head->current_chunk = NULL;
pthread_mutex_init(&head->expansion_lock, NULL);
// Allocate initial chunk(s)
// Phase 2a: Start with 1 chunk for all classes (expansion will handle growth).
// This reduces startup memory overhead while still allowing unlimited growth.
// (An earlier design gave hot classes 1, 4, 6 two initial chunks to reduce contention.)
int initial_chunks = 1;
for (int i = 0; i < initial_chunks; i++) {
if (expand_superslab_head(head) < 0) {
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate initial chunk %d for class %d\n",
i, class_idx);
g_hakmem_lock_depth--;
// Cleanup on failure
SuperSlab* chunk = head->first_chunk;
while (chunk) {
SuperSlab* next = chunk->next_chunk;
superslab_free(chunk);
chunk = next;
}
pthread_mutex_destroy(&head->expansion_lock);
free(head);
return NULL;
}
}
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[HAKMEM] Initialized SuperSlabHead for class %d: %zu initial chunks\n",
class_idx, atomic_load_explicit(&head->total_chunks, memory_order_relaxed));
#endif
g_hakmem_lock_depth--;
return head;
}
// Expand SuperSlabHead by allocating and linking a new chunk
int expand_superslab_head(SuperSlabHead* head) {
if (!head) {
return -1;
}
// Allocate new chunk via existing superslab_allocate
SuperSlab* new_chunk = superslab_allocate(head->class_idx);
if (!new_chunk) {
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] CRITICAL: Failed to allocate new chunk for class %d (system OOM)\n",
head->class_idx);
g_hakmem_lock_depth--;
#endif
return -1; // True OOM (system out of memory)
}
// CRITICAL FIX: Initialize slab 0 so bitmap != 0x00000000
// Phase 2a chunks must have at least one usable slab after allocation
size_t block_size = g_tiny_class_sizes[head->class_idx];
// Use pthread_self() directly since tiny_self_u32() is static inline in hakmem_tiny.c
uint32_t owner_tid = (uint32_t)(uintptr_t)pthread_self();
superslab_init_slab(new_chunk, 0, block_size, owner_tid);
// Initialize the next_chunk link to NULL
new_chunk->next_chunk = NULL;
// Thread-safe linking
pthread_mutex_lock(&head->expansion_lock);
if (head->current_chunk) {
// Find the tail of the list (optimization: could cache tail pointer)
SuperSlab* tail = head->current_chunk;
while (tail->next_chunk) {
tail = tail->next_chunk;
}
tail->next_chunk = new_chunk;
} else {
// First chunk
head->first_chunk = new_chunk;
}
// Update current chunk to new chunk (for fast allocation)
head->current_chunk = new_chunk;
// Increment total chunks atomically
size_t old_count = atomic_fetch_add_explicit(&head->total_chunks, 1, memory_order_relaxed);
size_t new_count = old_count + 1;
pthread_mutex_unlock(&head->expansion_lock);
#if !defined(NDEBUG) || defined(HAKMEM_SUPERSLAB_VERBOSE)
extern __thread int g_hakmem_lock_depth;
g_hakmem_lock_depth++;
fprintf(stderr, "[HAKMEM] Expanded SuperSlabHead for class %d: %zu chunks now (bitmap=0x%08x)\n",
head->class_idx, new_count, new_chunk->slab_bitmap);
g_hakmem_lock_depth--;
#endif
return 0;
}
// Find which chunk a pointer belongs to
SuperSlab* find_chunk_for_ptr(void* ptr, int class_idx) {
if (!ptr || class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
return NULL;
}
SuperSlabHead* head = g_superslab_heads[class_idx];
if (!head) {
return NULL;
}
uintptr_t ptr_addr = (uintptr_t)ptr;
// Walk the chunk list
SuperSlab* chunk = head->first_chunk;
while (chunk) {
// Check if ptr is within this chunk's memory range
// Each chunk is aligned to SUPERSLAB_SIZE (1MB or 2MB)
uintptr_t chunk_start = (uintptr_t)chunk;
size_t chunk_size = (size_t)1 << chunk->lg_size; // Use actual chunk size
uintptr_t chunk_end = chunk_start + chunk_size;
if (ptr_addr >= chunk_start && ptr_addr < chunk_end) {
// Found the chunk
return chunk;
}
chunk = chunk->next_chunk;
}
return NULL; // Not found in any chunk
}
// ============================================================================
// SuperSlab Deallocation
// ============================================================================
void superslab_free(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
return; // Invalid SuperSlab
}
// ADD DEBUG LOGGING
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
(void*)ss, ss->lg_size, ss->active_slabs);
}
// Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
size_t ss_size = (size_t)1 << ss->lg_size;
// Phase 1: Unregister SuperSlab from registry FIRST
// CRITICAL: Must unregister BEFORE adding to LRU cache
// Reason: Cached SuperSlabs should NOT be found by lookups
uintptr_t base = (uintptr_t)ss;
hak_super_unregister(base);
// Memory fence to ensure unregister is visible
atomic_thread_fence(memory_order_release);
// Phase 9: Try LRU cache first (lazy deallocation)
// NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
// Magic will be cleared on eviction or reuse
int lru_cached = hak_ss_lru_push(ss);
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
}
if (lru_cached) {
// Successfully cached in LRU - defer munmap
return;
}
// LRU cache full or disabled - try the old per-class cache.
// NOTE: Phase 12 removed per-SS size_class from the header, so this push always uses bucket 0.
int old_cached = ss_cache_push(0, ss);
if (old_cached) {
ss_stats_cache_store();
return;
}
// Both caches full - immediately free to OS (eager deallocation)
// Clear magic to prevent use-after-free
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
(void*)ss, ss_size,
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif
munmap(ss, ss_size);
// Update statistics for actual release to OS
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
(unsigned long long)g_superslabs_freed);
#endif
}
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
// Phase E1-CORRECT unified geometry:
// - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
// - usable bytes are determined by slab index (slab0 vs others)
// - capacity = usable / stride for ALL classes (including former C7)
size_t usable_size = (slab_idx == 0)
? SUPERSLAB_SLAB0_USABLE_SIZE
: SUPERSLAB_SLAB_USABLE_SIZE;
size_t stride = block_size;
uint16_t capacity = (uint16_t)(usable_size / stride);
#if !HAKMEM_BUILD_RELEASE
if (slab_idx == 0) {
fprintf(stderr,
"[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
usable_size, stride, (unsigned)capacity);
}
#endif
TinySlabMeta* meta = &ss->slabs[slab_idx];
meta->freelist = NULL; // NULL = linear allocation mode
meta->used = 0;
meta->active = 0; // P1.3: blocks in use by user (starts at 0)
meta->capacity = capacity;
meta->carved = 0;
// LARSON FIX: Use bits 8-15 instead of 0-7 since pthread TIDs are aligned to 256 bytes
meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);
// Fail-safe: stamp class_idx from geometry (stride → class).
// This ensures legacy/shared/legacy-refill paths all end with a correct class.
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
if (g_tiny_class_sizes[i] == stride) {
meta->class_idx = (uint8_t)i;
// P1.1: Update class_map for out-of-band lookup on free path
ss->class_map[slab_idx] = (uint8_t)i;
break;
}
}
superslab_activate_slab(ss, slab_idx);
}
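/*
 * Illustrative capacity arithmetic for the geometry above (hypothetical
 * numbers, not the real constants): with usable_size = 65536 bytes and a
 * 32-byte stride, capacity = 65536 / 32 = 2048 blocks. Slab 0 uses the
 * smaller SUPERSLAB_SLAB0_USABLE_SIZE, presumably because its data region
 * starts at SUPERSLAB_SLAB0_DATA_OFFSET, past the chunk's header/metadata.
 */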
// ============================================================================
// Slab Bitmap Management
// ============================================================================
void superslab_activate_slab(SuperSlab* ss, int slab_idx) {
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
uint32_t mask = 1u << slab_idx;
if ((ss->slab_bitmap & mask) == 0) {
ss->slab_bitmap |= mask;
ss->active_slabs++;
// Phase 3d-C: Update hot/cold indices after activating new slab
ss_update_hot_cold_indices(ss);
}
}
void superslab_deactivate_slab(SuperSlab* ss, int slab_idx) {
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss)) {
return;
}
uint32_t mask = 1u << slab_idx;
if (ss->slab_bitmap & mask) {
ss->slab_bitmap &= ~mask;
ss->active_slabs--;
}
}
int superslab_find_free_slab(SuperSlab* ss) {
if (!ss) return -1;
if ((int)ss->active_slabs >= ss_slabs_capacity(ss)) {
return -1; // No free slabs
}
// Find first 0 bit in bitmap
int cap = ss_slabs_capacity(ss);
for (int i = 0; i < cap; i++) {
if ((ss->slab_bitmap & (1u << i)) == 0) {
return i;
}
}
return -1;
}
// ============================================================================
// Statistics / Debugging
// ============================================================================
void superslab_print_stats(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
printf("Invalid SuperSlab\n");
return;
}
printf("=== SuperSlab Stats ===\n");
printf("Address: %p\n", (void*)ss);
// Phase 12: per-SS size_class removed; classes are per-slab via meta->class_idx.
printf("Active slabs: %u / %d\n", ss->active_slabs, ss_slabs_capacity(ss));
printf("Bitmap: 0x%08X\n", ss->slab_bitmap);
printf("\nPer-slab details:\n");
for (int i = 0; i < ss_slabs_capacity(ss); i++) {
if (ss->slab_bitmap & (1u << i)) {
TinySlabMeta* meta = &ss->slabs[i];
printf(" Slab %2d: used=%u/%u freelist=%p class=%u owner_tid_low=%u\n",
i, meta->used, meta->capacity, meta->freelist,
(unsigned)meta->class_idx, (unsigned)meta->owner_tid_low);
}
}
printf("\n");
}
// Global statistics
void superslab_print_global_stats(void) {
pthread_mutex_lock(&g_superslab_lock);
printf("=== Global SuperSlab Stats ===\n");
printf("SuperSlabs allocated: %lu\n", g_superslabs_allocated);
printf("SuperSlabs freed: %lu\n", g_superslabs_freed);
printf("SuperSlabs active: %lu\n", g_superslabs_allocated - g_superslabs_freed);
printf("Total bytes allocated: %lu MB\n", g_bytes_allocated / (1024 * 1024));
pthread_mutex_unlock(&g_superslab_lock);
}
// ============================================================================
// Phase 8.3: ACE Statistics / Debugging
// ============================================================================
void superslab_ace_print_stats(void) {
printf("=== ACE (Adaptive Cache Engine) Stats ===\n");
const char* class_names[8] = {"8B", "16B", "24B", "32B", "40B", "48B", "56B", "64B"};
printf("Class Curr Targ Hot Allocs Refills Spills LiveBlks\n");
printf("--------------------------------------------------------------\n");
for (int i = 0; i < TINY_NUM_CLASSES_SS; i++) {
SuperSlabACEState* c = &g_ss_ace[i];
printf("%-6s %2uMB %2uMB %4u %7u %8u %7u %9u\n",
class_names[i],
(1u << c->current_lg) / (1024 * 1024),
(1u << c->target_lg) / (1024 * 1024),
c->hot_score,
c->alloc_count,
c->refill_count,
c->spill_count,
c->live_blocks);
}
printf("\n");
}
// ============================================================================
// Phase 8.3: ACE Tick Function (Promotion/Demotion Logic)
// ============================================================================
#define ACE_TICK_NS (150ULL * 1000 * 1000) // 150ms tick interval
#define ACE_COOLDOWN_NS (800ULL * 1000 * 1000) // 0.8s cooldown (anti-oscillation)
// Simplified thresholds for refill activity
#define HI_REFILL(k) (g_ss_ace[k].refill_count > 64) // High refill rate
#define MID_REFILL(k) (g_ss_ace[k].refill_count > 16) // Medium refill rate
// Object sizes per class (for capacity calculation)
// Must match TINY size classes: 8, 16, 24, 32, 40, 48, 56, 64 bytes
static const int g_tiny_obj_sizes[TINY_NUM_CLASSES_SS] = {8, 16, 24, 32, 40, 48, 56, 64};
void hak_tiny_superslab_ace_tick(int k, uint64_t now) {
if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;
SuperSlabACEState* c = &g_ss_ace[k];
// Rate limiting: only tick every ACE_TICK_NS (~150ms)
if (now - c->last_tick_ns < ACE_TICK_NS) return;
// Calculate capacity for 1MB and 2MB SuperSlabs
int obj_size = g_tiny_obj_sizes[k];
double cap1MB = (double)((1U << 20) / obj_size); // 1MB capacity
double cap2MB = (double)((1U << 21) / obj_size); // 2MB capacity
// Calculate hotness score (weighted: 60% live blocks, 40% refill rate)
double hot = 0.6 * (double)c->live_blocks + 0.4 * (double)c->refill_count;
if (hot < 0) hot = 0;
if (hot > 1000) hot = 1000;
c->hot_score = (uint16_t)hot;
// Cooldown mechanism: prevent size changes within 0.8s of last change
static uint64_t last_switch_ns[TINY_NUM_CLASSES_SS] = {0};
if (now - last_switch_ns[k] >= ACE_COOLDOWN_NS) {
if (c->current_lg <= 20) {
// Promotion condition: 1MB → 2MB
// High demand (live > 75% capacity) AND high refill rate
if (c->live_blocks > 0.75 * cap1MB && HI_REFILL(k)) {
c->target_lg = 21; // Promote to 2MB
last_switch_ns[k] = now;
}
} else {
// Demotion condition: 2MB → 1MB
// Low demand (live < 35% capacity) AND low refill rate
if (c->live_blocks < 0.35 * cap2MB && !MID_REFILL(k)) {
c->target_lg = 20; // Demote to 1MB
last_switch_ns[k] = now;
}
}
}
// EMA-style decay for counters (reduce by 75% each tick)
c->alloc_count = c->alloc_count / 4;
c->refill_count = c->refill_count / 4;
c->spill_count = c->spill_count / 4;
// live_blocks is updated incrementally by alloc/free, not decayed here
c->last_tick_ns = now;
}
// ============================================================================
// Phase 8.4: ACE Observer (Registry-based, zero hot-path overhead)
// ============================================================================
// Global debug flag (set once at initialization)
static int g_ace_debug = 0;
// Registry-based observation: scan all SuperSlabs for usage stats
static void ace_observe_and_decide(int k) {
if (k < 0 || k >= TINY_NUM_CLASSES_SS) return;
SuperSlabACEState* c = &g_ss_ace[k];
// Scan Registry to count SuperSlabs and total live blocks
int ss_count = 0;
uint32_t total_live = 0;
for (int i = 0; i < SUPER_REG_SIZE; i++) {
SuperRegEntry* e = &g_super_reg[i];
// Atomic read (thread-safe)
uintptr_t base = atomic_load_explicit(
(_Atomic uintptr_t*)&e->base,
memory_order_acquire);
if (base == 0) continue; // Empty slot
// Phase 8.4: Safety check - skip if ss pointer is invalid
if (!e->ss) continue;
// Phase 12: per-SS size_class removed; registry entries are per-class by construction.
ss_count++;
// Phase 8.4: Scan all slabs to count used blocks (zero hot-path overhead)
uint32_t ss_live = 0;
int cap_scan = ss_slabs_capacity(e->ss);
for (int slab_idx = 0; slab_idx < cap_scan; slab_idx++) {
TinySlabMeta* meta = &e->ss->slabs[slab_idx];
// Relaxed read is OK (stats only, no hot-path impact)
ss_live += meta->used;
}
total_live += ss_live;
}
// Calculate utilization
int obj_size = g_tiny_obj_sizes[k];
uint8_t current_lg = atomic_load_explicit(
(_Atomic uint8_t*)&c->current_lg,
memory_order_relaxed);
uint32_t capacity = (ss_count > 0) ? ss_count * ((1U << current_lg) / obj_size) : 1;
double util = (double)total_live / capacity;
// Update hot_score (for debugging/visualization)
c->hot_score = (uint16_t)(util * 1000);
if (c->hot_score > 1000) c->hot_score = 1000;
// Promotion/Demotion decision
uint8_t new_target = current_lg;
if (current_lg <= 20) {
// Promotion: 1MB → 2MB
if (util > 0.75) {
new_target = 21;
}
} else {
// Demotion: 2MB → 1MB
if (util < 0.35) {
new_target = 20;
}
}
// Debug output (if enabled)
if (g_ace_debug && ss_count > 0) {
fprintf(stderr, "[ACE] Class %d (%dB): ss=%d live=%u cap=%u util=%.2f%% lg=%d->%d hot=%d\n",
k, obj_size, ss_count, total_live, capacity, util * 100.0,
current_lg, new_target, c->hot_score);
}
// Atomic write (thread-safe)
if (new_target != current_lg) {
atomic_store_explicit(
(_Atomic uint8_t*)&c->target_lg,
new_target,
memory_order_release);
if (g_ace_debug) {
fprintf(stderr, "[ACE] *** Class %d: SIZE CHANGE %dMB -> %dMB (util=%.2f%%)\n",
k, 1 << (current_lg - 20), 1 << (new_target - 20), util * 100.0);
}
}
}
// Called from Learner thread (background observation)
void hak_tiny_superslab_ace_observe_all(void) {
// Initialize debug flag once
static int initialized = 0;
if (!initialized) {
const char* ace_debug = getenv("HAKMEM_ACE_DEBUG");
g_ace_debug = (ace_debug && atoi(ace_debug) != 0) ? 1 : 0;
initialized = 1;
}
for (int k = 0; k < TINY_NUM_CLASSES_SS; k++) {
ace_observe_and_decide(k);
}
}