424 lines
16 KiB
C
424 lines
16 KiB
C
// Box: Core Allocation
|
||
// Purpose: SuperSlab allocation/deallocation (Box-style front end)
|
||
|
||
#include "ss_allocation_box.h"
|
||
#include "ss_os_acquire_box.h"
|
||
#include "ss_prefault_box.h"
|
||
#include "ss_cache_box.h"
|
||
#include "ss_stats_box.h"
|
||
#include "ss_ace_box.h"
|
||
#include "hakmem_super_registry.h"
|
||
#include "ss_addr_map_box.h"
|
||
#include "hakmem_tiny_config.h"
|
||
#include "hakmem_policy.h" // Phase E3-1: Access FrozenPolicy for never-free policy
|
||
#include <stdio.h>
|
||
#include <stdlib.h>
|
||
#include <string.h>
|
||
#include <sys/mman.h>
|
||
#include <pthread.h>
|
||
|
||
// Global statistics (defined in ss_stats_box.c, declared here for access)
|
||
extern pthread_mutex_t g_superslab_lock;
|
||
extern uint64_t g_superslabs_freed;
|
||
extern uint64_t g_bytes_allocated;
|
||
|
||
// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
|
||
extern int g_ss_force_lg;
|
||
|
||
// g_ss_populate_once controls MAP_POPULATE flag (defined in superslab_ace.c)
|
||
extern _Atomic int g_ss_populate_once;
|
||
|
||
// ============================================================================
|
||
// SuperSlab Allocation (ACE-Aware)
|
||
// ============================================================================
|
||
|
||
SuperSlab* superslab_allocate(uint8_t size_class) {
|
||
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗
|
||
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
|
||
static __thread unsigned long fault_tick = 0;
|
||
if (__builtin_expect(fault_rate == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
|
||
if (e && *e) {
|
||
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
|
||
} else {
|
||
fault_rate = 0;
|
||
}
|
||
}
|
||
if (fault_rate > 0) {
|
||
unsigned long t = ++fault_tick;
|
||
if ((t % (unsigned long)fault_rate) == 0ul) {
|
||
return NULL; // simulate OOM
|
||
}
|
||
}
|
||
// Optional env clamp for SuperSlab size
|
||
static int env_parsed = 0;
|
||
// Allow full ACE range [MIN..MAX] by default so 1MB/2MB の二択学習が有効になる。
|
||
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
|
||
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
|
||
if (!env_parsed) {
|
||
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
|
||
if (maxmb) {
|
||
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
|
||
}
|
||
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
|
||
if (minmb) {
|
||
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
|
||
}
|
||
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
|
||
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
|
||
if (force_lg_env && *force_lg_env) {
|
||
int v = atoi(force_lg_env);
|
||
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
|
||
g_ss_force_lg = v;
|
||
g_ss_min_lg_env = g_ss_max_lg_env = v;
|
||
}
|
||
}
|
||
size_t precharge_default = 0;
|
||
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
|
||
if (precharge_env && *precharge_env) {
|
||
long v = atol(precharge_env);
|
||
if (v < 0) v = 0;
|
||
precharge_default = (size_t)v;
|
||
if (v > 0) {
|
||
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
|
||
}
|
||
}
|
||
size_t cache_default = 0;
|
||
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
|
||
if (cache_env && *cache_env) {
|
||
long v = atol(cache_env);
|
||
if (v < 0) v = 0;
|
||
cache_default = (size_t)v;
|
||
}
|
||
// Initialize cache/precharge via direct manipulation (box API doesn't need init function)
|
||
for (int i = 0; i < 8; i++) {
|
||
extern size_t g_ss_cache_cap[8];
|
||
extern size_t g_ss_precharge_target[8];
|
||
g_ss_cache_cap[i] = cache_default;
|
||
g_ss_precharge_target[i] = precharge_default;
|
||
}
|
||
|
||
for (int i = 0; i < 8; i++) {
|
||
char name[64];
|
||
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
|
||
char* cap_env = getenv(name);
|
||
if (cap_env && *cap_env) {
|
||
long v = atol(cap_env);
|
||
if (v < 0) v = 0;
|
||
tiny_ss_cache_set_class_cap(i, (size_t)v);
|
||
}
|
||
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
|
||
char* pre_env = getenv(name);
|
||
if (pre_env && *pre_env) {
|
||
long v = atol(pre_env);
|
||
if (v < 0) v = 0;
|
||
tiny_ss_precharge_set_class_target(i, (size_t)v);
|
||
}
|
||
}
|
||
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
|
||
if (populate_env && atoi(populate_env) != 0) {
|
||
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
|
||
}
|
||
env_parsed = 1;
|
||
}
|
||
|
||
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
|
||
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
|
||
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
|
||
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
|
||
uintptr_t ss_mask = ss_size - 1;
|
||
int from_cache = 0;
|
||
void* ptr = NULL;
|
||
|
||
// Debug logging flag (lazy init)
|
||
static __thread int dbg = -1;
|
||
#if HAKMEM_BUILD_RELEASE
|
||
dbg = 0;
|
||
#else
|
||
if (__builtin_expect(dbg == -1, 0)) {
|
||
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
|
||
dbg = (e && *e && *e != '0') ? 1 : 0;
|
||
}
|
||
#endif
|
||
|
||
// Phase 9: Try LRU cache first (lazy deallocation)
|
||
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
|
||
if (cached_ss) {
|
||
ptr = (void*)cached_ss;
|
||
from_cache = 1;
|
||
// Debug logging for REFILL from LRU
|
||
if (dbg == 1) {
|
||
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
|
||
size_class, (void*)cached_ss);
|
||
}
|
||
// Skip old cache path - LRU cache takes priority
|
||
} else {
|
||
// Fallback to old cache (will be deprecated)
|
||
ss_cache_precharge(size_class, ss_size, ss_mask);
|
||
void* old_cached = ss_cache_pop(size_class);
|
||
if (old_cached) {
|
||
ptr = old_cached;
|
||
from_cache = 1;
|
||
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
|
||
if (dbg == 1) {
|
||
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
|
||
size_class, ptr);
|
||
}
|
||
}
|
||
}
|
||
|
||
if (!ptr) {
|
||
// Prefault policy: decide MAP_POPULATE and optional manual touch
|
||
SSPrefaultPolicy pf_policy = ss_prefault_policy();
|
||
int populate = 0;
|
||
|
||
if (pf_policy == SS_PREFAULT_POPULATE ||
|
||
pf_policy == SS_PREFAULT_TOUCH ||
|
||
pf_policy == SS_PREFAULT_ASYNC) {
|
||
// 常時 MAP_POPULATE(+必要なら手動 touch)
|
||
populate = 1;
|
||
} else {
|
||
// OFF の場合のみ、従来の「ワンショット populate」挙動を温存
|
||
populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
|
||
}
|
||
|
||
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
|
||
if (!ptr) {
|
||
return NULL;
|
||
}
|
||
// 手動 prefault が要求されている場合は、mmap 直後に SuperSlab 全域を page-in
|
||
if (pf_policy == SS_PREFAULT_TOUCH || pf_policy == SS_PREFAULT_ASYNC) {
|
||
ss_prefault_region(ptr, ss_size);
|
||
}
|
||
// Debug logging for REFILL with new allocation
|
||
if (dbg == 1) {
|
||
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
|
||
size_class, (void*)ptr);
|
||
}
|
||
}
|
||
|
||
// Initialize SuperSlab header (Phase 12: no global size_class field)
|
||
SuperSlab* ss = (SuperSlab*)ptr;
|
||
ss->magic = SUPERSLAB_MAGIC;
|
||
ss->active_slabs = 0;
|
||
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
|
||
|
||
// P-Tier: Initialize tier to HOT (normal operation, eligible for allocation)
|
||
atomic_store_explicit(&ss->tier, SS_TIER_HOT, memory_order_relaxed);
|
||
ss->slab_bitmap = 0;
|
||
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
|
||
ss->freelist_mask = 0; // P1.1 FIX: Initialize freelist_mask
|
||
ss->empty_mask = 0; // P1.1 FIX: Initialize empty_mask
|
||
ss->empty_count = 0; // P1.1 FIX: Initialize empty_count
|
||
ss->partial_epoch = 0;
|
||
ss->publish_hint = 0xFF;
|
||
|
||
// Initialize atomics explicitly
|
||
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
|
||
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
|
||
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
|
||
ss->partial_next = NULL;
|
||
|
||
// Phase 9: Initialize LRU fields
|
||
ss->last_used_ns = 0;
|
||
ss->generation = 0;
|
||
ss->lru_prev = NULL;
|
||
ss->lru_next = NULL;
|
||
|
||
// Phase 3d-C: Initialize hot/cold fields
|
||
ss->hot_count = 0;
|
||
ss->cold_count = 0;
|
||
memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
|
||
memset(ss->cold_indices, 0, sizeof(ss->cold_indices));
|
||
|
||
// Phase 12: Initialize next_chunk (legacy per-class chain)
|
||
ss->next_chunk = NULL;
|
||
|
||
// Initialize all slab metadata (only up to max slabs for this size).
|
||
// NOTE: 詳細な Slab 初期化と Remote Queue Drain は superslab_slab.c
|
||
//(Slab Management Box)側に集約している。
|
||
int max_slabs = (int)(ss_size / SLAB_SIZE);
|
||
|
||
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
|
||
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
|
||
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
|
||
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
|
||
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
|
||
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
|
||
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
|
||
|
||
// P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
|
||
// This ensures class_map is in a known state even before slabs are assigned
|
||
memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));
|
||
|
||
// P0 Optimization: Initialize shared_meta pointer (used for O(1) metadata lookup)
|
||
ss->shared_meta = NULL;
|
||
|
||
if (from_cache) {
|
||
ss_stats_cache_reuse();
|
||
}
|
||
|
||
// Phase 8.3: Update ACE current_lg to match allocated size
|
||
g_ss_ace[size_class].current_lg = lg;
|
||
|
||
// Phase 1: Register SuperSlab in global registry for fast lookup
|
||
// CRITICAL: Register AFTER full initialization (ss structure is ready)
|
||
uintptr_t base = (uintptr_t)ss;
|
||
int reg_ok = hak_super_register(base, ss);
|
||
if (!reg_ok) {
|
||
// Registry full - this is a fatal error
|
||
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
|
||
// Still return ss to avoid memory leak, but lookups may fail
|
||
}
|
||
do {
|
||
static _Atomic uint32_t g_ss_reg_log_shot = 0;
|
||
uint32_t shot = atomic_fetch_add_explicit(&g_ss_reg_log_shot, 1, memory_order_relaxed);
|
||
if (shot < 4) {
|
||
fprintf(stderr,
|
||
"[SS_REG_DEBUG] class=%u ss=%p reg_ok=%d map_count=%zu\n",
|
||
(unsigned)size_class,
|
||
(void*)ss,
|
||
reg_ok,
|
||
g_ss_addr_map.count);
|
||
fflush(stderr);
|
||
}
|
||
} while (0);
|
||
|
||
if (!from_cache) {
|
||
ss_stats_on_ss_alloc_class(size_class);
|
||
}
|
||
|
||
return ss;
|
||
}
|
||
|
||
// ============================================================================
|
||
// SuperSlab Deallocation
|
||
// ============================================================================
|
||
|
||
// Release a SuperSlab.
//
// Steps, in order:
//   1. ignore NULL / wrong-magic pointers and SuperSlabs whose refcount != 0
//   2. unregister from the global registry
//   3. try to park it in the LRU cache (optionally MADV_DONTNEED the region)
//   4. otherwise try the old cache
//   5. otherwise clear the magic, munmap() the region and update statistics
//
// ss: SuperSlab to release; may be NULL.
// Statistics (g_superslabs_freed / g_bytes_allocated) are only updated when
// munmap() actually succeeds.
void superslab_free(SuperSlab* ss) {
    if (!ss || ss->magic != SUPERSLAB_MAGIC) {
        return;  // Invalid SuperSlab
    }

    // Guard: do not free while pinned by TLS/remote holders
    uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
    if (__builtin_expect(ss_refs != 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
        // Log only the first eight skipped frees.
        static _Atomic uint32_t g_ss_free_pinned = 0;
        uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
        if (shot < 8) {
            fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
        }
#endif
        return;
    }

    // Debug logging flag (lazy init, per thread; forced off in release builds)
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
                (void*)ss, ss->lg_size, ss->active_slabs);
    }

    // Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
    size_t ss_size = (size_t)1 << ss->lg_size;

    // Phase 1: Unregister SuperSlab from registry FIRST
    // CRITICAL: Must unregister BEFORE adding to LRU cache
    // Reason: Cached SuperSlabs should NOT be found by lookups
    uintptr_t base = (uintptr_t)ss;
    hak_super_unregister(base);

    // Memory fence to ensure unregister is visible
    atomic_thread_fence(memory_order_release);

    // Phase 9: Try LRU cache first (lazy deallocation)
    // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
    // Magic will be cleared on eviction or reuse
    int lru_cached = hak_ss_lru_push(ss);
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
    }
    if (lru_cached) {
        // Successfully cached in LRU - defer munmap
        // OPTIMIZATION: Lazy zeroing via MADV_DONTNEED
        // When SuperSlab enters LRU cache, mark pages as DONTNEED to defer
        // page zeroing until they are actually touched by next allocation.
        // Kernel will zero them on-fault (zero-on-fault), reducing clear_page_erms overhead.
        // Enabled unless HAKMEM_SS_LAZY_ZERO is set to a value not starting with '1'.
        static int lazy_zero_enabled = -1;
        if (__builtin_expect(lazy_zero_enabled == -1, 0)) {
            const char* e = getenv("HAKMEM_SS_LAZY_ZERO");
            lazy_zero_enabled = (!e || !*e || *e == '1') ? 1 : 0;
        }
        if (lazy_zero_enabled) {
#ifdef MADV_DONTNEED
            // NOTE(review): the advised range starts at the SuperSlab header, which
            // holds the magic and the LRU links just populated by the push above;
            // confirm ss_os_madvise_guarded() preserves whatever the LRU cache needs.
            (void)ss_os_madvise_guarded((void*)ss, ss_size, MADV_DONTNEED, "ss_lru_madvise");
            ss_os_stats_record_madvise();
#endif
        }
        return;
    }

    // LRU cache full or disabled - try old cache using head class_idx (if known)
    // NOTE(review): the class index passed here is always 0 - confirm intended.
    int old_cached = ss_cache_push(0, ss);
    if (old_cached) {
        ss_stats_cache_store();
        return;
    }

    // Phase E3-1: Check never-free policy before munmap (DISABLED - policy field not yet implemented)
    // If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
    // TODO: Add tiny_ss_never_free_global field to FrozenPolicy when implementing Phase E3-1
#if 0
    const FrozenPolicy* pol = hkm_policy_get();
    if (pol && pol->tiny_ss_never_free_global) {
        // Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
        // Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
                (void*)ss, ss_size);
#endif
        return;
    }
#endif

    // Both caches full - immediately free to OS (eager deallocation)
    // Clear magic to prevent use-after-free
    ss->magic = 0;

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
            (void*)ss, ss_size,
            atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif

    ss_os_stats_record_free();
    if (munmap(ss, ss_size) != 0) {
        // The region is still mapped, so the "released to OS" counters below
        // must not be touched; report the failure and stop here.
        fprintf(stderr, "HAKMEM: munmap failed for SuperSlab ss=%p size=%zu\n",
                (void*)ss, ss_size);
        return;
    }

    // Update statistics for actual release to OS
    pthread_mutex_lock(&g_superslab_lock);
    g_superslabs_freed++;
    // Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
    g_bytes_allocated -= ss_size;
#if !HAKMEM_BUILD_RELEASE
    // Snapshot under the lock so the debug print below does not read the
    // counter while another thread may be updating it.
    uint64_t freed_now = g_superslabs_freed;
#endif
    pthread_mutex_unlock(&g_superslab_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
            (unsigned long long)freed_now);
#endif
}
|
||
|
||
// ============================================================================
|
||
// Slab Initialization within SuperSlab
|
||
// ============================================================================
|
||
// Note: superslab_init_slab() is implemented in superslab_slab.c (Slab Management Box)
// and is not exported from this Box.
|