Files
hakmem/core/box/ss_allocation_box.c
2025-12-09 21:50:15 +09:00

424 lines
16 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Box: Core Allocation
// Purpose: SuperSlab allocation/deallocation (Box化フロント)
#include "ss_allocation_box.h"
#include "ss_os_acquire_box.h"
#include "ss_prefault_box.h"
#include "ss_cache_box.h"
#include "ss_stats_box.h"
#include "ss_ace_box.h"
#include "hakmem_super_registry.h"
#include "ss_addr_map_box.h"
#include "hakmem_tiny_config.h"
#include "hakmem_policy.h" // Phase E3-1: Access FrozenPolicy for never-free policy
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
// Global statistics (defined in ss_stats_box.c, declared here for access)
extern pthread_mutex_t g_superslab_lock;
extern uint64_t g_superslabs_freed;
extern uint64_t g_bytes_allocated;
// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
extern int g_ss_force_lg;
// g_ss_populate_once controls MAP_POPULATE flag (defined in superslab_ace.c)
extern _Atomic int g_ss_populate_once;
// ============================================================================
// SuperSlab Allocation (ACE-Aware)
// ============================================================================
SuperSlab* superslab_allocate(uint8_t size_class) {
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
static __thread unsigned long fault_tick = 0;
if (__builtin_expect(fault_rate == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
if (e && *e) {
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
} else {
fault_rate = 0;
}
}
if (fault_rate > 0) {
unsigned long t = ++fault_tick;
if ((t % (unsigned long)fault_rate) == 0ul) {
return NULL; // simulate OOM
}
}
// Optional env clamp for SuperSlab size
static int env_parsed = 0;
// Allow full ACE range [MIN..MAX] by default so 1MB/2MB の二択学習が有効になる。
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
if (!env_parsed) {
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
if (maxmb) {
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
}
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
if (minmb) {
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
}
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
if (force_lg_env && *force_lg_env) {
int v = atoi(force_lg_env);
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
g_ss_force_lg = v;
g_ss_min_lg_env = g_ss_max_lg_env = v;
}
}
size_t precharge_default = 0;
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
if (precharge_env && *precharge_env) {
long v = atol(precharge_env);
if (v < 0) v = 0;
precharge_default = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
size_t cache_default = 0;
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
if (cache_env && *cache_env) {
long v = atol(cache_env);
if (v < 0) v = 0;
cache_default = (size_t)v;
}
// Initialize cache/precharge via direct manipulation (box API doesn't need init function)
for (int i = 0; i < 8; i++) {
extern size_t g_ss_cache_cap[8];
extern size_t g_ss_precharge_target[8];
g_ss_cache_cap[i] = cache_default;
g_ss_precharge_target[i] = precharge_default;
}
for (int i = 0; i < 8; i++) {
char name[64];
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
char* cap_env = getenv(name);
if (cap_env && *cap_env) {
long v = atol(cap_env);
if (v < 0) v = 0;
tiny_ss_cache_set_class_cap(i, (size_t)v);
}
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
char* pre_env = getenv(name);
if (pre_env && *pre_env) {
long v = atol(pre_env);
if (v < 0) v = 0;
tiny_ss_precharge_set_class_target(i, (size_t)v);
}
}
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
if (populate_env && atoi(populate_env) != 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
env_parsed = 1;
}
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
uintptr_t ss_mask = ss_size - 1;
int from_cache = 0;
void* ptr = NULL;
// Debug logging flag (lazy init)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
// Phase 9: Try LRU cache first (lazy deallocation)
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
if (cached_ss) {
ptr = (void*)cached_ss;
from_cache = 1;
// Debug logging for REFILL from LRU
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
size_class, (void*)cached_ss);
}
// Skip old cache path - LRU cache takes priority
} else {
// Fallback to old cache (will be deprecated)
ss_cache_precharge(size_class, ss_size, ss_mask);
void* old_cached = ss_cache_pop(size_class);
if (old_cached) {
ptr = old_cached;
from_cache = 1;
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
size_class, ptr);
}
}
}
if (!ptr) {
// Prefault policy: decide MAP_POPULATE and optional manual touch
SSPrefaultPolicy pf_policy = ss_prefault_policy();
int populate = 0;
if (pf_policy == SS_PREFAULT_POPULATE ||
pf_policy == SS_PREFAULT_TOUCH ||
pf_policy == SS_PREFAULT_ASYNC) {
// 常時 MAP_POPULATE+必要なら手動 touch
populate = 1;
} else {
// OFF の場合のみ、従来の「ワンショット populate」挙動を温存
populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
}
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
if (!ptr) {
return NULL;
}
// 手動 prefault が要求されている場合は、mmap 直後に SuperSlab 全域を page-in
if (pf_policy == SS_PREFAULT_TOUCH || pf_policy == SS_PREFAULT_ASYNC) {
ss_prefault_region(ptr, ss_size);
}
// Debug logging for REFILL with new allocation
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
size_class, (void*)ptr);
}
}
// Initialize SuperSlab header (Phase 12: no global size_class field)
SuperSlab* ss = (SuperSlab*)ptr;
ss->magic = SUPERSLAB_MAGIC;
ss->active_slabs = 0;
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
// P-Tier: Initialize tier to HOT (normal operation, eligible for allocation)
atomic_store_explicit(&ss->tier, SS_TIER_HOT, memory_order_relaxed);
ss->slab_bitmap = 0;
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
ss->freelist_mask = 0; // P1.1 FIX: Initialize freelist_mask
ss->empty_mask = 0; // P1.1 FIX: Initialize empty_mask
ss->empty_count = 0; // P1.1 FIX: Initialize empty_count
ss->partial_epoch = 0;
ss->publish_hint = 0xFF;
// Initialize atomics explicitly
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
ss->partial_next = NULL;
// Phase 9: Initialize LRU fields
ss->last_used_ns = 0;
ss->generation = 0;
ss->lru_prev = NULL;
ss->lru_next = NULL;
// Phase 3d-C: Initialize hot/cold fields
ss->hot_count = 0;
ss->cold_count = 0;
memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
memset(ss->cold_indices, 0, sizeof(ss->cold_indices));
// Phase 12: Initialize next_chunk (legacy per-class chain)
ss->next_chunk = NULL;
// Initialize all slab metadata (only up to max slabs for this size).
// NOTE: 詳細な Slab 初期化と Remote Queue Drain は superslab_slab.c
//Slab Management Box側に集約している。
int max_slabs = (int)(ss_size / SLAB_SIZE);
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
// P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
// This ensures class_map is in a known state even before slabs are assigned
memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));
// P0 Optimization: Initialize shared_meta pointer (used for O(1) metadata lookup)
ss->shared_meta = NULL;
if (from_cache) {
ss_stats_cache_reuse();
}
// Phase 8.3: Update ACE current_lg to match allocated size
g_ss_ace[size_class].current_lg = lg;
// Phase 1: Register SuperSlab in global registry for fast lookup
// CRITICAL: Register AFTER full initialization (ss structure is ready)
uintptr_t base = (uintptr_t)ss;
int reg_ok = hak_super_register(base, ss);
if (!reg_ok) {
// Registry full - this is a fatal error
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
// Still return ss to avoid memory leak, but lookups may fail
}
do {
static _Atomic uint32_t g_ss_reg_log_shot = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_reg_log_shot, 1, memory_order_relaxed);
if (shot < 4) {
fprintf(stderr,
"[SS_REG_DEBUG] class=%u ss=%p reg_ok=%d map_count=%zu\n",
(unsigned)size_class,
(void*)ss,
reg_ok,
g_ss_addr_map.count);
fflush(stderr);
}
} while (0);
if (!from_cache) {
ss_stats_on_ss_alloc_class(size_class);
}
return ss;
}
// ============================================================================
// SuperSlab Deallocation
// ============================================================================
void superslab_free(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
return; // Invalid SuperSlab
}
// Guard: do not free while pinned by TLS/remote holders
uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
if (__builtin_expect(ss_refs != 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint32_t g_ss_free_pinned = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
}
#endif
return;
}
// ADD DEBUG LOGGING
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
(void*)ss, ss->lg_size, ss->active_slabs);
}
// Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
size_t ss_size = (size_t)1 << ss->lg_size;
// Phase 1: Unregister SuperSlab from registry FIRST
// CRITICAL: Must unregister BEFORE adding to LRU cache
// Reason: Cached SuperSlabs should NOT be found by lookups
uintptr_t base = (uintptr_t)ss;
hak_super_unregister(base);
// Memory fence to ensure unregister is visible
atomic_thread_fence(memory_order_release);
// Phase 9: Try LRU cache first (lazy deallocation)
// NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
// Magic will be cleared on eviction or reuse
int lru_cached = hak_ss_lru_push(ss);
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
}
if (lru_cached) {
// Successfully cached in LRU - defer munmap
// OPTIMIZATION: Lazy zeroing via MADV_DONTNEED
// When SuperSlab enters LRU cache, mark pages as DONTNEED to defer
// page zeroing until they are actually touched by next allocation.
// Kernel will zero them on-fault (zero-on-fault), reducing clear_page_erms overhead.
static int lazy_zero_enabled = -1;
if (__builtin_expect(lazy_zero_enabled == -1, 0)) {
const char* e = getenv("HAKMEM_SS_LAZY_ZERO");
lazy_zero_enabled = (!e || !*e || *e == '1') ? 1 : 0;
}
if (lazy_zero_enabled) {
#ifdef MADV_DONTNEED
(void)ss_os_madvise_guarded((void*)ss, ss_size, MADV_DONTNEED, "ss_lru_madvise");
ss_os_stats_record_madvise();
#endif
}
return;
}
// LRU cache full or disabled - try old cache using head class_idx (if known)
int old_cached = ss_cache_push(0, ss);
if (old_cached) {
ss_stats_cache_store();
return;
}
// Phase E3-1: Check never-free policy before munmap (DISABLED - policy field not yet implemented)
// If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
// TODO: Add tiny_ss_never_free_global field to FrozenPolicy when implementing Phase E3-1
#if 0
const FrozenPolicy* pol = hkm_policy_get();
if (pol && pol->tiny_ss_never_free_global) {
// Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
// Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
(void*)ss, ss_size);
#endif
return;
}
#endif
// Both caches full - immediately free to OS (eager deallocation)
// Clear magic to prevent use-after-free
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
(void*)ss, ss_size,
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif
ss_os_stats_record_free();
munmap(ss, ss_size);
// Update statistics for actual release to OS
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
(unsigned long long)g_superslabs_freed);
#endif
}
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
// Note: superslab_init_slab() は superslab_slab.cSlab Management Box
// に実装されており、この Box では export しない。