Files
hakmem/core/box/ss_allocation_box.c
Moe Charm (CI) 7adbcdfcb6 Phase 54-60: Memory-Lean mode, Balanced mode stabilization, M1 (50%) achievement
## Summary

Completed Phase 54-60 optimization work:

**Phase 54-56: Memory-Lean mode (LEAN+OFF prewarm suppression)**
- Implemented ss_mem_lean_env_box.h with ENV gates
- Balanced mode (LEAN+OFF) promoted as production default
- Result: +1.2% throughput, better stability, zero syscall overhead
- Added to bench_profile.h: MIXED_TINYV3_C7_BALANCED preset

**Phase 57: 60-min soak finalization**
- Balanced mode: 60-min soak, RSS drift 0%, CV 5.38%
- Speed-first mode: 60-min soak, RSS drift 0%, CV 1.58%
- Syscall budget: 1.25e-7/op (800× under target)
- Status: PRODUCTION-READY

**Phase 59: 50% recovery baseline rebase**
- hakmem FAST (Balanced): 59.184M ops/s, CV 1.31%
- mimalloc: 120.466M ops/s, CV 3.50%
- Ratio: 49.13% (M1 ACHIEVED within statistical noise)
- Superior stability: 2.68× better CV than mimalloc

**Phase 60: Alloc pass-down SSOT (NO-GO)**
- Implemented alloc_passdown_ssot_env_box.h
- Modified malloc_tiny_fast.h for SSOT pattern
- Result: -0.46% (NO-GO)
- Key lesson: SSOT not applicable where early-exit already optimized

## Key Metrics

- Performance: 49.13% of mimalloc (M1 effectively achieved)
- Stability: CV 1.31% (superior to mimalloc 3.50%)
- Syscall budget: 1.25e-7/op (excellent)
- RSS: 33MB stable, 0% drift over 60 minutes

## Files Added/Modified

New boxes:
- core/box/ss_mem_lean_env_box.h
- core/box/ss_release_policy_box.{h,c}
- core/box/alloc_passdown_ssot_env_box.h

Scripts:
- scripts/soak_mixed_single_process.sh
- scripts/analyze_epoch_tail_csv.py
- scripts/soak_mixed_rss.sh
- scripts/calculate_percentiles.py
- scripts/analyze_soak.py

Documentation: Phase 40-60 analysis documents

## Design Decisions

1. Profile separation (core/bench_profile.h):
   - MIXED_TINYV3_C7_SAFE: Speed-first (no LEAN)
   - MIXED_TINYV3_C7_BALANCED: Balanced mode (LEAN+OFF)

2. Box Theory compliance:
   - All ENV gates reversible (HAKMEM_SS_MEM_LEAN, HAKMEM_ALLOC_PASSDOWN_SSOT)
   - Single conversion points maintained
   - No physical deletions (compile-out only)

3. Lessons learned:
   - SSOT effective only where redundancy exists (Phase 60 showed limits)
   - Branch prediction extremely effective (~0 cycles for well-predicted branches)
   - Early-exit pattern valuable even when seemingly redundant

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2025-12-17 06:24:01 +09:00

455 lines
18 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Box: Core Allocation
// Purpose: SuperSlab allocation/deallocation (Box化フロント)
#include "ss_allocation_box.h"
#include "ss_os_acquire_box.h"
#include "ss_prefault_box.h"
#include "ss_cache_box.h"
#include "ss_stats_box.h"
#include "ss_ace_box.h"
#include "hakmem_super_registry.h"
#include "ss_addr_map_box.h"
#include "hakmem_tiny_config.h"
#include "hakmem_policy.h" // Phase E3-1: Access FrozenPolicy for never-free policy
#include "ss_release_policy_box.h" // Phase 54: Memory-Lean mode release policy
#include "ss_mem_lean_env_box.h" // Phase 54: Memory-Lean mode ENV check
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <pthread.h>
// Global statistics (defined in ss_stats_box.c, declared here for access)
extern pthread_mutex_t g_superslab_lock;
extern uint64_t g_superslabs_freed;
extern uint64_t g_bytes_allocated;
// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
extern int g_ss_force_lg;
// g_ss_populate_once controls MAP_POPULATE flag (defined in superslab_ace.c)
extern _Atomic int g_ss_populate_once;
// ============================================================================
// SuperSlab Allocation (ACE-Aware)
// ============================================================================
SuperSlab* superslab_allocate(uint8_t size_class) {
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
static __thread unsigned long fault_tick = 0;
if (__builtin_expect(fault_rate == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
if (e && *e) {
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
} else {
fault_rate = 0;
}
}
if (fault_rate > 0) {
unsigned long t = ++fault_tick;
if ((t % (unsigned long)fault_rate) == 0ul) {
return NULL; // simulate OOM
}
}
// Optional env clamp for SuperSlab size
static int env_parsed = 0;
// Allow full ACE range [MIN..MAX] by default so 1MB/2MB の二択学習が有効になる。
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
if (!env_parsed) {
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
if (maxmb) {
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
}
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
if (minmb) {
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
}
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
if (force_lg_env && *force_lg_env) {
int v = atoi(force_lg_env);
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
g_ss_force_lg = v;
g_ss_min_lg_env = g_ss_max_lg_env = v;
}
}
size_t precharge_default = 0;
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
if (precharge_env && *precharge_env) {
long v = atol(precharge_env);
if (v < 0) v = 0;
precharge_default = (size_t)v;
if (v > 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
}
size_t cache_default = 0;
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
if (cache_env && *cache_env) {
long v = atol(cache_env);
if (v < 0) v = 0;
cache_default = (size_t)v;
}
// Initialize cache/precharge via direct manipulation (box API doesn't need init function)
for (int i = 0; i < 8; i++) {
extern size_t g_ss_cache_cap[8];
extern size_t g_ss_precharge_target[8];
g_ss_cache_cap[i] = cache_default;
g_ss_precharge_target[i] = precharge_default;
}
for (int i = 0; i < 8; i++) {
char name[64];
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
char* cap_env = getenv(name);
if (cap_env && *cap_env) {
long v = atol(cap_env);
if (v < 0) v = 0;
tiny_ss_cache_set_class_cap(i, (size_t)v);
}
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
char* pre_env = getenv(name);
if (pre_env && *pre_env) {
long v = atol(pre_env);
if (v < 0) v = 0;
tiny_ss_precharge_set_class_target(i, (size_t)v);
}
}
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
if (populate_env && atoi(populate_env) != 0) {
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
}
env_parsed = 1;
}
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
uintptr_t ss_mask = ss_size - 1;
int from_cache = 0;
void* ptr = NULL;
// Debug logging flag (lazy init)
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
// Phase 9: Try LRU cache first (lazy deallocation)
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
if (cached_ss) {
ptr = (void*)cached_ss;
from_cache = 1;
// Debug logging for REFILL from LRU
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
size_class, (void*)cached_ss);
}
// Skip old cache path - LRU cache takes priority
} else {
// Fallback to old cache (will be deprecated)
ss_cache_precharge(size_class, ss_size, ss_mask);
void* old_cached = ss_cache_pop(size_class);
if (old_cached) {
ptr = old_cached;
from_cache = 1;
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
size_class, ptr);
}
}
}
if (!ptr) {
// Prefault policy: decide MAP_POPULATE and optional manual touch
SSPrefaultPolicy pf_policy = ss_prefault_policy();
int populate = 0;
if (pf_policy == SS_PREFAULT_POPULATE ||
pf_policy == SS_PREFAULT_TOUCH ||
pf_policy == SS_PREFAULT_ASYNC) {
// 常時 MAP_POPULATE+必要なら手動 touch
populate = 1;
} else {
// OFF の場合のみ、従来の「ワンショット populate」挙動を温存
populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
}
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
if (!ptr) {
return NULL;
}
// 手動 prefault が要求されている場合は、mmap 直後に SuperSlab 全域を page-in
if (pf_policy == SS_PREFAULT_TOUCH || pf_policy == SS_PREFAULT_ASYNC) {
ss_prefault_region(ptr, ss_size);
}
// Debug logging for REFILL with new allocation
if (dbg == 1) {
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
size_class, (void*)ptr);
}
}
// Initialize SuperSlab header (Phase 12: no global size_class field)
SuperSlab* ss = (SuperSlab*)ptr;
ss->magic = SUPERSLAB_MAGIC;
ss->active_slabs = 0;
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
// P-Tier: Initialize tier to HOT (normal operation, eligible for allocation)
atomic_store_explicit(&ss->tier, SS_TIER_HOT, memory_order_relaxed);
ss->slab_bitmap = 0;
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
ss->freelist_mask = 0; // P1.1 FIX: Initialize freelist_mask
ss->empty_mask = 0; // P1.1 FIX: Initialize empty_mask
ss->empty_count = 0; // P1.1 FIX: Initialize empty_count
ss->partial_epoch = 0;
ss->publish_hint = 0xFF;
// Initialize atomics explicitly
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
ss->partial_next = NULL;
// Phase 9: Initialize LRU fields
ss->last_used_ns = 0;
ss->generation = 0;
ss->lru_prev = NULL;
ss->lru_next = NULL;
// Phase 3d-C: Initialize hot/cold fields
ss->hot_count = 0;
ss->cold_count = 0;
memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
memset(ss->cold_indices, 0, sizeof(ss->cold_indices));
// Phase 12: Initialize next_chunk (legacy per-class chain)
ss->next_chunk = NULL;
// Initialize all slab metadata (only up to max slabs for this size).
// NOTE: 詳細な Slab 初期化と Remote Queue Drain は superslab_slab.c
//Slab Management Box側に集約している。
int max_slabs = (int)(ss_size / SLAB_SIZE);
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
// P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
// This ensures class_map is in a known state even before slabs are assigned
memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));
// P0 Optimization: Initialize shared_meta pointer (used for O(1) metadata lookup)
ss->shared_meta = NULL;
if (from_cache) {
ss_stats_cache_reuse();
}
// Phase 8.3: Update ACE current_lg to match allocated size
g_ss_ace[size_class].current_lg = lg;
// Phase 1: Register SuperSlab in global registry for fast lookup
// CRITICAL: Register AFTER full initialization (ss structure is ready)
uintptr_t base = (uintptr_t)ss;
int reg_ok = hak_super_register(base, ss);
if (!reg_ok) {
// Registry full - this is a fatal error
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
// Still return ss to avoid memory leak, but lookups may fail
}
do {
static _Atomic uint32_t g_ss_reg_log_shot = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_reg_log_shot, 1, memory_order_relaxed);
if (shot < 4) {
fprintf(stderr,
"[SS_REG_DEBUG] class=%u ss=%p reg_ok=%d map_count=%zu\n",
(unsigned)size_class,
(void*)ss,
reg_ok,
g_ss_addr_map.count);
fflush(stderr);
}
} while (0);
if (!from_cache) {
ss_stats_on_ss_alloc_class(size_class);
}
return ss;
}
// ============================================================================
// SuperSlab Deallocation
// ============================================================================
void superslab_free(SuperSlab* ss) {
if (!ss || ss->magic != SUPERSLAB_MAGIC) {
return; // Invalid SuperSlab
}
// Guard: do not free while pinned by TLS/remote holders
uint32_t ss_refs = atomic_load_explicit(&ss->refcount, memory_order_acquire);
if (__builtin_expect(ss_refs != 0, 0)) {
#if !HAKMEM_BUILD_RELEASE
static _Atomic uint32_t g_ss_free_pinned = 0;
uint32_t shot = atomic_fetch_add_explicit(&g_ss_free_pinned, 1, memory_order_relaxed);
if (shot < 8) {
fprintf(stderr, "[SS_FREE_SKIP_PINNED] ss=%p refcount=%u\n", (void*)ss, (unsigned)ss_refs);
}
#endif
return;
}
// ADD DEBUG LOGGING
static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
dbg = 0;
#else
if (__builtin_expect(dbg == -1, 0)) {
const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
dbg = (e && *e && *e != '0') ? 1 : 0;
}
#endif
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
(void*)ss, ss->lg_size, ss->active_slabs);
}
// Phase 9: Lazy Deallocation - try to cache in LRU instead of munmap
size_t ss_size = (size_t)1 << ss->lg_size;
// Phase 1: Unregister SuperSlab from registry FIRST
// CRITICAL: Must unregister BEFORE adding to LRU cache
// Reason: Cached SuperSlabs should NOT be found by lookups
uintptr_t base = (uintptr_t)ss;
hak_super_unregister(base);
// Memory fence to ensure unregister is visible
atomic_thread_fence(memory_order_release);
// Phase 9: Try LRU cache first (lazy deallocation)
// NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
// Magic will be cleared on eviction or reuse
int lru_cached = hak_ss_lru_push(ss);
if (dbg == 1) {
fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
}
if (lru_cached) {
// Successfully cached in LRU - defer munmap
// OPTIMIZATION: Lazy zeroing via MADV_DONTNEED
// When SuperSlab enters LRU cache, mark pages as DONTNEED to defer
// page zeroing until they are actually touched by next allocation.
// Kernel will zero them on-fault (zero-on-fault), reducing clear_page_erms overhead.
static int lazy_zero_enabled = -1;
if (__builtin_expect(lazy_zero_enabled == -1, 0)) {
const char* e = getenv("HAKMEM_SS_LAZY_ZERO");
lazy_zero_enabled = (!e || !*e || *e == '1') ? 1 : 0;
}
if (lazy_zero_enabled) {
#ifdef MADV_DONTNEED
(void)ss_os_madvise_guarded((void*)ss, ss_size, MADV_DONTNEED, "ss_lru_madvise");
ss_os_stats_record_madvise();
#endif
}
return;
}
// LRU cache full or disabled - try old cache using head class_idx (if known)
int old_cached = ss_cache_push(0, ss);
if (old_cached) {
ss_stats_cache_store();
return;
}
// Phase E3-1: Check never-free policy before munmap (DISABLED - policy field not yet implemented)
// If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
// TODO: Add tiny_ss_never_free_global field to FrozenPolicy when implementing Phase E3-1
#if 0
const FrozenPolicy* pol = hkm_policy_get();
if (pol && pol->tiny_ss_never_free_global) {
// Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
// Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
(void*)ss, ss_size);
#endif
return;
}
#endif
// Both caches full - immediately free to OS (eager deallocation)
// Phase 54: Memory-Lean mode - try decommit before munmap
// This allows kernel to reclaim pages while keeping VMA (lower RSS without munmap)
if (ss_mem_lean_enabled()) {
int decommit_ret = ss_maybe_decommit_superslab((void*)ss, ss_size);
if (decommit_ret == 0) {
// Decommit succeeded - record lean_retire and skip munmap
// SuperSlab VMA is kept but pages are released to kernel
ss_os_stats_record_lean_retire();
// Clear magic to prevent use-after-free (but keep VMA)
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_lean_retire] Decommitted SuperSlab ss=%p size=%zu (lean mode)\n",
(void*)ss, ss_size);
#endif
// Update statistics for retired superslab (not munmap)
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
return; // Skip munmap, pages are decommitted
}
// Decommit failed (DSO overlap, madvise error) - fall through to munmap
}
// Clear magic to prevent use-after-free
ss->magic = 0;
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
(void*)ss, ss_size,
atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif
ss_os_stats_record_free();
munmap(ss, ss_size);
// Update statistics for actual release to OS
pthread_mutex_lock(&g_superslab_lock);
g_superslabs_freed++;
// Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
g_bytes_allocated -= ss_size;
pthread_mutex_unlock(&g_superslab_lock);
#if !HAKMEM_BUILD_RELEASE
fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
(unsigned long long)g_superslabs_freed);
#endif
}
// ============================================================================
// Slab Initialization within SuperSlab
// ============================================================================
// Note: superslab_init_slab() は superslab_slab.cSlab Management Box
// に実装されており、この Box では export しない。