Files
hakmem/core/hakmem_shared_pool_acquire.c
Moe Charm (CI) c482722705 Priority-2 ENV Cache: Shared Pool Acquire (add 5 variables, replace 5 call sites)
[Added ENV variables]
- HAKMEM_SS_EMPTY_REUSE (default: 1)
- HAKMEM_SS_EMPTY_SCAN_LIMIT (default: 32)
- HAKMEM_SS_ACQUIRE_DEBUG (default: 0)
- HAKMEM_TINY_TENSION_DRAIN_ENABLE (default: 1)
- HAKMEM_TINY_TENSION_DRAIN_THRESHOLD (default: 1024)

[Replaced file]
- core/hakmem_shared_pool_acquire.c (5 call sites → ENV cache)

[Change details]
1. ENV cache (hakmem_env_cache.h):
   - Added 5 variables to the cache struct (41 → 46 variables)
   - Added their initialization to hakmem_env_cache_init()
   - Added 5 accessor macros (see the sketch after this list)
   - Updated the variable count: 41 → 46

2. hakmem_shared_pool_acquire.c:
   - getenv("HAKMEM_SS_EMPTY_REUSE") → HAK_ENV_SS_EMPTY_REUSE()
   - getenv("HAKMEM_SS_EMPTY_SCAN_LIMIT") → HAK_ENV_SS_EMPTY_SCAN_LIMIT()
   - getenv("HAKMEM_SS_ACQUIRE_DEBUG") → HAK_ENV_SS_ACQUIRE_DEBUG()
   - getenv("HAKMEM_TINY_TENSION_DRAIN_ENABLE") → HAK_ENV_TINY_TENSION_DRAIN_ENABLE()
   - getenv("HAKMEM_TINY_TENSION_DRAIN_THRESHOLD") → HAK_ENV_TINY_TENSION_DRAIN_THRESHOLD()
   - Added #include "hakmem_env_cache.h"
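
The hakmem_env_cache.h implementation is not shown in this view. The sketch below is a hypothetical illustration of the cached-ENV pattern this commit relies on; the struct, field, and helper names are assumptions, and only the HAKMEM_* variable names and the HAK_ENV_* macro names come from this commit.

    /* Hypothetical sketch of the cached-ENV pattern; the real hakmem_env_cache.h may differ. */
    #include <stdlib.h>

    typedef struct {
        int initialized;
        int ss_empty_reuse;                /* HAKMEM_SS_EMPTY_REUSE, default 1 */
        int ss_empty_scan_limit;           /* HAKMEM_SS_EMPTY_SCAN_LIMIT, default 32 */
        int ss_acquire_debug;              /* HAKMEM_SS_ACQUIRE_DEBUG, default 0 */
        int tiny_tension_drain_enable;     /* HAKMEM_TINY_TENSION_DRAIN_ENABLE, default 1 */
        int tiny_tension_drain_threshold;  /* HAKMEM_TINY_TENSION_DRAIN_THRESHOLD, default 1024 */
    } env_cache_sketch_t;

    static env_cache_sketch_t g_env_cache_sketch;

    static int env_int_or(const char* name, int dflt) {
        const char* s = getenv(name);   /* getenv() runs once, at init time */
        return s ? atoi(s) : dflt;
    }

    static void env_cache_sketch_init(void) {
        if (g_env_cache_sketch.initialized) return;
        g_env_cache_sketch.ss_empty_reuse      = env_int_or("HAKMEM_SS_EMPTY_REUSE", 1);
        g_env_cache_sketch.ss_empty_scan_limit = env_int_or("HAKMEM_SS_EMPTY_SCAN_LIMIT", 32);
        g_env_cache_sketch.ss_acquire_debug    = env_int_or("HAKMEM_SS_ACQUIRE_DEBUG", 0);
        g_env_cache_sketch.tiny_tension_drain_enable =
            env_int_or("HAKMEM_TINY_TENSION_DRAIN_ENABLE", 1);
        g_env_cache_sketch.tiny_tension_drain_threshold =
            env_int_or("HAKMEM_TINY_TENSION_DRAIN_THRESHOLD", 1024);
        g_env_cache_sketch.initialized = 1;
    }

    /* Warm-path accessors read the cached value; no getenv() after init. */
    #define HAK_ENV_SS_EMPTY_REUSE() (g_env_cache_sketch.ss_empty_reuse)

Presumably the real init runs once early in process startup and the remaining four accessor macros follow the same one-field-read pattern.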

[Effect]
- Eliminates all getenv() calls from the Shared Pool Acquire warm path
- Removes getenv() overhead from the lock-free Stage 2 path

[Tests]
- make shared → success
- /tmp/test_mixed3_final → PASSED

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-02 20:51:50 +09:00


#include "hakmem_shared_pool_internal.h"
#include "hakmem_debug_master.h"
#include "hakmem_stats_master.h"
#include "box/ss_slab_meta_box.h"
#include "box/ss_hot_cold_box.h"
#include "box/pagefault_telemetry_box.h"
#include "box/tls_sll_drain_box.h"
#include "box/tls_slab_reuse_guard_box.h"
#include "hakmem_policy.h"
#include "hakmem_env_cache.h" // Priority-2: ENV cache
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
// Stage 0.5: EMPTY slab direct scan (registry-based EMPTY reuse)
// Scan existing SuperSlabs for EMPTY slabs (highest reuse priority) to
// avoid Stage 3 (mmap) when freed slabs are available.
static inline int
sp_acquire_from_empty_scan(int class_idx, SuperSlab** ss_out, int* slab_idx_out, int dbg_acquire)
{
    // Priority-2: Use cached ENV
    int empty_reuse_enabled = HAK_ENV_SS_EMPTY_REUSE();
    if (!empty_reuse_enabled) {
        return -1;
    }

    extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
    extern int g_super_reg_class_size[TINY_NUM_CLASSES];
    int reg_size = (class_idx < TINY_NUM_CLASSES) ? g_super_reg_class_size[class_idx] : 0;

    // Priority-2: Use cached ENV
    int scan_limit = HAK_ENV_SS_EMPTY_SCAN_LIMIT();
    if (scan_limit > reg_size) scan_limit = reg_size;

    // Stage 0.5 hit counter for visualization
    static _Atomic uint64_t stage05_hits = 0;
    static _Atomic uint64_t stage05_attempts = 0;
    atomic_fetch_add_explicit(&stage05_attempts, 1, memory_order_relaxed);

    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* ss = g_super_reg_by_class[class_idx][i];
        if (!(ss && ss->magic == SUPERSLAB_MAGIC)) continue;
        if (ss->empty_count == 0) continue; // No EMPTY slabs in this SS

        uint32_t mask = ss->empty_mask;
        while (mask) {
            int empty_idx = __builtin_ctz(mask);
            mask &= (mask - 1); // clear lowest bit

            TinySlabMeta* meta = &ss->slabs[empty_idx];
            if (meta->capacity > 0 && meta->used == 0) {
                tiny_tls_slab_reuse_guard(ss);
                ss_clear_slab_empty(ss, empty_idx);
                meta->class_idx = (uint8_t)class_idx;
                ss->class_map[empty_idx] = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
                if (dbg_acquire == 1) {
                    fprintf(stderr,
                            "[SP_ACQUIRE_STAGE0.5_EMPTY] class=%d reusing EMPTY slab (ss=%p slab=%d empty_count=%u)\n",
                            class_idx, (void*)ss, empty_idx, ss->empty_count);
                }
#else
                (void)dbg_acquire;
#endif
                *ss_out = ss;
                *slab_idx_out = empty_idx;

                sp_stage_stats_init();
                if (g_sp_stage_stats_enabled) {
                    atomic_fetch_add(&g_sp_stage1_hits[class_idx], 1);
                }
                atomic_fetch_add_explicit(&stage05_hits, 1, memory_order_relaxed);

                // Stage 0.5 hit rate visualization (every 100 hits)
                uint64_t hits = atomic_load_explicit(&stage05_hits, memory_order_relaxed);
                if (hits % 100 == 1) {
                    uint64_t attempts = atomic_load_explicit(&stage05_attempts, memory_order_relaxed);
                    fprintf(stderr, "[STAGE0.5_STATS] hits=%lu attempts=%lu rate=%.1f%% (scan_limit=%d)\n",
                            hits, attempts, (double)hits * 100.0 / attempts, scan_limit);
                }
                return 0;
            }
        }
    }
    return -1;
}
int
shared_pool_acquire_slab(int class_idx, SuperSlab** ss_out, int* slab_idx_out)
{
    // Phase 12: SP-SLOT Box - 3-Stage Acquire Logic
    //
    // Stage 1: Reuse EMPTY slots from per-class free list (EMPTY→ACTIVE)
    // Stage 2: Find UNUSED slots in existing SuperSlabs
    // Stage 3: Get new SuperSlab (LRU pop or mmap)
    //
    // Invariants:
    // - On success: *ss_out != NULL, 0 <= *slab_idx_out < total_slots
    // - The chosen slab has meta->class_idx == class_idx
    if (!ss_out || !slab_idx_out) {
        return -1;
    }
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return -1;
    }
    shared_pool_init();

    // Debug logging / stage stats
#if !HAKMEM_BUILD_RELEASE
    // Priority-2: Use cached ENV
    int dbg_acquire = HAK_ENV_SS_ACQUIRE_DEBUG();
#else
    static const int dbg_acquire = 0;
#endif
    sp_stage_stats_init();
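    // NOTE: The tension-based drain block (just before Stage 3) jumps back to
    // the label below after draining the TLS SLL, so newly EMPTY slabs can be
    // picked up again by the Stage 0.5 / Stage 1 reuse paths.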
stage1_retry_after_tension_drain:
    // ========== Stage 0.5 (Phase 12-1.1): EMPTY slab direct scan ==========
    // Scan existing SuperSlabs for EMPTY slabs (highest reuse priority) to
    // avoid Stage 3 (mmap) when freed slabs are available.
    if (sp_acquire_from_empty_scan(class_idx, ss_out, slab_idx_out, dbg_acquire) == 0) {
        return 0;
    }

    // ========== Stage 1 (Lock-Free): Try to reuse EMPTY slots ==========
    // P0-4: Lock-free pop from per-class free list (no mutex needed!)
    // Best case: Same class freed a slot, reuse immediately (cache-hot)
    SharedSSMeta* reuse_meta = NULL;
    int reuse_slot_idx = -1;
    if (sp_freelist_pop_lockfree(class_idx, &reuse_meta, &reuse_slot_idx)) {
        // Found EMPTY slot from lock-free list!
        // Now acquire mutex ONLY for slot activation and metadata update

        // P0 instrumentation: count lock acquisitions
        lock_stats_init();
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_acquire_count, 1);
            atomic_fetch_add(&g_lock_acquire_slab_count, 1);
        }
        pthread_mutex_lock(&g_shared_pool.alloc_lock);

        // P0.3: Guard against TLS SLL orphaned pointers before reusing slab
        // RACE FIX: Load SuperSlab pointer atomically BEFORE guard (consistency)
        SuperSlab* ss_guard = atomic_load_explicit(&reuse_meta->ss, memory_order_relaxed);
        if (ss_guard) {
            tiny_tls_slab_reuse_guard(ss_guard);
        }

        // Activate slot under mutex (slot state transition requires protection)
        if (sp_slot_mark_active(reuse_meta, reuse_slot_idx, class_idx) == 0) {
            // RACE FIX: Load SuperSlab pointer atomically (consistency)
            SuperSlab* ss = atomic_load_explicit(&reuse_meta->ss, memory_order_relaxed);

            // RACE FIX: Check if SuperSlab was freed (NULL pointer)
            // This can happen if Thread A freed the SuperSlab after pushing slot to freelist,
            // but Thread B popped the stale slot before the freelist was cleared.
            if (!ss) {
                // SuperSlab freed - skip and fall through to Stage 2/3
                if (g_lock_stats_enabled == 1) {
                    atomic_fetch_add(&g_lock_release_count, 1);
                }
                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
                goto stage2_fallback;
            }

#if !HAKMEM_BUILD_RELEASE
            if (dbg_acquire == 1) {
                fprintf(stderr, "[SP_ACQUIRE_STAGE1_LOCKFREE] class=%d reusing EMPTY slot (ss=%p slab=%d)\n",
                        class_idx, (void*)ss, reuse_slot_idx);
            }
#endif
            // Update SuperSlab metadata
            ss->slab_bitmap |= (1u << reuse_slot_idx);
            ss_slab_meta_class_idx_set(ss, reuse_slot_idx, (uint8_t)class_idx);
            if (ss->active_slabs == 0) {
                // Was empty, now active again
                ss->active_slabs = 1;
                g_shared_pool.active_count++;
            }

            // Track per-class active slots (approximate, under alloc_lock)
            if (class_idx < TINY_NUM_CLASSES_SS) {
                g_shared_pool.class_active_slots[class_idx]++;
            }

            // Update hint
            g_shared_pool.class_hints[class_idx] = ss;

            *ss_out = ss;
            *slab_idx_out = reuse_slot_idx;

            if (g_lock_stats_enabled == 1) {
                atomic_fetch_add(&g_lock_release_count, 1);
            }
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);

            if (g_sp_stage_stats_enabled) {
                atomic_fetch_add(&g_sp_stage1_hits[class_idx], 1);
            }
            return 0; // ✅ Stage 1 (lock-free) success
        }

        // Slot activation failed (race condition?) - release lock and fall through
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    }
stage2_fallback:
    // ========== Stage 2 (Lock-Free): Try to claim UNUSED slots ==========
    // P0-5: Lock-free atomic CAS claiming (no mutex needed for slot state transition!)
    // RACE FIX: Read ss_meta_count atomically (now properly declared as _Atomic)
    // No cast needed! memory_order_acquire synchronizes with release in sp_meta_find_or_create
    uint32_t meta_count = atomic_load_explicit(
        &g_shared_pool.ss_meta_count,
        memory_order_acquire
    );
    for (uint32_t i = 0; i < meta_count; i++) {
        SharedSSMeta* meta = &g_shared_pool.ss_metadata[i];

        // Try lock-free claiming (UNUSED → ACTIVE via CAS)
        int claimed_idx = sp_slot_claim_lockfree(meta, class_idx);
        if (claimed_idx >= 0) {
            // RACE FIX: Load SuperSlab pointer atomically (critical for lock-free Stage 2)
            // Use memory_order_acquire to synchronize with release in sp_meta_find_or_create
            SuperSlab* ss = atomic_load_explicit(&meta->ss, memory_order_acquire);
            if (!ss) {
                // SuperSlab was freed between claiming and loading - skip this entry
                continue;
            }
#if !HAKMEM_BUILD_RELEASE
            if (dbg_acquire == 1) {
                fprintf(stderr, "[SP_ACQUIRE_STAGE2_LOCKFREE] class=%d claimed UNUSED slot (ss=%p slab=%d)\n",
                        class_idx, (void*)ss, claimed_idx);
            }
#endif
            // P0 instrumentation: count lock acquisitions
            lock_stats_init();
            if (g_lock_stats_enabled == 1) {
                atomic_fetch_add(&g_lock_acquire_count, 1);
                atomic_fetch_add(&g_lock_acquire_slab_count, 1);
            }
            pthread_mutex_lock(&g_shared_pool.alloc_lock);

            // Update SuperSlab metadata under mutex
            ss->slab_bitmap |= (1u << claimed_idx);
            ss_slab_meta_class_idx_set(ss, claimed_idx, (uint8_t)class_idx);
            if (ss->active_slabs == 0) {
                ss->active_slabs = 1;
                g_shared_pool.active_count++;
            }
            if (class_idx < TINY_NUM_CLASSES_SS) {
                g_shared_pool.class_active_slots[class_idx]++;
            }

            // Update hint
            g_shared_pool.class_hints[class_idx] = ss;

            *ss_out = ss;
            *slab_idx_out = claimed_idx;
            sp_fix_geometry_if_needed(ss, claimed_idx, class_idx);

            if (g_lock_stats_enabled == 1) {
                atomic_fetch_add(&g_lock_release_count, 1);
            }
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);

            if (g_sp_stage_stats_enabled) {
                atomic_fetch_add(&g_sp_stage2_hits[class_idx], 1);
            }
            return 0; // ✅ Stage 2 (lock-free) success
        }
        // Claim failed (no UNUSED slots in this meta) - continue to next SuperSlab
    }

    // ========== Tension-Based Drain: Try to create EMPTY slots before Stage 3 ==========
    // If TLS SLL has accumulated blocks, drain them to enable EMPTY slot detection
    // This can avoid allocating new SuperSlabs by reusing EMPTY slots in Stage 1
    // ENV: HAKMEM_TINY_TENSION_DRAIN_ENABLE=0 to disable (default=1)
    // ENV: HAKMEM_TINY_TENSION_DRAIN_THRESHOLD=N to set threshold (default=1024)
    {
        // Priority-2: Use cached ENV
        int tension_drain_enabled = HAK_ENV_TINY_TENSION_DRAIN_ENABLE();
        uint32_t tension_threshold = (uint32_t)HAK_ENV_TINY_TENSION_DRAIN_THRESHOLD();
        if (tension_drain_enabled) {
            extern __thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES];
            extern uint32_t tiny_tls_sll_drain(int class_idx, uint32_t batch_size);
            uint32_t sll_count = (class_idx < TINY_NUM_CLASSES) ? g_tls_sll[class_idx].count : 0;
            if (sll_count >= tension_threshold) {
                // Drain all blocks to maximize EMPTY slot creation
                uint32_t drained = tiny_tls_sll_drain(class_idx, 0); // 0 = drain all
                if (drained > 0) {
                    // Retry Stage 1 (EMPTY reuse) after drain
                    // Some slabs might have become EMPTY (meta->used == 0)
                    goto stage1_retry_after_tension_drain;
                }
            }
        }
    }
    // ========== Stage 3: Mutex-protected fallback (new SuperSlab allocation) ==========
    // All existing SuperSlabs have no UNUSED slots → need new SuperSlab

    // P0 instrumentation: count lock acquisitions
    lock_stats_init();
    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_acquire_count, 1);
        atomic_fetch_add(&g_lock_acquire_slab_count, 1);
    }
    pthread_mutex_lock(&g_shared_pool.alloc_lock);

    // ========== Stage 3: Get new SuperSlab ==========
    // Try LRU cache first, then mmap
    SuperSlab* new_ss = NULL;

    // Stage 3a: Try LRU cache
    extern SuperSlab* hak_ss_lru_pop(uint8_t size_class);
    new_ss = hak_ss_lru_pop((uint8_t)class_idx);
    int from_lru = (new_ss != NULL);

    // Stage 3b: If LRU miss, allocate new SuperSlab
    if (!new_ss) {
        // Release the alloc_lock to avoid deadlock with registry during superslab_allocate
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);

        SuperSlab* allocated_ss = sp_internal_allocate_superslab();

        // Re-acquire the alloc_lock
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_acquire_count, 1);
            atomic_fetch_add(&g_lock_acquire_slab_count, 1); // This is part of acquisition path
        }
        pthread_mutex_lock(&g_shared_pool.alloc_lock);

        if (!allocated_ss) {
            // Allocation failed; return now.
            if (g_lock_stats_enabled == 1) {
                atomic_fetch_add(&g_lock_release_count, 1);
            }
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
            return -1; // Out of memory
        }
        new_ss = allocated_ss;

        // Add newly allocated SuperSlab to the shared pool's internal array
        if (g_shared_pool.total_count >= g_shared_pool.capacity) {
            shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
            if (g_shared_pool.total_count >= g_shared_pool.capacity) {
                // Pool table expansion failed; leave ss alive (registry-owned),
                // but do not treat it as part of shared_pool.
                // This is a critical error, return early.
                if (g_lock_stats_enabled == 1) {
                    atomic_fetch_add(&g_lock_release_count, 1);
                }
                pthread_mutex_unlock(&g_shared_pool.alloc_lock);
                return -1;
            }
        }
        g_shared_pool.slabs[g_shared_pool.total_count] = new_ss;
        g_shared_pool.total_count++;
    }
#if !HAKMEM_BUILD_RELEASE
    if (dbg_acquire == 1 && new_ss) {
        fprintf(stderr, "[SP_ACQUIRE_STAGE3] class=%d new SuperSlab (ss=%p from_lru=%d)\n",
                class_idx, (void*)new_ss, from_lru);
    }
#endif
    if (!new_ss) {
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1; // ❌ Out of memory
    }

    // Before creating a new SuperSlab, consult learning-layer soft cap.
    // Phase 9-2: Soft Cap removed to allow Shared Pool to fully replace Legacy Backend.
    // We now rely on LRU eviction and EMPTY recycling to manage memory pressure.

    // Create metadata for this new SuperSlab
    SharedSSMeta* new_meta = sp_meta_find_or_create(new_ss);
    if (!new_meta) {
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1; // ❌ Metadata allocation failed
    }

    // Assign first slot to this class
    int first_slot = 0;
    if (sp_slot_mark_active(new_meta, first_slot, class_idx) != 0) {
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1; // ❌ Should not happen
    }

    // Update SuperSlab metadata
    new_ss->slab_bitmap |= (1u << first_slot);
    ss_slab_meta_class_idx_set(new_ss, first_slot, (uint8_t)class_idx);
    new_ss->active_slabs = 1;
    g_shared_pool.active_count++;
    if (class_idx < TINY_NUM_CLASSES_SS) {
        g_shared_pool.class_active_slots[class_idx]++;
    }

    // Update hint
    g_shared_pool.class_hints[class_idx] = new_ss;

    *ss_out = new_ss;
    *slab_idx_out = first_slot;
    sp_fix_geometry_if_needed(new_ss, first_slot, class_idx);

    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_release_count, 1);
    }
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);

    if (g_sp_stage_stats_enabled) {
        atomic_fetch_add(&g_sp_stage3_hits[class_idx], 1);
    }
    return 0; // ✅ Stage 3 success
}
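
For context, a hypothetical caller sketch (not part of this file or commit) showing how the acquire API's documented invariants and return codes are typically consumed; the helper name is illustrative only.

    /* Hypothetical usage sketch; example_acquire is illustrative, not a real hakmem symbol. */
    static int example_acquire(int class_idx)
    {
        SuperSlab* ss = NULL;
        int slab_idx = -1;
        if (shared_pool_acquire_slab(class_idx, &ss, &slab_idx) != 0) {
            return -1; /* invalid arguments or out of memory (Stage 3 failed) */
        }
        /* On success: ss != NULL, slab_idx is a valid slot index, and the
         * slab's class_idx equals the requested class; the caller can now
         * carve blocks from this slab. */
        (void)ss;
        (void)slab_idx;
        return 0;
    }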