hakmem/core/hakmem_shared_pool_release.c
Moe Charm (CI) acc64f2438 Phase ML1: reduce Pool v1 memset overhead (89.73%) (+15.34% improvement)
## Summary
- Fixed the setenv segfault in bench_profile.h with ChatGPT's help (switched to the RTLD_NEXT route)
- Added core/box/pool_zero_mode_box.h: ZERO_MODE is now managed in one place via the ENV cache (sketched below)
- core/hakmem_pool.c: memset is now gated by the zero mode (FULL/header/off)
- A/B test result: +15.34% improvement with ZERO_MODE=header (1M iterations, C6-heavy)
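The commit does not include the new header itself, so the following is only a minimal sketch of how an ENV-cached zero-mode enum plus a memset gate might fit together. The names `pool_zero_mode_t`, `pool_zero_mode()`, and `pool_zero()` are assumptions for illustration, not the actual pool_zero_mode_box.h API.

```c
/* Hypothetical sketch -- names are illustrative, not the real
 * pool_zero_mode_box.h symbols. */
#include <stdlib.h>
#include <string.h>

typedef enum {
    POOL_ZERO_FULL = 0,   /* memset the whole block (previous behaviour)   */
    POOL_ZERO_HEADER,     /* clear only the header bytes (the +15.34% case) */
    POOL_ZERO_OFF         /* no clearing at all                            */
} pool_zero_mode_t;

/* Read ZERO_MODE once and cache the result, mirroring the ENV-cache pattern. */
static pool_zero_mode_t pool_zero_mode(void) {
    static int cached = -1;
    if (cached < 0) {
        const char* v = getenv("ZERO_MODE");
        if (v && strcmp(v, "header") == 0)   cached = POOL_ZERO_HEADER;
        else if (v && strcmp(v, "off") == 0) cached = POOL_ZERO_OFF;
        else                                 cached = POOL_ZERO_FULL;
    }
    return (pool_zero_mode_t)cached;
}

/* In the pool allocation path, clear only what the current mode asks for. */
static void pool_zero(void* block, size_t block_size, size_t header_size) {
    switch (pool_zero_mode()) {
    case POOL_ZERO_FULL:   memset(block, 0, block_size);  break;
    case POOL_ZERO_HEADER: memset(block, 0, header_size); break;
    case POOL_ZERO_OFF:    /* caller initializes the block */ break;
    }
}
```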

## Files Modified
- core/box/pool_api.inc.h: include pool_zero_mode_box.h
- core/bench_profile.h: glibc setenv → malloc+putenv (avoids the segfault); see the sketch after this list
- core/hakmem_pool.c: zero-mode lookup and control logic
- core/box/pool_zero_mode_box.h (new): enum/getter
- CURRENT_TASK.md: Phase ML1 results recorded
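The bench_profile.h change is described only at the level of "glibc setenv → malloc+putenv". A minimal sketch of that pattern follows; the helper name `bench_set_env` is a hypothetical stand-in, not necessarily the real bench_profile.h symbol.

```c
/* Hypothetical sketch of the setenv -> malloc+putenv swap; bench_set_env
 * is an assumed name, not confirmed by the commit. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int bench_set_env(const char* name, const char* value) {
    /* putenv() keeps a reference to the string, so it must stay alive:
     * heap-allocate it and intentionally never free it (one small,
     * bounded "leak" per variable). */
    size_t len = strlen(name) + 1 + strlen(value) + 1;
    char* kv = malloc(len);
    if (!kv) return -1;
    snprintf(kv, len, "%s=%s", name, value);
    return putenv(kv);
}
```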

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00



#include "hakmem_shared_pool_internal.h"
#include "hakmem_debug_master.h"
#include "box/ss_slab_meta_box.h"
#include "box/ss_hot_cold_box.h"
#include "box/ss_tier_box.h" // P-Tier: Utilization-aware tiering
#include "hakmem_env_cache.h" // Priority-2: ENV cache
#include "superslab/superslab_inline.h" // superslab_ref_get guard for TLS pins
#include "box/ss_release_guard_box.h" // Box: SuperSlab Release Guard
#include "box/ss_slab_reset_box.h" // Box: Reset slab metadata on reuse path
#include "box/ss_stats_box.h" // Observability: Superslab/slab counters
#include "box/ss_budget_box.h" // Budget guard (global/class caps)
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
static inline void c7_release_log_once(SuperSlab* ss, int slab_idx) {
#if HAKMEM_BUILD_RELEASE
    static _Atomic uint32_t rel_c7_release_logs = 0;
    uint32_t n = atomic_fetch_add_explicit(&rel_c7_release_logs, 1, memory_order_relaxed);
    if (n < 8) {
        TinySlabMeta* meta = &ss->slabs[slab_idx];
        fprintf(stderr,
                "[REL_C7_RELEASE] ss=%p slab=%d used=%u cap=%u carved=%u\n",
                (void*)ss,
                slab_idx,
                (unsigned)meta->used,
                (unsigned)meta->capacity,
                (unsigned)meta->carved);
    }
#else
    static _Atomic uint32_t dbg_c7_release_logs = 0;
    uint32_t n = atomic_fetch_add_explicit(&dbg_c7_release_logs, 1, memory_order_relaxed);
    if (n < 8) {
        TinySlabMeta* meta = &ss->slabs[slab_idx];
        fprintf(stderr,
                "[DBG_C7_RELEASE] ss=%p slab=%d used=%u cap=%u carved=%u\n",
                (void*)ss,
                slab_idx,
                (unsigned)meta->used,
                (unsigned)meta->capacity,
                (unsigned)meta->carved);
    }
#endif
}
void
shared_pool_release_slab(SuperSlab* ss, int slab_idx)
{
    // Phase 12: SP-SLOT Box - Slot-based Release
    //
    // Flow:
    // 1. Validate inputs and check meta->used == 0
    // 2. Find SharedSSMeta for this SuperSlab
    // 3. Mark slot ACTIVE → EMPTY
    // 4. Push to per-class free list (enables same-class reuse)
    // 5. If all slots EMPTY → superslab_free() → LRU cache
    if (!ss) {
        return;
    }
    if (slab_idx < 0 || slab_idx >= SLABS_PER_SUPERSLAB_MAX) {
        return;
    }

    // Phase 9-2 FIX: Promote Legacy SuperSlabs to Shared Pool on first recycle
    // If we are recycling a slot from a Legacy SS, we must remove it from the
    // Legacy list (g_superslab_heads) to prevent Legacy Backend from allocating
    // from it simultaneously (Double Allocation Race).
    // This effectively transfers ownership to Shared Pool.
    extern void remove_superslab_from_legacy_head(SuperSlab* ss);
    remove_superslab_from_legacy_head(ss);

    // BUGFIX: Re-check used count after removal. Legacy Backend might have
    // allocated from this slab while we were waiting for the lock in remove().
    TinySlabMeta* slab_meta = &ss->slabs[slab_idx];
    if (atomic_load_explicit(&slab_meta->used, memory_order_acquire) != 0) {
        // Legacy Backend stole this slab. It's now an orphan (removed from list).
        // We abort recycling. It will be recycled when Legacy frees it later.
        return;
    }

    // Debug logging
#if !HAKMEM_BUILD_RELEASE
    // Priority-2: Use cached ENV
    int dbg = HAK_ENV_SS_FREE_DEBUG();
#else
    static const int dbg = 0;
#endif
    (void)dbg;

    // P0 instrumentation: count lock acquisitions
    lock_stats_init();
    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_stats_enabled, 1);
        atomic_fetch_add(&g_lock_release_slab_count, 1);
    }
    pthread_mutex_lock(&g_shared_pool.alloc_lock);

    // TinySlabMeta* slab_meta = &ss->slabs[slab_idx]; // Already declared above
    if (slab_meta->used != 0) {
        // Not actually empty (double check under lock)
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return;
    }

    uint8_t class_idx = slab_meta->class_idx;
    if (class_idx == 7) {
        c7_release_log_once(ss, slab_idx);
    }

    // Guard: if SuperSlab is pinned (TLS/remote references), defer release to avoid
    // class_map=255 while pointers are still in-flight.
    uint32_t ss_refs_guard = superslab_ref_get(ss);
    if (ss_refs_guard != 0) {
#if !HAKMEM_BUILD_RELEASE
        if (dbg == 1) {
            fprintf(stderr,
                    "[SP_SLOT_RELEASE_SKIP_PINNED] ss=%p slab_idx=%d class=%d refcount=%u\n",
                    (void*)ss, slab_idx, class_idx, (unsigned)ss_refs_guard);
        }
#endif
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return;
    }

#if !HAKMEM_BUILD_RELEASE
    if (dbg == 1) {
        fprintf(stderr, "[SP_SLOT_RELEASE] ss=%p slab_idx=%d class=%d used=0 (marking EMPTY)\n",
                (void*)ss, slab_idx, class_idx);
    }
#endif

    if (class_idx == 7) {
        ss_slab_reset_meta_for_tiny(ss, slab_idx, class_idx);
#if HAKMEM_BUILD_RELEASE
        static _Atomic uint32_t rel_c7_reset_logs = 0;
        uint32_t rn = atomic_fetch_add_explicit(&rel_c7_reset_logs, 1, memory_order_relaxed);
        if (rn < 4) {
            TinySlabMeta* m = &ss->slabs[slab_idx];
            fprintf(stderr,
                    "[REL_C7_RELEASE_RESET] ss=%p slab=%d used=%u cap=%u carved=%u freelist=%p\n",
                    (void*)ss,
                    slab_idx,
                    (unsigned)m->used,
                    (unsigned)m->capacity,
                    (unsigned)m->carved,
                    m->freelist);
        }
#else
        static _Atomic uint32_t dbg_c7_reset_logs = 0;
        uint32_t rn = atomic_fetch_add_explicit(&dbg_c7_reset_logs, 1, memory_order_relaxed);
        if (rn < 4) {
            TinySlabMeta* m = &ss->slabs[slab_idx];
            fprintf(stderr,
                    "[DBG_C7_RELEASE_RESET] ss=%p slab=%d used=%u cap=%u carved=%u freelist=%p\n",
                    (void*)ss,
                    slab_idx,
                    (unsigned)m->used,
                    (unsigned)m->capacity,
                    (unsigned)m->carved,
                    m->freelist);
        }
#endif
    }

    // Find SharedSSMeta for this SuperSlab
    SharedSSMeta* sp_meta = NULL;
    uint32_t count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    for (uint32_t i = 0; i < count; i++) {
        // RACE FIX: Load pointer atomically
        SuperSlab* meta_ss = atomic_load_explicit(&g_shared_pool.ss_metadata[i].ss, memory_order_relaxed);
        if (meta_ss == ss) {
            sp_meta = &g_shared_pool.ss_metadata[i];
            break;
        }
    }
    if (!sp_meta) {
        // SuperSlab not in SP-SLOT system yet - create metadata
        sp_meta = sp_meta_find_or_create(ss);
        if (!sp_meta) {
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
            return; // Failed to create metadata
        }
    }

    // Mark slot as EMPTY (ACTIVE → EMPTY)
    uint32_t slab_bit = (1u << slab_idx);
    SlotState slot_state = atomic_load_explicit(
        &sp_meta->slots[slab_idx].state,
        memory_order_acquire);
    if (slot_state != SLOT_ACTIVE && (ss->slab_bitmap & slab_bit)) {
        // Legacy path import: rebuild slot states from SuperSlab bitmap/class_map
        sp_meta_sync_slots_from_ss(sp_meta, ss);
        slot_state = atomic_load_explicit(
            &sp_meta->slots[slab_idx].state,
            memory_order_acquire);
    }
    if (slot_state != SLOT_ACTIVE || sp_slot_mark_empty(sp_meta, slab_idx) != 0) {
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return; // Slot wasn't ACTIVE
    }

    // Update SuperSlab metadata
    uint32_t bit = (1u << slab_idx);
    if (ss->slab_bitmap & bit) {
        ss->slab_bitmap &= ~bit;
        slab_meta->class_idx = 255; // UNASSIGNED
        // P1.1: Mark class_map as UNASSIGNED when releasing slab
        ss->class_map[slab_idx] = 255;
        // Reset slab metadata to a pristine state for all classes (C0-C7)
        ss_slab_reset_meta_for_tiny(ss, slab_idx, -1);
        if (ss->active_slabs > 0) {
            ss->active_slabs--;
            if (ss->active_slabs == 0 && g_shared_pool.active_count > 0) {
                g_shared_pool.active_count--;
            }
        }
        if (class_idx < TINY_NUM_CLASSES_SS &&
            g_shared_pool.class_active_slots[class_idx] > 0) {
            g_shared_pool.class_active_slots[class_idx]--;
        }
    }

    // P0-4: Push to lock-free per-class free list (enables reuse by same class)
    // Note: push BEFORE releasing mutex (slot state already updated under lock)
    if (class_idx < TINY_NUM_CLASSES_SS) {
        sp_freelist_push_lockfree(class_idx, sp_meta, slab_idx);
#if !HAKMEM_BUILD_RELEASE
        if (dbg == 1) {
            fprintf(stderr, "[SP_SLOT_FREELIST_LOCKFREE] class=%d pushed slot (ss=%p slab=%d) active_slots=%u/%u\n",
                    class_idx, (void*)ss, slab_idx,
                    sp_meta->active_slots, sp_meta->total_slots);
        }
#endif
    }

    // P-Tier: Check tier transition after releasing slab
    // This may transition HOT → DRAINING if utilization dropped below threshold
    // or DRAINING → FREE if utilization reached 0
    ss_tier_check_transition(ss);

    // P-Tier Step B: Eager FREE eviction
    // If tier transitioned to FREE (total_active_blocks == 0), immediately try to
    // release the SuperSlab regardless of active_slots. This prevents registry bloat.
    SSTier current_tier = ss_tier_get(ss);
    if (current_tier == SS_TIER_FREE) {
        // Double-check: total_active_blocks should be 0 for FREE tier
        uint32_t active_blocks = atomic_load_explicit(&ss->total_active_blocks, memory_order_acquire);
        if (active_blocks == 0 && ss_release_guard_superslab_can_free(ss)) {
#if !HAKMEM_BUILD_RELEASE
            if (dbg == 1) {
                fprintf(stderr, "[SP_TIER_FREE_EAGER] ss=%p tier=FREE active_slots=%u -> immediate free\n",
                        (void*)ss, sp_meta->active_slots);
            }
#endif
            // Force all remaining slots to EMPTY state for clean metadata
            for (uint32_t i = 0; i < sp_meta->total_slots; i++) {
                SlotState st = atomic_load_explicit(&sp_meta->slots[i].state, memory_order_relaxed);
                if (st == SLOT_ACTIVE) {
                    atomic_store_explicit(&sp_meta->slots[i].state, SLOT_EMPTY, memory_order_relaxed);
                }
            }
            sp_meta->active_slots = 0;
            if (g_lock_stats_enabled == 1) {
                atomic_fetch_add(&g_lock_release_count, 1);
            }
            // Clear meta->ss before unlocking (race prevention)
            atomic_store_explicit(&sp_meta->ss, NULL, memory_order_release);
            pthread_mutex_unlock(&g_shared_pool.alloc_lock);
            // Free SuperSlab immediately (bypasses normal active_slots==0 check)
            extern void superslab_free(SuperSlab* ss);
            ss_stats_on_ss_free_class(class_idx);
            ss_budget_on_free(class_idx);
            superslab_free(ss);
            return;
        }
    }

    // Check if SuperSlab is now completely empty (all slots EMPTY or UNUSED)
    if (sp_meta->active_slots == 0) {
#if !HAKMEM_BUILD_RELEASE
        if (dbg == 1) {
            fprintf(stderr, "[SP_SLOT_COMPLETELY_EMPTY] ss=%p active_slots=0 (calling superslab_free)\n",
                    (void*)ss);
        }
#endif
        if (g_lock_stats_enabled == 1) {
            atomic_fetch_add(&g_lock_release_count, 1);
        }
        // RACE FIX: Set meta->ss to NULL BEFORE unlocking mutex
        // This prevents Stage 2 from accessing freed SuperSlab
        atomic_store_explicit(&sp_meta->ss, NULL, memory_order_release);
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        // Remove from legacy backend list (moved to top of function)
        // extern void remove_superslab_from_legacy_head(SuperSlab* ss);
        // remove_superslab_from_legacy_head(ss);
        //
        // Free SuperSlab:
        // 1. Try LRU cache (hak_ss_lru_push) - lazy deallocation
        // 2. Or munmap if LRU is full - eager deallocation
        // BUGFIX: Double check total_active_blocks and refcount. Legacy Backend might have
        // allocated from ANOTHER slab in this SS just before we removed it.
        // If so, we must NOT free the SS.
        if (ss_release_guard_superslab_can_free(ss)) {
            extern void superslab_free(SuperSlab* ss);
            ss_stats_on_ss_free_class(class_idx);
            ss_budget_on_free(class_idx);
            superslab_free(ss);
        } else {
#if !HAKMEM_BUILD_RELEASE
            if (dbg == 1) {
                uint32_t active_blocks = atomic_load_explicit(&ss->total_active_blocks, memory_order_acquire);
                uint32_t ss_refs = superslab_ref_get(ss);
                fprintf(stderr,
                        "[SP_SLOT_RELEASE] SKIP free ss=%p: total_active_blocks=%u refcount=%u\n",
                        (void*)ss,
                        (unsigned)active_blocks,
                        (unsigned)ss_refs);
            }
#endif
        }
        return;
    }

    if (g_lock_stats_enabled == 1) {
        atomic_fetch_add(&g_lock_release_count, 1);
    }
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}