hakmem/core/hakmem_shared_pool.c
Moe Charm (CI) acc64f2438 Phase ML1: Pool v1 memset 89.73% overhead reduction (+15.34% improvement)
## Summary
- Fixed the bench_profile.h setenv segfault (by ChatGPT): switched to going through RTLD_NEXT
- Added core/box/pool_zero_mode_box.h: ZERO_MODE is now managed in one place via an env-variable cache (see the sketch after this list)
- core/hakmem_pool.c: memset is now controlled by the zero mode (full / header / off)
- A/B test result: +15.34% improvement with ZERO_MODE=header (1M iterations, C6-heavy)
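
The box itself is not shown in this view, so the following is only a rough sketch of the env-cached zero-mode idea; the enum values, getter name, and `HAKMEM_POOL_ZERO_MODE` spelling are assumptions, not the real header.

```c
// Hypothetical sketch only -- not the actual contents of core/box/pool_zero_mode_box.h.
#include <stdlib.h>
#include <string.h>

typedef enum {
    POOL_ZERO_FULL = 0,  // memset the whole block (legacy behavior)
    POOL_ZERO_HEADER,    // clear only the header region
    POOL_ZERO_OFF        // skip clearing entirely
} PoolZeroMode;

// Cache the env lookup once so the allocation hot path never calls getenv() again.
static inline PoolZeroMode pool_zero_mode_get(void) {
    static int cached = -1;  // -1 = not read yet
    if (__builtin_expect(cached == -1, 0)) {
        const char* v = getenv("HAKMEM_POOL_ZERO_MODE");  // assumed env name
        if (v && strcmp(v, "header") == 0)   cached = POOL_ZERO_HEADER;
        else if (v && strcmp(v, "off") == 0) cached = POOL_ZERO_OFF;
        else                                 cached = POOL_ZERO_FULL;  // safest default
    }
    return (PoolZeroMode)cached;
}
```

core/hakmem_pool.c then branches on the returned mode before its memset; skipping the full clear is what the ZERO_MODE=header rows below measure.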

## Files Modified
- core/box/pool_api.inc.h: include pool_zero_mode_box.h
- core/bench_profile.h: glibc setenv → malloc+putenv (avoids the segfault)
- core/hakmem_pool.c: zero-mode lookup and control logic
- core/box/pool_zero_mode_box.h (new): enum/getter
- CURRENT_TASK.md: record Phase ML1 results

## Test Results
| Iterations | ZERO_MODE=full | ZERO_MODE=header | Improvement |
|-----------|----------------|-----------------|------------|
| 10K       | 3.06 M ops/s   | 3.17 M ops/s    | +3.65%     |
| 1M        | 23.71 M ops/s  | 27.34 M ops/s   | **+15.34%** |

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-10 09:08:18 +09:00

#include "hakmem_shared_pool_internal.h"
#include "hakmem_debug_master.h" // Phase 4b: Master debug control
#include "hakmem_stats_master.h" // Phase 4d: Master stats control
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
#include "box/ss_hot_cold_box.h" // Phase 12-1.1: EMPTY slab marking
#include "box/pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_SS_META)
#include "box/tls_sll_drain_box.h" // Box TLS SLL Drain (tiny_tls_sll_drain)
#include "box/tls_slab_reuse_guard_box.h" // Box TLS Slab Reuse Guard (P0.3)
#include "hakmem_policy.h" // FrozenPolicy (learning layer)
#include "box/shared_pool_box.h" // Logical cap for bench profile
#include <stdlib.h>
#include <string.h>
#include <stdatomic.h>
#include <stdio.h>
#include <sys/mman.h> // For mmap/munmap (used in shared_pool_ensure_capacity_unlocked)
// ============================================================================
// P0 Lock Contention Instrumentation (Debug build only; counters defined always)
// ============================================================================
_Atomic uint64_t g_lock_acquire_count = 0; // Total lock acquisitions
_Atomic uint64_t g_lock_release_count = 0; // Total lock releases
_Atomic uint64_t g_lock_acquire_slab_count = 0; // Locks from acquire_slab path
_Atomic uint64_t g_lock_release_slab_count = 0; // Locks from release_slab path
#if !HAKMEM_BUILD_RELEASE
int g_lock_stats_enabled = -1; // -1=uninitialized, 0=off, 1=on

// Initialize lock stats from environment variable
// Phase 4b: Now uses hak_debug_check() for master debug control support
void lock_stats_init(void) {
    if (__builtin_expect(g_lock_stats_enabled == -1, 0)) {
        g_lock_stats_enabled = hak_debug_check("HAKMEM_SHARED_POOL_LOCK_STATS");
    }
}
// Report lock statistics at shutdown
static void __attribute__((destructor)) lock_stats_report(void) {
    if (g_lock_stats_enabled != 1) {
        return;
    }
    uint64_t acquires = atomic_load(&g_lock_acquire_count);
    uint64_t releases = atomic_load(&g_lock_release_count);
    uint64_t acquire_path = atomic_load(&g_lock_acquire_slab_count);
    uint64_t release_path = atomic_load(&g_lock_release_slab_count);
    fprintf(stderr, "\n=== SHARED POOL LOCK STATISTICS ===\n");
    // Portability: print uint64_t via unsigned long long casts, matching the
    // convention used by sp_stage_stats_report() below.
    fprintf(stderr, "Total lock ops: %llu (acquire) + %llu (release) = %llu\n",
            (unsigned long long)acquires, (unsigned long long)releases,
            (unsigned long long)(acquires + releases));
    fprintf(stderr, "Balance: %lld (should be 0)\n",
            (long long)((int64_t)acquires - (int64_t)releases));
    fprintf(stderr, "\n--- Breakdown by Code Path ---\n");
    fprintf(stderr, "acquire_slab(): %llu (%.1f%%)\n",
            (unsigned long long)acquire_path,
            100.0 * acquire_path / (acquires ? acquires : 1));
    fprintf(stderr, "release_slab(): %llu (%.1f%%)\n",
            (unsigned long long)release_path,
            100.0 * release_path / (acquires ? acquires : 1));
    fprintf(stderr, "===================================\n");
    fflush(stderr);
}
#else
// Release build: No-op stubs
int g_lock_stats_enabled = 0;
#endif
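
// ---------------------------------------------------------------------------
// Illustration (hypothetical): one way a lock/unlock site could feed the
// counters above. sp_lock_acquire()/sp_lock_release() are illustrative names
// only; they are not defined or called anywhere else in this codebase.
// ---------------------------------------------------------------------------
static inline void sp_lock_acquire(int from_acquire_slab) {
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    atomic_fetch_add_explicit(&g_lock_acquire_count, 1, memory_order_relaxed);
    if (from_acquire_slab) {
        atomic_fetch_add_explicit(&g_lock_acquire_slab_count, 1, memory_order_relaxed);
    } else {
        atomic_fetch_add_explicit(&g_lock_release_slab_count, 1, memory_order_relaxed);
    }
}
static inline void sp_lock_release(void) {
    atomic_fetch_add_explicit(&g_lock_release_count, 1, memory_order_relaxed);
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}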
// ============================================================================
// SP Acquire Stage Statistics (Stage1/2/3 breakdown)
// ============================================================================
_Atomic uint64_t g_sp_stage1_hits[TINY_NUM_CLASSES_SS];
_Atomic uint64_t g_sp_stage2_hits[TINY_NUM_CLASSES_SS];
_Atomic uint64_t g_sp_stage3_hits[TINY_NUM_CLASSES_SS];
// Data collection gate (0=off, 1=on). Also enabled from the learning layer.
int g_sp_stage_stats_enabled = 0;
#if !HAKMEM_BUILD_RELEASE
// Logging gate for the destructor report. ENV: HAKMEM_SHARED_POOL_STAGE_STATS
static int g_sp_stage_stats_log_enabled = -1; // -1=uninitialized, 0=off, 1=on
void sp_stage_stats_init(void) {
    // Phase 4d: Now uses hak_stats_check() for unified stats control
    if (__builtin_expect(g_sp_stage_stats_log_enabled == -1, 0)) {
        g_sp_stage_stats_log_enabled = hak_stats_check("HAKMEM_SHARED_POOL_STAGE_STATS", "pool");
        if (g_sp_stage_stats_log_enabled == 1) {
            // If logging is enabled, always enable data collection as well.
            g_sp_stage_stats_enabled = 1;
        }
    }
}
static void __attribute__((destructor)) sp_stage_stats_report(void) {
    if (g_sp_stage_stats_log_enabled != 1) {
        return;
    }
    fprintf(stderr, "\n=== SHARED POOL STAGE STATISTICS ===\n");
    fprintf(stderr, "Per-class acquire_slab() stage hits (Stage1=EMPTY, Stage2=UNUSED, Stage3=new SS)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        uint64_t s1 = atomic_load(&g_sp_stage1_hits[cls]);
        uint64_t s2 = atomic_load(&g_sp_stage2_hits[cls]);
        uint64_t s3 = atomic_load(&g_sp_stage3_hits[cls]);
        uint64_t total = s1 + s2 + s3;
        if (total == 0) continue; // Skip unused classes
        double p1 = 100.0 * (double)s1 / (double)total;
        double p2 = 100.0 * (double)s2 / (double)total;
        double p3 = 100.0 * (double)s3 / (double)total;
        fprintf(stderr,
                "Class %d: total=%llu S1=%llu (%.1f%%) S2=%llu (%.1f%%) S3=%llu (%.1f%%)\n",
                cls,
                (unsigned long long)total,
                (unsigned long long)s1, p1,
                (unsigned long long)s2, p2,
                (unsigned long long)s3, p3);
    }
    fprintf(stderr, "====================================\n");
    fflush(stderr);
}
#else
// Release build: No-op stubs
void sp_stage_stats_init(void) {}
#endif
// Snapshot Tiny-related backend metrics for learner / observability.
void
shared_pool_tiny_metrics_snapshot(uint64_t stage1[TINY_NUM_CLASSES_SS],
                                  uint64_t stage2[TINY_NUM_CLASSES_SS],
                                  uint64_t stage3[TINY_NUM_CLASSES_SS],
                                  uint32_t active_slots[TINY_NUM_CLASSES_SS])
{
    // Only make sure the env-based logging configuration has been initialized.
    sp_stage_stats_init();
    // When called from the learning layer, always enable data collection itself
    // (logging stays controlled by the env variable).
    g_sp_stage_stats_enabled = 1;
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        if (stage1) {
            stage1[cls] = atomic_load_explicit(&g_sp_stage1_hits[cls],
                                               memory_order_relaxed);
        }
        if (stage2) {
            stage2[cls] = atomic_load_explicit(&g_sp_stage2_hits[cls],
                                               memory_order_relaxed);
        }
        if (stage3) {
            stage3[cls] = atomic_load_explicit(&g_sp_stage3_hits[cls],
                                               memory_order_relaxed);
        }
        if (active_slots) {
            active_slots[cls] = g_shared_pool.class_active_slots[cls];
        }
    }
}
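
// ---------------------------------------------------------------------------
// Illustration (hypothetical): a learner / observability thread could poll the
// snapshot like this. Any output array may be NULL when the caller does not
// need that metric. dump_tiny_backend_metrics() is an illustrative name only.
// ---------------------------------------------------------------------------
static void dump_tiny_backend_metrics(void) __attribute__((unused));
static void dump_tiny_backend_metrics(void) {
    uint64_t s1[TINY_NUM_CLASSES_SS], s2[TINY_NUM_CLASSES_SS], s3[TINY_NUM_CLASSES_SS];
    uint32_t active[TINY_NUM_CLASSES_SS];
    shared_pool_tiny_metrics_snapshot(s1, s2, s3, active);
    for (int cls = 0; cls < TINY_NUM_CLASSES_SS; cls++) {
        uint64_t total = s1[cls] + s2[cls] + s3[cls];
        if (total == 0) continue; // nothing observed for this class yet
        fprintf(stderr, "class %d: S1=%llu S2=%llu S3=%llu active_slots=%u\n",
                cls,
                (unsigned long long)s1[cls],
                (unsigned long long)s2[cls],
                (unsigned long long)s3[cls],
                active[cls]);
    }
}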
// Helper: return per-class active slot limit from FrozenPolicy.tiny_cap[]
// Semantics:
// - tiny_cap[class] == 0 → no limit (unbounded)
// - otherwise: soft cap on ACTIVE slots managed by shared pool for this class.
uint32_t sp_class_active_limit(int class_idx) {
    const FrozenPolicy* pol = hkm_policy_get();
    if (!pol) {
        return 0; // no limit
    }
    if (class_idx < 0 || class_idx >= 8) {
        return 0;
    }
    return (uint32_t)pol->tiny_cap[class_idx];
}
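
// ---------------------------------------------------------------------------
// Illustration (hypothetical): callers must treat a returned cap of 0 as
// "unbounded", not as "zero slots allowed". sp_can_activate_slot() is an
// illustrative helper, not part of this file's API.
// ---------------------------------------------------------------------------
static inline int sp_can_activate_slot(int class_idx) {
    uint32_t cap = sp_class_active_limit(class_idx);
    if (cap == 0) {
        return 1; // 0 means "no limit": activation is always allowed
    }
    // The soft cap applies to ACTIVE slots tracked by the shared pool for this class.
    return g_shared_pool.class_active_slots[class_idx] < cap;
}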
// ============================================================================
// P0-4: Lock-Free Free Slot List - Node Pool
// ============================================================================
// Pre-allocated node pools (one per class, to avoid malloc/free)
FreeSlotNode g_free_node_pool[TINY_NUM_CLASSES_SS][MAX_FREE_NODES_PER_CLASS];
_Atomic uint32_t g_node_alloc_index[TINY_NUM_CLASSES_SS] = {0};
// Recycle list for FreeSlotNode (per class, lock-free LIFO).
// node_alloc() first tries to reuse a node from this list and only carves a new
// node out of the pool when the list is exhausted.
static _Atomic(FreeSlotNode*) g_node_free_head[TINY_NUM_CLASSES_SS] = {
    [0 ... TINY_NUM_CLASSES_SS-1] = ATOMIC_VAR_INIT(NULL)
};
// Allocate a node from pool (lock-free fast path, may fall back to legacy path)
static inline FreeSlotNode* node_alloc(int class_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        return NULL;
    }
    // First, try to pop from recycle list (nodes returned by pop_lockfree).
    FreeSlotNode* free_head = atomic_load_explicit(
        &g_node_free_head[class_idx],
        memory_order_acquire);
    while (free_head != NULL) {
        FreeSlotNode* next = free_head->next;
        if (atomic_compare_exchange_weak_explicit(
                &g_node_free_head[class_idx],
                &free_head,
                next,
                memory_order_acq_rel,
                memory_order_acquire)) {
            return free_head; // Recycled node
        }
        // CAS failed: free_head is updated; retry with new head.
    }
    uint32_t idx = atomic_fetch_add(&g_node_alloc_index[class_idx], 1);
    if (idx >= MAX_FREE_NODES_PER_CLASS) {
        // Pool exhausted - should be rare.
        return NULL;
    }
    return &g_free_node_pool[class_idx][idx];
}
// ============================================================================
// Phase 12-2: SharedSuperSlabPool skeleton implementation
// Goal:
// - Centralize SuperSlab allocation/registration
// - Provide acquire_slab/release_slab APIs for later refill/free integration
// - Keep logic simple & conservative; correctness and observability first.
//
// Notes:
// - Concurrency: protected by g_shared_pool.alloc_lock for now.
// - class_hints is best-effort: read lock-free, written under lock.
// - LRU hooks left as no-op placeholders.
SharedSuperSlabPool g_shared_pool = {
    .slabs = NULL,
    .capacity = 0,
    .total_count = 0,
    .active_count = 0,
    .alloc_lock = PTHREAD_MUTEX_INITIALIZER,
    .class_hints = { NULL },
    .lru_head = NULL,
    .lru_tail = NULL,
    .lru_count = 0,
    // P0-4: Lock-free free slot lists (zero-initialized atomic pointers)
    .free_slots_lockfree = {{.head = ATOMIC_VAR_INIT(NULL)}},
    // Legacy: mutex-protected free lists
    .free_slots = {{.entries = {{0}}, .count = 0}},
    // Phase 12: SP-SLOT fields (ss_metadata is fixed-size array, auto-zeroed)
    .ss_meta_count = 0
};
void
shared_pool_ensure_capacity_unlocked(uint32_t min_capacity)
{
    if (g_shared_pool.capacity >= min_capacity) {
        return;
    }
    uint32_t new_cap = g_shared_pool.capacity ? g_shared_pool.capacity : 16;
    while (new_cap < min_capacity) {
        new_cap *= 2;
    }
    // CRITICAL FIX: Use system mmap() directly to avoid recursion!
    size_t new_size = new_cap * sizeof(SuperSlab*);
    SuperSlab** new_slabs = (SuperSlab**)mmap(NULL, new_size,
                                              PROT_READ | PROT_WRITE,
                                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (new_slabs == MAP_FAILED) {
        // Allocation failure: keep old state; caller must handle NULL later.
        return;
    }
    // Copy old data if exists
    if (g_shared_pool.slabs != NULL) {
        memcpy(new_slabs, g_shared_pool.slabs,
               g_shared_pool.capacity * sizeof(SuperSlab*));
        // Free old mapping (also use system munmap, not free!)
        size_t old_size = g_shared_pool.capacity * sizeof(SuperSlab*);
        munmap(g_shared_pool.slabs, old_size);
    }
    // Zero new entries to keep scanning logic simple.
    memset(new_slabs + g_shared_pool.capacity, 0,
           (new_cap - g_shared_pool.capacity) * sizeof(SuperSlab*));
    g_shared_pool.slabs = new_slabs;
    g_shared_pool.capacity = new_cap;
}
void
shared_pool_init(void)
{
    // Idempotent init; safe to call from multiple early paths.
    // pthread_mutex_t with static initializer is already valid.
    shared_pool_box_init(NULL, NULL);
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    if (g_shared_pool.capacity == 0 && g_shared_pool.slabs == NULL) {
        shared_pool_ensure_capacity_unlocked(16);
    }
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
}
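
// ---------------------------------------------------------------------------
// Illustration (hypothetical): the "_unlocked" suffix means the caller owns
// alloc_lock, exactly as shared_pool_init() does above. A registration path
// respecting that contract could look like the sketch below;
// sp_register_superslab() is an illustrative name, not part of this file's API.
// ---------------------------------------------------------------------------
static int sp_register_superslab(SuperSlab* ss) __attribute__((unused));
static int sp_register_superslab(SuperSlab* ss) {
    if (!ss) return -1;
    pthread_mutex_lock(&g_shared_pool.alloc_lock);
    // Grow the pointer array first; the helper assumes the lock is already held.
    shared_pool_ensure_capacity_unlocked(g_shared_pool.total_count + 1);
    if (g_shared_pool.total_count >= g_shared_pool.capacity) {
        // Growth failed (mmap error): leave state untouched and report failure.
        pthread_mutex_unlock(&g_shared_pool.alloc_lock);
        return -1;
    }
    g_shared_pool.slabs[g_shared_pool.total_count++] = ss;
    pthread_mutex_unlock(&g_shared_pool.alloc_lock);
    return 0;
}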
// ============================================================================
// Phase 12: SP-SLOT Box - Modular Helper Functions
// ============================================================================
// ---------- Layer 1: Slot Operations (Low-level) ----------
// Find first unused slot in SharedSSMeta
// P0-5: Uses atomic load for state check
// Returns: slot_idx on success, -1 if no unused slots
static int sp_slot_find_unused(SharedSSMeta* meta) __attribute__((unused));
static int sp_slot_find_unused(SharedSSMeta* meta) {
    if (!meta) return -1;
    for (int i = 0; i < meta->total_slots; i++) {
        SlotState state = atomic_load_explicit(&meta->slots[i].state, memory_order_acquire);
        if (state == SLOT_UNUSED) {
            return i;
        }
    }
    return -1;
}
// Mark slot as ACTIVE (UNUSED→ACTIVE or EMPTY→ACTIVE)
// P0-5: Uses atomic store for state transition (caller must hold mutex!)
// Returns: 0 on success, -1 on error
int sp_slot_mark_active(SharedSSMeta* meta, int slot_idx, int class_idx) {
    if (!meta || slot_idx < 0 || slot_idx >= meta->total_slots) return -1;
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return -1;
    SharedSlot* slot = &meta->slots[slot_idx];
    // Load state atomically
    SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
    // Transition: UNUSED→ACTIVE or EMPTY→ACTIVE
    if (state == SLOT_UNUSED || state == SLOT_EMPTY) {
        atomic_store_explicit(&slot->state, SLOT_ACTIVE, memory_order_release);
        slot->class_idx = (uint8_t)class_idx;
        slot->slab_idx = (uint8_t)slot_idx;
        meta->active_slots++;
        return 0;
    }
    return -1; // Already ACTIVE or invalid state
}
// Mark slot as EMPTY (ACTIVE→EMPTY)
// P0-5: Uses atomic store for state transition (caller must hold mutex!)
// Returns: 0 on success, -1 on error
int sp_slot_mark_empty(SharedSSMeta* meta, int slot_idx) {
    if (!meta || slot_idx < 0 || slot_idx >= meta->total_slots) return -1;
    SharedSlot* slot = &meta->slots[slot_idx];
    // Load state atomically
    SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
    if (state == SLOT_ACTIVE) {
        atomic_store_explicit(&slot->state, SLOT_EMPTY, memory_order_release);
        if (meta->active_slots > 0) {
            meta->active_slots--;
        }
        return 0;
    }
    return -1; // Not ACTIVE
}
// Sync SP-SLOT view from an existing SuperSlab.
// This is needed when a legacy-allocated SuperSlab reaches the shared-pool
// release path for the first time (slot states are still SLOT_UNUSED).
void sp_meta_sync_slots_from_ss(SharedSSMeta* meta, SuperSlab* ss) {
    if (!meta || !ss) return;
    int cap = ss_slabs_capacity(ss);
    if (cap > MAX_SLOTS_PER_SS) {
        cap = MAX_SLOTS_PER_SS;
    }
    meta->total_slots = (uint8_t)cap;
    meta->active_slots = 0;
    for (int i = 0; i < cap; i++) {
        SlotState state = SLOT_UNUSED;
        uint32_t bit = (1u << i);
        if (ss->slab_bitmap & bit) {
            state = SLOT_ACTIVE;
            meta->active_slots++;
        } else {
            TinySlabMeta* smeta = &ss->slabs[i];
            uint16_t used = atomic_load_explicit(&smeta->used, memory_order_relaxed);
            if (smeta->capacity > 0 && used == 0) {
                state = SLOT_EMPTY;
            }
        }
        uint8_t cls = ss->class_map[i];
        if (cls == 255) {
            cls = ss->slabs[i].class_idx;
        }
        meta->slots[i].class_idx = cls;
        meta->slots[i].slab_idx = (uint8_t)i;
        atomic_store_explicit(&meta->slots[i].state, state, memory_order_release);
    }
}
// ---------- Layer 2: Metadata Management (Mid-level) ----------
// Ensure ss_metadata array has capacity for at least min_count entries
// Caller must hold alloc_lock
// Returns: 0 on success, -1 if capacity exceeded
// RACE FIX: No realloc! Fixed-size array prevents race with lock-free Stage 2
static int sp_meta_ensure_capacity(uint32_t min_count) {
    if (min_count > MAX_SS_METADATA_ENTRIES) {
#if !HAKMEM_BUILD_RELEASE
        static int warn_once = 0;
        if (warn_once == 0) {
            fprintf(stderr, "[SP_META_CAPACITY_ERROR] Exceeded MAX_SS_METADATA_ENTRIES=%d\n",
                    MAX_SS_METADATA_ENTRIES);
            warn_once = 1;
        }
#endif
        return -1;
    }
    return 0;
}
// Find SharedSSMeta for given SuperSlab, or create if not exists
// Caller must hold alloc_lock
// Returns: SharedSSMeta* on success, NULL on error
SharedSSMeta* sp_meta_find_or_create(SuperSlab* ss) {
    if (!ss) return NULL;
    // P0 Optimization: O(1) lookup via direct pointer (eliminates 7.8% CPU bottleneck)
    // Check if this SuperSlab already has metadata cached
    if (ss->shared_meta) {
        return ss->shared_meta;
    }
    // RACE FIX: Load count atomically for consistency (even under mutex)
    uint32_t count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    // Search existing metadata (fallback for legacy SuperSlabs without cached pointer)
    for (uint32_t i = 0; i < count; i++) {
        // RACE FIX: Load pointer atomically for consistency
        SuperSlab* meta_ss = atomic_load_explicit(&g_shared_pool.ss_metadata[i].ss, memory_order_relaxed);
        if (meta_ss == ss) {
            // Cache the pointer for future O(1) lookups
            ss->shared_meta = &g_shared_pool.ss_metadata[i];
            return &g_shared_pool.ss_metadata[i];
        }
    }
    // Create new metadata entry
    if (sp_meta_ensure_capacity(count + 1) != 0) {
        return NULL;
    }
    // RACE FIX: Read current count atomically (even under mutex for consistency)
    uint32_t current_count = atomic_load_explicit(&g_shared_pool.ss_meta_count, memory_order_relaxed);
    SharedSSMeta* meta = &g_shared_pool.ss_metadata[current_count];
    // RACE FIX: Store SuperSlab pointer atomically (visible to lock-free Stage 2)
    atomic_store_explicit(&meta->ss, ss, memory_order_relaxed);
    meta->total_slots = (uint8_t)ss_slabs_capacity(ss);
    meta->active_slots = 0;
    // Initialize all slots as UNUSED
    // P0-5: Use atomic store for state initialization
    for (int i = 0; i < meta->total_slots; i++) {
        atomic_store_explicit(&meta->slots[i].state, SLOT_UNUSED, memory_order_relaxed);
        meta->slots[i].class_idx = 0;
        meta->slots[i].slab_idx = (uint8_t)i;
    }
    // P0 Optimization: Cache the metadata pointer in SuperSlab for O(1) future lookups
    ss->shared_meta = meta;
    // RACE FIX: Atomic increment with release semantics.
    // This ensures all writes to ss_metadata[current_count] (the initialization just
    // above) are visible before the count increment is visible to lock-free Stage 2 readers.
    atomic_fetch_add_explicit(&g_shared_pool.ss_meta_count, 1, memory_order_release);
    return meta;
}
// Find UNUSED slot and claim it (UNUSED → ACTIVE) using lock-free CAS
// Returns: slot_idx on success, -1 if no UNUSED slots
int sp_slot_claim_lockfree(SharedSSMeta* meta, int class_idx) {
    (void)class_idx;
    if (!meta) return -1;
    // Optimization: Quick check if any unused slots exist?
    // For now, just iterate. Metadata size is small (max 32 slots).
    for (int i = 0; i < meta->total_slots; i++) {
        SharedSlot* slot = &meta->slots[i];
        SlotState state = atomic_load_explicit(&slot->state, memory_order_acquire);
        if (state == SLOT_UNUSED) {
            // Attempt CAS: UNUSED → ACTIVE
            if (atomic_compare_exchange_strong_explicit(
                    &slot->state,
                    &state,
                    SLOT_ACTIVE,
                    memory_order_acq_rel,
                    memory_order_acquire)) {
                return i; // Success!
            }
            // CAS failed: someone else took it or state changed
        }
    }
    return -1;
}
// ---------- Layer 3: Free List Management ----------
// Push empty slot to per-class free list
// Caller must hold alloc_lock
// Returns: 0 on success, -1 if list is full
int sp_freelist_push_lockfree(int class_idx, SharedSSMeta* meta, int slot_idx) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return -1;
    FreeSlotNode* node = node_alloc(class_idx);
    if (!node) {
        // Pool exhausted
        return -1;
    }
    node->meta = meta;
    node->slot_idx = slot_idx;
    // Lock-free push to stack (LIFO)
    FreeSlotNode* old_head = atomic_load_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        memory_order_relaxed);
    do {
        node->next = old_head;
    } while (!atomic_compare_exchange_weak_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        &old_head,
        node,
        memory_order_release,
        memory_order_relaxed));
    return 0;
}
// Pop empty slot from per-class free list
// Lock-free
// Returns: 1 on success, 0 if empty
int sp_freelist_pop_lockfree(int class_idx, SharedSSMeta** meta_out, int* slot_idx_out) {
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) return 0;
    FreeSlotNode* head = atomic_load_explicit(
        &g_shared_pool.free_slots_lockfree[class_idx].head,
        memory_order_acquire);
    while (head) {
        FreeSlotNode* next = head->next;
        if (atomic_compare_exchange_weak_explicit(
                &g_shared_pool.free_slots_lockfree[class_idx].head,
                &head,
                next,
                memory_order_acquire,
                memory_order_acquire)) {
            // Success!
            *meta_out = head->meta;
            *slot_idx_out = head->slot_idx;
            // Recycle node (push to free_head list)
            FreeSlotNode* free_head = atomic_load_explicit(&g_node_free_head[class_idx], memory_order_relaxed);
            do {
                head->next = free_head;
            } while (!atomic_compare_exchange_weak_explicit(
                &g_node_free_head[class_idx],
                &free_head,
                head,
                memory_order_release,
                memory_order_relaxed));
            return 1;
        }
        // CAS failed: head updated, retry
    }
    return 0; // Empty list
}
// Allocator helper for SuperSlab (Phase 9-2 Task 1)
// NOTE: class_idx MUST be a valid tiny class (0-7). Passing an out-of-range
// value previously went through superslab_allocate(8), which overflowed
// g_ss_ace[] and could corrupt neighboring globals, leading to missing
// registry entries and TLS SLL header corruption.
SuperSlab*
sp_internal_allocate_superslab(int class_idx)
{
    do {
        static _Atomic uint32_t g_sp_alloc_log = 0;
        uint32_t shot = atomic_fetch_add_explicit(&g_sp_alloc_log, 1, memory_order_relaxed);
        if (shot < 4) {
            fprintf(stderr, "[SP_INTERNAL_ALLOC] class_idx=%d\n", class_idx);
            fflush(stderr);
        }
    } while (0);
    // Clamp to valid range to avoid out-of-bounds access inside superslab_allocate().
    if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES_SS) {
        class_idx = TINY_NUM_CLASSES_SS - 1;
    }
    // Use legacy backend to allocate a SuperSlab (malloc-based)
    extern SuperSlab* superslab_allocate(uint8_t size_class);
    SuperSlab* ss = superslab_allocate((uint8_t)class_idx);
    if (!ss) {
        return NULL;
    }
    // Initialize basic fields if not done by superslab_alloc
    ss->active_slabs = 0;
    ss->slab_bitmap = 0;
    return ss;
}
// ============================================================================
// Public API (High-level)
// ============================================================================
SuperSlab*
shared_pool_acquire_superslab(void)
{
    // Phase 12: Legacy wrapper?
    // This function seems to be a direct allocation bypass.
    return sp_internal_allocate_superslab(0);
}
void sp_fix_geometry_if_needed(SuperSlab* ss, int slab_idx, int class_idx) {
    // Phase 9-1: For now, we assume geometry is compatible or set by caller.
    // This hook exists for future use when we support dynamic geometry resizing.
    (void)ss; (void)slab_idx; (void)class_idx;
}