2025-11-20 02:01:52 +09:00
|
|
|
// Box: Core Allocation
|
|
|
|
|
// Purpose: SuperSlab allocation/deallocation and slab initialization
|
|
|
|
|
|
|
|
|
|
#include "ss_allocation_box.h"
|
|
|
|
|
#include "ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary
|
|
|
|
|
#include "ss_os_acquire_box.h"
|
|
|
|
|
#include "ss_cache_box.h"
|
|
|
|
|
#include "ss_stats_box.h"
|
|
|
|
|
#include "ss_ace_box.h"
|
|
|
|
|
#include "ss_slab_management_box.h"
|
|
|
|
|
#include "hakmem_super_registry.h"
|
|
|
|
|
#include "hakmem_tiny_config.h"
|
|
|
|
|
#include "hakmem_policy.h" // Phase E3-1: Access FrozenPolicy for never-free policy
|
|
|
|
|
#include "tiny_region_id.h"
|
|
|
|
|
#include "box/tiny_next_ptr_box.h"
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <sys/mman.h>
|
|
|
|
|
#include <pthread.h>
|
|
|
|
|
|
|
|
|
|
// Global statistics (defined in ss_stats_box.c, declared here for access)
|
|
|
|
|
extern pthread_mutex_t g_superslab_lock;
|
|
|
|
|
extern uint64_t g_superslabs_freed;
|
|
|
|
|
extern uint64_t g_bytes_allocated;
|
|
|
|
|
|
|
|
|
|
// g_ss_force_lg is defined in ss_ace_box.c but needs external linkage
|
|
|
|
|
extern int g_ss_force_lg;
|
|
|
|
|
|
|
|
|
|
// g_ss_populate_once controls MAP_POPULATE flag
|
|
|
|
|
static _Atomic int g_ss_populate_once = 0;
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Remote Drain Helper
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
// Drain remote MPSC stack into freelist (ownership already verified by caller)
|
|
|
|
|
void _ss_remote_drain_to_freelist_unsafe(SuperSlab* ss, int slab_idx, TinySlabMeta* meta)
|
|
|
|
|
{
|
|
|
|
|
if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || !meta) return;
|
|
|
|
|
|
|
|
|
|
// Atomically take the whole remote list
|
|
|
|
|
uintptr_t head = atomic_exchange_explicit(&ss->remote_heads[slab_idx], 0,
|
|
|
|
|
memory_order_acq_rel);
|
|
|
|
|
if (head == 0) return;
|
|
|
|
|
|
|
|
|
|
// Convert remote stack (offset 0 next) into freelist encoding via Box API
|
|
|
|
|
// and splice in front of current freelist preserving relative order.
|
|
|
|
|
void* prev = meta->freelist;
|
|
|
|
|
int cls = (int)meta->class_idx;
|
|
|
|
|
uintptr_t cur = head;
|
|
|
|
|
while (cur != 0) {
|
|
|
|
|
uintptr_t next = *(uintptr_t*)cur; // remote-next stored at offset 0
|
|
|
|
|
// Restore header for header-classes (class 1-6) which were clobbered by remote push
|
|
|
|
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
|
|
|
|
if (cls != 0 && cls != 7) {
|
|
|
|
|
uint8_t expected = (uint8_t)(HEADER_MAGIC | (cls & HEADER_CLASS_MASK));
|
|
|
|
|
*(uint8_t*)(uintptr_t)cur = expected;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
// Rewrite next pointer to Box representation for this class
|
|
|
|
|
tiny_next_write(cls, (void*)cur, prev);
|
|
|
|
|
prev = (void*)cur;
|
|
|
|
|
cur = next;
|
|
|
|
|
}
|
|
|
|
|
meta->freelist = prev;
|
|
|
|
|
// Reset remote count after full drain
|
|
|
|
|
atomic_store_explicit(&ss->remote_counts[slab_idx], 0, memory_order_release);
|
|
|
|
|
|
|
|
|
|
// Update freelist/nonempty visibility bits
|
|
|
|
|
uint32_t bit = (1u << slab_idx);
|
|
|
|
|
atomic_fetch_or_explicit(&ss->freelist_mask, bit, memory_order_release);
|
|
|
|
|
atomic_fetch_or_explicit(&ss->nonempty_mask, bit, memory_order_release);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// SuperSlab Allocation (ACE-Aware)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
SuperSlab* superslab_allocate(uint8_t size_class) {
|
|
|
|
|
// Optional fault injection for testing: HAKMEM_TINY_SS_FAULT_RATE=N → 1/N で失敗
|
|
|
|
|
static int fault_rate = -1; // -1=unparsed, 0=disabled, >0=rate
|
|
|
|
|
static __thread unsigned long fault_tick = 0;
|
|
|
|
|
if (__builtin_expect(fault_rate == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_TINY_SS_FAULT_RATE");
|
|
|
|
|
if (e && *e) {
|
|
|
|
|
int v = atoi(e); if (v < 0) v = 0; fault_rate = v;
|
|
|
|
|
} else {
|
|
|
|
|
fault_rate = 0;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (fault_rate > 0) {
|
|
|
|
|
unsigned long t = ++fault_tick;
|
|
|
|
|
if ((t % (unsigned long)fault_rate) == 0ul) {
|
|
|
|
|
return NULL; // simulate OOM
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Optional env clamp for SuperSlab size
|
|
|
|
|
static int env_parsed = 0;
|
|
|
|
|
// Allow full ACE range [MIN..MAX] by default so 1MB/2MB の二択学習が有効になる。
|
|
|
|
|
static uint8_t g_ss_min_lg_env = SUPERSLAB_LG_MIN;
|
|
|
|
|
static uint8_t g_ss_max_lg_env = SUPERSLAB_LG_MAX;
|
|
|
|
|
if (!env_parsed) {
|
|
|
|
|
char* maxmb = getenv("HAKMEM_TINY_SS_MAX_MB");
|
|
|
|
|
if (maxmb) {
|
|
|
|
|
int m = atoi(maxmb); if (m == 1) g_ss_max_lg_env = 20; else if (m == 2) g_ss_max_lg_env = 21;
|
|
|
|
|
}
|
|
|
|
|
char* minmb = getenv("HAKMEM_TINY_SS_MIN_MB");
|
|
|
|
|
if (minmb) {
|
|
|
|
|
int m = atoi(minmb); if (m == 1) g_ss_min_lg_env = 20; else if (m == 2) g_ss_min_lg_env = 21;
|
|
|
|
|
}
|
|
|
|
|
if (g_ss_min_lg_env > g_ss_max_lg_env) g_ss_min_lg_env = g_ss_max_lg_env;
|
|
|
|
|
const char* force_lg_env = getenv("HAKMEM_TINY_SS_FORCE_LG");
|
|
|
|
|
if (force_lg_env && *force_lg_env) {
|
|
|
|
|
int v = atoi(force_lg_env);
|
|
|
|
|
if (v >= SUPERSLAB_LG_MIN && v <= SUPERSLAB_LG_MAX) {
|
|
|
|
|
g_ss_force_lg = v;
|
|
|
|
|
g_ss_min_lg_env = g_ss_max_lg_env = v;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
size_t precharge_default = 0;
|
|
|
|
|
const char* precharge_env = getenv("HAKMEM_TINY_SS_PRECHARGE");
|
|
|
|
|
if (precharge_env && *precharge_env) {
|
|
|
|
|
long v = atol(precharge_env);
|
|
|
|
|
if (v < 0) v = 0;
|
|
|
|
|
precharge_default = (size_t)v;
|
|
|
|
|
if (v > 0) {
|
|
|
|
|
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
size_t cache_default = 0;
|
|
|
|
|
const char* cache_env = getenv("HAKMEM_TINY_SS_CACHE");
|
|
|
|
|
if (cache_env && *cache_env) {
|
|
|
|
|
long v = atol(cache_env);
|
|
|
|
|
if (v < 0) v = 0;
|
|
|
|
|
cache_default = (size_t)v;
|
|
|
|
|
}
|
|
|
|
|
// Initialize cache/precharge via direct manipulation (box API doesn't need init function)
|
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
|
|
|
extern size_t g_ss_cache_cap[8];
|
|
|
|
|
extern size_t g_ss_precharge_target[8];
|
|
|
|
|
g_ss_cache_cap[i] = cache_default;
|
|
|
|
|
g_ss_precharge_target[i] = precharge_default;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
|
|
|
char name[64];
|
|
|
|
|
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_CACHE_C%d", i);
|
|
|
|
|
char* cap_env = getenv(name);
|
|
|
|
|
if (cap_env && *cap_env) {
|
|
|
|
|
long v = atol(cap_env);
|
|
|
|
|
if (v < 0) v = 0;
|
|
|
|
|
tiny_ss_cache_set_class_cap(i, (size_t)v);
|
|
|
|
|
}
|
|
|
|
|
snprintf(name, sizeof(name), "HAKMEM_TINY_SS_PRECHARGE_C%d", i);
|
|
|
|
|
char* pre_env = getenv(name);
|
|
|
|
|
if (pre_env && *pre_env) {
|
|
|
|
|
long v = atol(pre_env);
|
|
|
|
|
if (v < 0) v = 0;
|
|
|
|
|
tiny_ss_precharge_set_class_target(i, (size_t)v);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
const char* populate_env = getenv("HAKMEM_TINY_SS_POPULATE_ONCE");
|
|
|
|
|
if (populate_env && atoi(populate_env) != 0) {
|
|
|
|
|
atomic_store_explicit(&g_ss_populate_once, 1, memory_order_relaxed);
|
|
|
|
|
}
|
|
|
|
|
env_parsed = 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t lg = (g_ss_force_lg >= 0) ? (uint8_t)g_ss_force_lg : hak_tiny_superslab_next_lg(size_class);
|
|
|
|
|
if (lg < g_ss_min_lg_env) lg = g_ss_min_lg_env;
|
|
|
|
|
if (lg > g_ss_max_lg_env) lg = g_ss_max_lg_env;
|
|
|
|
|
size_t ss_size = (size_t)1 << lg; // 2^20 = 1MB, 2^21 = 2MB
|
|
|
|
|
uintptr_t ss_mask = ss_size - 1;
|
|
|
|
|
int from_cache = 0;
|
|
|
|
|
void* ptr = NULL;
|
|
|
|
|
|
|
|
|
|
// Debug logging flag (lazy init)
|
|
|
|
|
static __thread int dbg = -1;
|
ENV cleanup: Add RELEASE guards to DEBUG ENV variables (14 vars)
Added compile-time guards (#if HAKMEM_BUILD_RELEASE) to eliminate
DEBUG ENV variable overhead in RELEASE builds.
Variables guarded (14 total):
- HAKMEM_TINY_TRACE_RING, HAKMEM_TINY_DUMP_RING_ATEXIT
- HAKMEM_TINY_RF_TRACE, HAKMEM_TINY_MAILBOX_TRACE
- HAKMEM_TINY_MAILBOX_TRACE_LIMIT, HAKMEM_TINY_MAILBOX_SLOWDISC
- HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD
- HAKMEM_SS_PREWARM_DEBUG, HAKMEM_SS_FREE_DEBUG
- HAKMEM_TINY_FRONT_METRICS, HAKMEM_TINY_FRONT_DUMP
- HAKMEM_TINY_COUNTERS_DUMP, HAKMEM_TINY_REFILL_DUMP
- HAKMEM_PTR_TRACE_DUMP, HAKMEM_PTR_TRACE_VERBOSE
Files modified (9 core files):
- core/tiny_debug_ring.c (ring trace/dump)
- core/box/mailbox_box.c (mailbox trace + slowdisc)
- core/tiny_refill.h (refill trace)
- core/hakmem_tiny_superslab.c (superslab debug)
- core/box/ss_allocation_box.c (allocation debug)
- core/tiny_superslab_free.inc.h (free debug)
- core/box/front_metrics_box.c (frontend metrics)
- core/hakmem_tiny_stats.c (stats dump)
- core/ptr_trace.h (pointer trace)
Bug fixes during implementation:
1. mailbox_box.c - Fixed variable scope (moved 'used' outside guard)
2. hakmem_tiny_stats.c - Fixed incomplete declarations (on1, on2)
Impact:
- Binary size: -85KB total
- bench_random_mixed_hakmem: 319K → 305K (-14K, -4.4%)
- larson_hakmem: 380K → 309K (-71K, -18.7%)
- Performance: No regression (16.9-17.9M ops/s maintained)
- Functional: All tests pass (Random Mixed + Larson)
- Behavior: DEBUG ENV vars correctly ignored in RELEASE builds
Testing:
- Build: Clean compilation (warnings only, pre-existing)
- 100K Random Mixed: 16.9-17.9M ops/s (PASS)
- 10K Larson: 25.9M ops/s (PASS)
- DEBUG ENV verification: Correctly ignored (PASS)
Result: 14 DEBUG ENV variables now have zero overhead in RELEASE builds.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 03:41:07 +09:00
|
|
|
#if HAKMEM_BUILD_RELEASE
|
|
|
|
|
dbg = 0;
|
|
|
|
|
#else
|
2025-11-20 02:01:52 +09:00
|
|
|
if (__builtin_expect(dbg == -1, 0)) {
|
|
|
|
|
const char* e = getenv("HAKMEM_SS_PREWARM_DEBUG");
|
|
|
|
|
dbg = (e && *e && *e != '0') ? 1 : 0;
|
|
|
|
|
}
|
ENV cleanup: Add RELEASE guards to DEBUG ENV variables (14 vars)
Added compile-time guards (#if HAKMEM_BUILD_RELEASE) to eliminate
DEBUG ENV variable overhead in RELEASE builds.
Variables guarded (14 total):
- HAKMEM_TINY_TRACE_RING, HAKMEM_TINY_DUMP_RING_ATEXIT
- HAKMEM_TINY_RF_TRACE, HAKMEM_TINY_MAILBOX_TRACE
- HAKMEM_TINY_MAILBOX_TRACE_LIMIT, HAKMEM_TINY_MAILBOX_SLOWDISC
- HAKMEM_TINY_MAILBOX_SLOWDISC_PERIOD
- HAKMEM_SS_PREWARM_DEBUG, HAKMEM_SS_FREE_DEBUG
- HAKMEM_TINY_FRONT_METRICS, HAKMEM_TINY_FRONT_DUMP
- HAKMEM_TINY_COUNTERS_DUMP, HAKMEM_TINY_REFILL_DUMP
- HAKMEM_PTR_TRACE_DUMP, HAKMEM_PTR_TRACE_VERBOSE
Files modified (9 core files):
- core/tiny_debug_ring.c (ring trace/dump)
- core/box/mailbox_box.c (mailbox trace + slowdisc)
- core/tiny_refill.h (refill trace)
- core/hakmem_tiny_superslab.c (superslab debug)
- core/box/ss_allocation_box.c (allocation debug)
- core/tiny_superslab_free.inc.h (free debug)
- core/box/front_metrics_box.c (frontend metrics)
- core/hakmem_tiny_stats.c (stats dump)
- core/ptr_trace.h (pointer trace)
Bug fixes during implementation:
1. mailbox_box.c - Fixed variable scope (moved 'used' outside guard)
2. hakmem_tiny_stats.c - Fixed incomplete declarations (on1, on2)
Impact:
- Binary size: -85KB total
- bench_random_mixed_hakmem: 319K → 305K (-14K, -4.4%)
- larson_hakmem: 380K → 309K (-71K, -18.7%)
- Performance: No regression (16.9-17.9M ops/s maintained)
- Functional: All tests pass (Random Mixed + Larson)
- Behavior: DEBUG ENV vars correctly ignored in RELEASE builds
Testing:
- Build: Clean compilation (warnings only, pre-existing)
- 100K Random Mixed: 16.9-17.9M ops/s (PASS)
- 10K Larson: 25.9M ops/s (PASS)
- DEBUG ENV verification: Correctly ignored (PASS)
Result: 14 DEBUG ENV variables now have zero overhead in RELEASE builds.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-27 03:41:07 +09:00
|
|
|
#endif
|
2025-11-20 02:01:52 +09:00
|
|
|
|
|
|
|
|
// Phase 9: Try LRU cache first (lazy deallocation)
|
|
|
|
|
SuperSlab* cached_ss = hak_ss_lru_pop(size_class);
|
|
|
|
|
if (cached_ss) {
|
|
|
|
|
ptr = (void*)cached_ss;
|
|
|
|
|
from_cache = 1;
|
|
|
|
|
// Debug logging for REFILL from LRU
|
|
|
|
|
if (dbg == 1) {
|
|
|
|
|
fprintf(stderr, "[REFILL] class=%d from_lru=1 ss=%p\n",
|
|
|
|
|
size_class, (void*)cached_ss);
|
|
|
|
|
}
|
|
|
|
|
// Skip old cache path - LRU cache takes priority
|
|
|
|
|
} else {
|
|
|
|
|
// Fallback to old cache (will be deprecated)
|
|
|
|
|
ss_cache_precharge(size_class, ss_size, ss_mask);
|
|
|
|
|
void* old_cached = ss_cache_pop(size_class);
|
|
|
|
|
if (old_cached) {
|
|
|
|
|
ptr = old_cached;
|
|
|
|
|
from_cache = 1;
|
|
|
|
|
// Debug logging for REFILL from prewarm (old cache is essentially prewarm)
|
|
|
|
|
if (dbg == 1) {
|
|
|
|
|
fprintf(stderr, "[REFILL] class=%d from_prewarm=1 ss=%p\n",
|
|
|
|
|
size_class, ptr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!ptr) {
|
|
|
|
|
int populate = atomic_exchange_explicit(&g_ss_populate_once, 0, memory_order_acq_rel);
|
|
|
|
|
ptr = ss_os_acquire(size_class, ss_size, ss_mask, populate);
|
|
|
|
|
if (!ptr) {
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|
|
|
|
|
// Debug logging for REFILL with new allocation
|
|
|
|
|
if (dbg == 1) {
|
|
|
|
|
fprintf(stderr, "[REFILL] class=%d new_alloc=1 ss=%p\n",
|
|
|
|
|
size_class, (void*)ptr);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Initialize SuperSlab header (Phase 12: no global size_class field)
|
|
|
|
|
SuperSlab* ss = (SuperSlab*)ptr;
|
|
|
|
|
ss->magic = SUPERSLAB_MAGIC;
|
|
|
|
|
ss->active_slabs = 0;
|
|
|
|
|
ss->lg_size = lg; // Phase 8.3: Use ACE-determined lg_size (20=1MB, 21=2MB)
|
|
|
|
|
ss->slab_bitmap = 0;
|
|
|
|
|
ss->nonempty_mask = 0; // Phase 6-2.1: ChatGPT Pro P0 - init nonempty mask
|
Tiny Pool redesign: P0.1, P0.3, P1.1, P1.2 - Out-of-band class_idx lookup
This commit implements the first phase of Tiny Pool redesign based on
ChatGPT architecture review. The goal is to eliminate Header/Next pointer
conflicts by moving class_idx lookup out-of-band (to SuperSlab metadata).
## P0.1: C0(8B) class upgraded to 16B
- Size table changed: {16,32,64,128,256,512,1024,2048} (8 classes)
- LUT updated: 1..16 → class 0, 17..32 → class 1, etc.
- tiny_next_off: C0 now uses offset 1 (header preserved)
- Eliminates edge cases for 8B allocations
## P0.3: Slab reuse guard Box (tls_slab_reuse_guard_box.h)
- New Box for draining TLS SLL before slab reuse
- ENV gate: HAKMEM_TINY_SLAB_REUSE_GUARD=1
- Prevents stale pointers when slabs are recycled
- Follows Box theory: single responsibility, minimal API
## P1.1: SuperSlab class_map addition
- Added uint8_t class_map[SLABS_PER_SUPERSLAB_MAX] to SuperSlab
- Maps slab_idx → class_idx for out-of-band lookup
- Initialized to 255 (UNASSIGNED) on SuperSlab creation
- Set correctly on slab initialization in all backends
## P1.2: Free fast path uses class_map
- ENV gate: HAKMEM_TINY_USE_CLASS_MAP=1
- Free path can now get class_idx from class_map instead of Header
- Falls back to Header read if class_map returns invalid value
- Fixed Legacy Backend dynamic slab initialization bug
## Documentation added
- HAKMEM_ARCHITECTURE_OVERVIEW.md: 4-layer architecture analysis
- TLS_SLL_ARCHITECTURE_INVESTIGATION.md: Root cause analysis
- PTR_LIFECYCLE_TRACE_AND_ROOT_CAUSE_ANALYSIS.md: Pointer tracking
- TINY_REDESIGN_CHECKLIST.md: Implementation roadmap (P0-P3)
## Test results
- Baseline: 70% success rate (30% crash - pre-existing issue)
- class_map enabled: 70% success rate (same as baseline)
- Performance: ~30.5M ops/s (unchanged)
## Next steps (P1.3, P2, P3)
- P1.3: Add meta->active for accurate TLS/freelist sync
- P2: TLS SLL redesign with Box-based counting
- P3: Complete Header out-of-band migration
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 13:42:39 +09:00
|
|
|
ss->freelist_mask = 0; // P1.1 FIX: Initialize freelist_mask
|
|
|
|
|
ss->empty_mask = 0; // P1.1 FIX: Initialize empty_mask
|
|
|
|
|
ss->empty_count = 0; // P1.1 FIX: Initialize empty_count
|
2025-11-20 02:01:52 +09:00
|
|
|
ss->partial_epoch = 0;
|
|
|
|
|
ss->publish_hint = 0xFF;
|
|
|
|
|
|
|
|
|
|
// Initialize atomics explicitly
|
|
|
|
|
atomic_store_explicit(&ss->total_active_blocks, 0, memory_order_relaxed);
|
|
|
|
|
atomic_store_explicit(&ss->refcount, 0, memory_order_relaxed);
|
|
|
|
|
atomic_store_explicit(&ss->listed, 0, memory_order_relaxed);
|
|
|
|
|
ss->partial_next = NULL;
|
|
|
|
|
|
|
|
|
|
// Phase 9: Initialize LRU fields
|
|
|
|
|
ss->last_used_ns = 0;
|
|
|
|
|
ss->generation = 0;
|
|
|
|
|
ss->lru_prev = NULL;
|
|
|
|
|
ss->lru_next = NULL;
|
|
|
|
|
|
Tiny Pool redesign: P0.1, P0.3, P1.1, P1.2 - Out-of-band class_idx lookup
This commit implements the first phase of Tiny Pool redesign based on
ChatGPT architecture review. The goal is to eliminate Header/Next pointer
conflicts by moving class_idx lookup out-of-band (to SuperSlab metadata).
## P0.1: C0(8B) class upgraded to 16B
- Size table changed: {16,32,64,128,256,512,1024,2048} (8 classes)
- LUT updated: 1..16 → class 0, 17..32 → class 1, etc.
- tiny_next_off: C0 now uses offset 1 (header preserved)
- Eliminates edge cases for 8B allocations
## P0.3: Slab reuse guard Box (tls_slab_reuse_guard_box.h)
- New Box for draining TLS SLL before slab reuse
- ENV gate: HAKMEM_TINY_SLAB_REUSE_GUARD=1
- Prevents stale pointers when slabs are recycled
- Follows Box theory: single responsibility, minimal API
## P1.1: SuperSlab class_map addition
- Added uint8_t class_map[SLABS_PER_SUPERSLAB_MAX] to SuperSlab
- Maps slab_idx → class_idx for out-of-band lookup
- Initialized to 255 (UNASSIGNED) on SuperSlab creation
- Set correctly on slab initialization in all backends
## P1.2: Free fast path uses class_map
- ENV gate: HAKMEM_TINY_USE_CLASS_MAP=1
- Free path can now get class_idx from class_map instead of Header
- Falls back to Header read if class_map returns invalid value
- Fixed Legacy Backend dynamic slab initialization bug
## Documentation added
- HAKMEM_ARCHITECTURE_OVERVIEW.md: 4-layer architecture analysis
- TLS_SLL_ARCHITECTURE_INVESTIGATION.md: Root cause analysis
- PTR_LIFECYCLE_TRACE_AND_ROOT_CAUSE_ANALYSIS.md: Pointer tracking
- TINY_REDESIGN_CHECKLIST.md: Implementation roadmap (P0-P3)
## Test results
- Baseline: 70% success rate (30% crash - pre-existing issue)
- class_map enabled: 70% success rate (same as baseline)
- Performance: ~30.5M ops/s (unchanged)
## Next steps (P1.3, P2, P3)
- P1.3: Add meta->active for accurate TLS/freelist sync
- P2: TLS SLL redesign with Box-based counting
- P3: Complete Header out-of-band migration
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 13:42:39 +09:00
|
|
|
// Phase 3d-C: Initialize hot/cold fields
|
|
|
|
|
ss->hot_count = 0;
|
|
|
|
|
ss->cold_count = 0;
|
|
|
|
|
memset(ss->hot_indices, 0, sizeof(ss->hot_indices));
|
|
|
|
|
memset(ss->cold_indices, 0, sizeof(ss->cold_indices));
|
|
|
|
|
|
|
|
|
|
// Phase 12: Initialize next_chunk (legacy per-class chain)
|
|
|
|
|
ss->next_chunk = NULL;
|
|
|
|
|
|
2025-11-20 02:01:52 +09:00
|
|
|
// Initialize all slab metadata (only up to max slabs for this size)
|
|
|
|
|
int max_slabs = (int)(ss_size / SLAB_SIZE);
|
|
|
|
|
|
|
|
|
|
// DEFENSIVE FIX: Zero all slab metadata arrays to prevent ANY uninitialized pointers
|
|
|
|
|
// This catches the 0xa2a2a2a2a2a2a2a2 pattern bug (ASan/debug fill pattern)
|
|
|
|
|
// Even though mmap should return zeroed pages, sanitizers may fill with debug patterns
|
|
|
|
|
memset(ss->slabs, 0, max_slabs * sizeof(TinySlabMeta));
|
|
|
|
|
memset(ss->remote_heads, 0, max_slabs * sizeof(uintptr_t));
|
|
|
|
|
memset(ss->remote_counts, 0, max_slabs * sizeof(uint32_t));
|
|
|
|
|
memset(ss->slab_listed, 0, max_slabs * sizeof(uint32_t));
|
|
|
|
|
|
Tiny Pool redesign: P0.1, P0.3, P1.1, P1.2 - Out-of-band class_idx lookup
This commit implements the first phase of Tiny Pool redesign based on
ChatGPT architecture review. The goal is to eliminate Header/Next pointer
conflicts by moving class_idx lookup out-of-band (to SuperSlab metadata).
## P0.1: C0(8B) class upgraded to 16B
- Size table changed: {16,32,64,128,256,512,1024,2048} (8 classes)
- LUT updated: 1..16 → class 0, 17..32 → class 1, etc.
- tiny_next_off: C0 now uses offset 1 (header preserved)
- Eliminates edge cases for 8B allocations
## P0.3: Slab reuse guard Box (tls_slab_reuse_guard_box.h)
- New Box for draining TLS SLL before slab reuse
- ENV gate: HAKMEM_TINY_SLAB_REUSE_GUARD=1
- Prevents stale pointers when slabs are recycled
- Follows Box theory: single responsibility, minimal API
## P1.1: SuperSlab class_map addition
- Added uint8_t class_map[SLABS_PER_SUPERSLAB_MAX] to SuperSlab
- Maps slab_idx → class_idx for out-of-band lookup
- Initialized to 255 (UNASSIGNED) on SuperSlab creation
- Set correctly on slab initialization in all backends
## P1.2: Free fast path uses class_map
- ENV gate: HAKMEM_TINY_USE_CLASS_MAP=1
- Free path can now get class_idx from class_map instead of Header
- Falls back to Header read if class_map returns invalid value
- Fixed Legacy Backend dynamic slab initialization bug
## Documentation added
- HAKMEM_ARCHITECTURE_OVERVIEW.md: 4-layer architecture analysis
- TLS_SLL_ARCHITECTURE_INVESTIGATION.md: Root cause analysis
- PTR_LIFECYCLE_TRACE_AND_ROOT_CAUSE_ANALYSIS.md: Pointer tracking
- TINY_REDESIGN_CHECKLIST.md: Implementation roadmap (P0-P3)
## Test results
- Baseline: 70% success rate (30% crash - pre-existing issue)
- class_map enabled: 70% success rate (same as baseline)
- Performance: ~30.5M ops/s (unchanged)
## Next steps (P1.3, P2, P3)
- P1.3: Add meta->active for accurate TLS/freelist sync
- P2: TLS SLL redesign with Box-based counting
- P3: Complete Header out-of-band migration
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-28 13:42:39 +09:00
|
|
|
// P1.1: Initialize class_map to UNASSIGNED (255) for all slabs
|
|
|
|
|
// This ensures class_map is in a known state even before slabs are assigned
|
|
|
|
|
memset(ss->class_map, 255, max_slabs * sizeof(uint8_t));
|
|
|
|
|
|
2025-11-20 02:01:52 +09:00
|
|
|
for (int i = 0; i < max_slabs; i++) {
|
|
|
|
|
ss_slab_meta_freelist_set(ss, i, NULL); // Explicit NULL (redundant after memset, but clear intent)
|
|
|
|
|
ss_slab_meta_used_set(ss, i, 0);
|
|
|
|
|
ss_slab_meta_capacity_set(ss, i, 0);
|
|
|
|
|
ss_slab_meta_owner_tid_low_set(ss, i, 0);
|
|
|
|
|
|
|
|
|
|
// Initialize remote queue atomics (memset already zeroed, but use proper atomic init)
|
|
|
|
|
atomic_store_explicit(&ss->remote_heads[i], 0, memory_order_relaxed);
|
|
|
|
|
atomic_store_explicit(&ss->remote_counts[i], 0, memory_order_relaxed);
|
|
|
|
|
atomic_store_explicit(&ss->slab_listed[i], 0, memory_order_relaxed);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (from_cache) {
|
|
|
|
|
ss_stats_cache_reuse();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Phase 8.3: Update ACE current_lg to match allocated size
|
|
|
|
|
g_ss_ace[size_class].current_lg = lg;
|
|
|
|
|
|
|
|
|
|
// Phase 1: Register SuperSlab in global registry for fast lookup
|
|
|
|
|
// CRITICAL: Register AFTER full initialization (ss structure is ready)
|
|
|
|
|
uintptr_t base = (uintptr_t)ss;
|
|
|
|
|
if (!hak_super_register(base, ss)) {
|
|
|
|
|
// Registry full - this is a fatal error
|
|
|
|
|
fprintf(stderr, "HAKMEM FATAL: SuperSlab registry full, cannot register %p\n", ss);
|
|
|
|
|
// Still return ss to avoid memory leak, but lookups may fail
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return ss;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// SuperSlab Deallocation
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
// Release a SuperSlab: unregister it, then either park it in a cache or unmap it.
//
// Order of operations:
//   1. Reject NULL or a header whose magic is not SUPERSLAB_MAGIC (no-op).
//   2. Unregister from the global registry so lookups no longer find it.
//   3. Offer it to the LRU cache; if accepted, return (munmap is deferred).
//   4. Offer it to the old cache; if accepted, count the store and return.
//   5. If the frozen policy sets tiny_ss_never_free_global, keep the mapping.
//   6. Otherwise clear the magic, munmap the region and update the global
//      release statistics under g_superslab_lock.
//
// The statistics are only updated when munmap() actually succeeds.
void superslab_free(SuperSlab* ss) {
    if (!ss || ss->magic != SUPERSLAB_MAGIC) {
        return; // Invalid SuperSlab
    }

    // Debug logging flag (lazy init); forced off in release builds so the
    // HAKMEM_SS_FREE_DEBUG lookup is compiled out.
    static __thread int dbg = -1;
#if HAKMEM_BUILD_RELEASE
    dbg = 0;
#else
    if (__builtin_expect(dbg == -1, 0)) {
        const char* e = getenv("HAKMEM_SS_FREE_DEBUG");
        dbg = (e && *e && *e != '0') ? 1 : 0;
    }
#endif
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] CALLED: ss=%p lg_size=%d active_slabs=%u\n",
                (void*)ss, ss->lg_size, ss->active_slabs);
    }

    // Size of the mapping, captured now because the header is gone after munmap.
    size_t ss_size = (size_t)1 << ss->lg_size;

    // Phase 1: Unregister SuperSlab from registry FIRST
    // CRITICAL: Must unregister BEFORE adding to LRU cache
    // Reason: Cached SuperSlabs should NOT be found by lookups
    uintptr_t base = (uintptr_t)ss;
    hak_super_unregister(base);

    // Memory fence to ensure unregister is visible
    atomic_thread_fence(memory_order_release);

    // Phase 9: Try LRU cache first (lazy deallocation)
    // NOTE: LRU cache keeps magic=SUPERSLAB_MAGIC for validation
    // Magic will be cleared on eviction or reuse
    int lru_cached = hak_ss_lru_push(ss);
    if (dbg == 1) {
        fprintf(stderr, "[SS_FREE] hak_ss_lru_push() returned %d\n", lru_cached);
    }
    if (lru_cached) {
        // Successfully cached in LRU - defer munmap
        return;
    }

    // LRU cache full or disabled - try old cache.
    // NOTE(review): class index 0 is passed unconditionally here; the header
    // no longer carries a per-SuperSlab size class - confirm this is intended.
    int old_cached = ss_cache_push(0, ss);
    if (old_cached) {
        ss_stats_cache_store();
        return;
    }

    // Phase E3-1: Check never-free policy before munmap
    // If policy forbids Tiny SuperSlab munmap, skip deallocation (leak is intentional)
    const FrozenPolicy* pol = hkm_policy_get();
    if (pol && pol->tiny_ss_never_free_global) {
        // Policy forbids munmap - keep SuperSlab allocated (intentional "leak")
        // Watermark enforcement will be added in Phase E3-2
#if !HAKMEM_BUILD_RELEASE
        fprintf(stderr, "[SS_POLICY_SKIP] Skipping munmap (never_free policy) ss=%p size=%zu\n",
                (void*)ss, ss_size);
#endif
        return;
    }

    // Both caches full - immediately free to OS (eager deallocation)
    // Clear magic to prevent use-after-free
    ss->magic = 0;

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] Freeing SuperSlab ss=%p size=%zu active=%u (LRU full)\n",
            (void*)ss, ss_size,
            atomic_load_explicit(&ss->total_active_blocks, memory_order_relaxed));
#endif

    if (munmap(ss, ss_size) != 0) {
        // The region is still mapped, so it must not be counted as released.
#if !HAKMEM_BUILD_RELEASE
        perror("[DEBUG ss_os_release] munmap failed");
#endif
        return;
    }

    // Update statistics for actual release to OS
    pthread_mutex_lock(&g_superslab_lock);
    g_superslabs_freed++;
    // Phase 12: we no longer track per-SS size_class on header; skip g_ss_freed_by_class here
    g_bytes_allocated -= ss_size;
#if !HAKMEM_BUILD_RELEASE
    // Snapshot while the lock is held so the debug print below does not read
    // the counter unsynchronized (assumes other writers also take this lock).
    const uint64_t freed_total = g_superslabs_freed;
#endif
    pthread_mutex_unlock(&g_superslab_lock);

#if !HAKMEM_BUILD_RELEASE
    fprintf(stderr, "[DEBUG ss_os_release] g_superslabs_freed now = %llu\n",
            (unsigned long long)freed_total);
#endif
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Slab Initialization within SuperSlab
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
// Initialize the metadata of one slab inside a SuperSlab and activate it.
//
// Parameters:
//   ss         - SuperSlab that owns the slab; NULL is rejected.
//   slab_idx   - slab index, must be in [0, ss_slabs_capacity(ss)).
//   block_size - TOTAL per-block stride for the class
//                (g_tiny_class_sizes[cls]); must be non-zero.
//   owner_tid  - owner thread id; only bits 8-15 are stored in the slab meta.
//
// Invalid arguments make the call a silent no-op: no metadata is modified
// and the slab is not activated.
void superslab_init_slab(SuperSlab* ss, int slab_idx, size_t block_size, uint32_t owner_tid)
{
    // Reject bad arguments up front. A zero block_size would otherwise cause
    // a division by zero when the capacity is computed below.
    if (!ss || slab_idx < 0 || slab_idx >= ss_slabs_capacity(ss) || block_size == 0) {
        return;
    }

    // Phase E1-CORRECT unified geometry:
    // - block_size is the TOTAL stride for this class (g_tiny_class_sizes[cls])
    // - usable bytes are determined by slab index (slab0 vs others)
    // - capacity = usable / stride for ALL classes (including former C7)
    size_t usable_size = (slab_idx == 0)
                             ? SUPERSLAB_SLAB0_USABLE_SIZE
                             : SUPERSLAB_SLAB_USABLE_SIZE;
    size_t stride = block_size;
    uint16_t capacity = (uint16_t)(usable_size / stride);

#if !HAKMEM_BUILD_RELEASE
    if (slab_idx == 0) {
        fprintf(stderr,
                "[SUPERSLAB_INIT] slab 0: usable_size=%zu stride=%zu capacity=%u\n",
                usable_size, stride, (unsigned)capacity);
    }
#endif

    TinySlabMeta* meta = &ss->slabs[slab_idx];
    meta->freelist = NULL;  // NULL = linear allocation mode
    meta->used = 0;
    meta->active = 0;       // P1.3: blocks in use by user (starts at 0)
    meta->tls_cached = 0;   // P2.2: blocks cached in TLS SLL (starts at 0)
    meta->capacity = capacity;
    meta->carved = 0;
    // Store bits 8-15 of owner_tid (low 8 bits are 0 for glibc pthread IDs)
    meta->owner_tid_low = (uint8_t)((owner_tid >> 8) & 0xFFu);

    // Fail-safe: stamp class_idx from geometry (stride -> class).
    // This normalizes both legacy and shared pool paths.
    // NOTE(review): if no entry of g_tiny_class_sizes equals stride, both
    // meta->class_idx and ss->class_map[slab_idx] keep their previous values;
    // confirm that callers only pass exact class strides.
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        if (g_tiny_class_sizes[i] == stride) {
            meta->class_idx = (uint8_t)i;
            // P1.1: Update class_map for out-of-band lookup on free path
            ss->class_map[slab_idx] = (uint8_t)i;
            break;
        }
    }

    superslab_activate_slab(ss, slab_idx);
}
|