diff --git a/Makefile b/Makefile index 9876e596..6e15cc34 100644 --- a/Makefile +++ b/Makefile @@ -179,12 +179,12 @@ LDFLAGS += $(EXTRA_LDFLAGS) # Targets TARGET = test_hakmem -OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/link_stubs.o test_hakmem.o +OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/link_stubs.o test_hakmem.o OBJS = $(OBJS_BASE) # Shared library SHARED_LIB = libhakmem.so -SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o hakmem_tiny_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o 
hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o +SHARED_OBJS = hakmem_shared.o hakmem_config_shared.o hakmem_tiny_config_shared.o hakmem_ucb1_shared.o hakmem_bigcache_shared.o hakmem_pool_shared.o hakmem_l25_pool_shared.o hakmem_site_rules_shared.o hakmem_tiny_shared.o hakmem_tiny_superslab_shared.o core/box/superslab_expansion_box_shared.o core/box/integrity_box_shared.o core/box/mailbox_box_shared.o core/box/front_gate_box_shared.o core/box/free_local_box_shared.o core/box/free_remote_box_shared.o core/box/free_publish_box_shared.o core/box/capacity_box_shared.o core/box/carve_push_box_shared.o core/box/prewarm_box_shared.o tiny_sticky_shared.o tiny_remote_shared.o tiny_publish_shared.o tiny_debug_ring_shared.o hakmem_tiny_magazine_shared.o hakmem_tiny_stats_shared.o hakmem_tiny_sfc_shared.o hakmem_tiny_query_shared.o hakmem_tiny_rss_shared.o hakmem_tiny_registry_shared.o hakmem_tiny_remote_target_shared.o hakmem_tiny_bg_spill_shared.o tiny_adaptive_sizing_shared.o hakmem_mid_mt_shared.o hakmem_super_registry_shared.o hakmem_elo_shared.o hakmem_batch_shared.o hakmem_p2_shared.o hakmem_sizeclass_dist_shared.o hakmem_evo_shared.o hakmem_debug_shared.o hakmem_sys_shared.o hakmem_whale_shared.o hakmem_policy_shared.o hakmem_ace_shared.o hakmem_ace_stats_shared.o hakmem_ace_controller_shared.o hakmem_ace_metrics_shared.o hakmem_ace_ucb1_shared.o hakmem_prof_shared.o hakmem_learner_shared.o hakmem_size_hist_shared.o hakmem_learn_log_shared.o hakmem_syscall_shared.o tiny_fastcache_shared.o # Pool TLS Phase 1 (enable with POOL_TLS_PHASE1=1) ifeq ($(POOL_TLS_PHASE1),1) @@ -203,7 +203,7 @@ endif # Benchmark targets BENCH_HAKMEM = bench_allocators_hakmem BENCH_SYSTEM = bench_allocators_system -BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/link_stubs.o bench_allocators_hakmem.o +BENCH_HAKMEM_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o 
hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o core/link_stubs.o bench_allocators_hakmem.o BENCH_HAKMEM_OBJS = $(BENCH_HAKMEM_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) BENCH_HAKMEM_OBJS += pool_tls.o pool_refill.o pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -380,7 +380,7 @@ test-box-refactor: box-refactor ./larson_hakmem 10 8 128 1024 1 12345 4 # Phase 4: Tiny Pool benchmarks (properly linked with hakmem) -TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o +TINY_BENCH_OBJS_BASE = hakmem.o hakmem_config.o hakmem_tiny_config.o hakmem_ucb1.o hakmem_bigcache.o hakmem_pool.o hakmem_l25_pool.o hakmem_site_rules.o hakmem_tiny.o hakmem_tiny_superslab.o core/box/superslab_expansion_box.o core/box/integrity_box.o core/box/mailbox_box.o core/box/front_gate_box.o core/box/front_gate_classifier.o core/box/free_local_box.o core/box/free_remote_box.o core/box/free_publish_box.o core/box/capacity_box.o core/box/carve_push_box.o core/box/prewarm_box.o tiny_sticky.o tiny_remote.o tiny_publish.o tiny_debug_ring.o hakmem_tiny_magazine.o hakmem_tiny_stats.o hakmem_tiny_sfc.o hakmem_tiny_query.o hakmem_tiny_rss.o hakmem_tiny_registry.o hakmem_tiny_remote_target.o hakmem_tiny_bg_spill.o tiny_adaptive_sizing.o hakmem_mid_mt.o hakmem_super_registry.o hakmem_elo.o hakmem_batch.o hakmem_p2.o hakmem_sizeclass_dist.o hakmem_evo.o hakmem_debug.o hakmem_sys.o hakmem_whale.o hakmem_policy.o hakmem_ace.o hakmem_ace_stats.o hakmem_prof.o hakmem_learner.o hakmem_size_hist.o hakmem_learn_log.o hakmem_syscall.o hakmem_ace_metrics.o hakmem_ace_ucb1.o hakmem_ace_controller.o tiny_fastcache.o core/link_stubs.o TINY_BENCH_OBJS = $(TINY_BENCH_OBJS_BASE) ifeq ($(POOL_TLS_PHASE1),1) TINY_BENCH_OBJS += pool_tls.o pool_refill.o core/pool_tls_arena.o pool_tls_registry.o pool_tls_remote.o @@ -1239,3 +1239,10 @@ bench-pool-tls: bench_pool_tls_hakmem bench_pool_tls_system @./bench_pool_tls_system 1 100000 256 42 @echo "" @echo "=========================================" + +# Phase E1-CORRECT 
Debug Bench (minimal test)
+test_simple_e1: test_simple_e1.o $(HAKMEM_OBJS)
+	$(CC) -o $@ $^ $(LDFLAGS)
+
+test_simple_e1.o: test_simple_e1.c
+	$(CC) $(CFLAGS) -c -o $@ $<
diff --git a/core/box/capacity_box.c b/core/box/capacity_box.c
new file mode 100644
index 00000000..fa56c88c
--- /dev/null
+++ b/core/box/capacity_box.c
@@ -0,0 +1,123 @@
+// capacity_box.c - Box Capacity Manager Implementation
+#include "capacity_box.h"
+#include "../tiny_adaptive_sizing.h"   // TLSCacheStats, adaptive_sizing_init()
+#include "../hakmem_tiny.h"            // g_tls_sll_count
+#include "../hakmem_tiny_config.h"     // TINY_NUM_CLASSES, TINY_TLS_MAG_CAP
+#include "../hakmem_tiny_integrity.h"  // HAK_CHECK_CLASS_IDX
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+// ============================================================================
+// Internal State
+// ============================================================================
+
+// Initialization flag (atomic for thread-safety)
+static _Atomic int g_box_cap_initialized = 0;
+
+// External declarations (from adaptive_sizing and hakmem_tiny)
+extern __thread TLSCacheStats g_tls_cache_stats[TINY_NUM_CLASSES]; // TLS variable!
+extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES];
+extern int g_sll_cap_override[TINY_NUM_CLASSES];
+extern int g_sll_multiplier;
+
+// ============================================================================
+// Box Capacity API Implementation
+// ============================================================================
+
+void box_cap_init(void) {
+    // Idempotent: only initialize once
+    int expected = 0;
+    if (atomic_compare_exchange_strong(&g_box_cap_initialized, &expected, 1)) {
+        // First call: initialize adaptive sizing
+        adaptive_sizing_init();
+    }
+    // Already initialized or just initialized: safe to proceed
+}
+
+bool box_cap_is_initialized(void) {
+    return atomic_load(&g_box_cap_initialized) != 0;
+}
+
+uint32_t box_cap_get(int class_idx) {
+    // PRIORITY 1: Bounds check
+    HAK_CHECK_CLASS_IDX(class_idx, "box_cap_get");
+
+    // Ensure initialized
+    if (!box_cap_is_initialized()) {
+        // Auto-initialize on first use (defensive)
+        box_cap_init();
+    }
+
+    // Compute SLL capacity using the same logic as sll_cap_for_class()
+    // This centralizes the capacity calculation
+
+    // Check for override
+    if (g_sll_cap_override[class_idx] > 0) {
+        uint32_t cap = (uint32_t)g_sll_cap_override[class_idx];
+        if (cap > TINY_TLS_MAG_CAP) cap = TINY_TLS_MAG_CAP;
+        return cap;
+    }
+
+    // Get base capacity from adaptive sizing
+    uint32_t cap = g_tls_cache_stats[class_idx].capacity;
+
+    // Apply class-specific multipliers
+    if (class_idx <= 3) {
+        // Hot classes: multiply by g_sll_multiplier
+        uint32_t mult = (g_sll_multiplier > 0 ? (uint32_t)g_sll_multiplier : 1u);
+        uint64_t want = (uint64_t)cap * (uint64_t)mult;
+        if (want > (uint64_t)TINY_TLS_MAG_CAP) {
+            cap = TINY_TLS_MAG_CAP;
+        } else {
+            cap = (uint32_t)want;
+        }
+    } else if (class_idx >= 4) {
+        // Mid-large classes: halve capacity
+        cap = (cap > 1u ? (cap / 2u) : 1u);
+    }
+
+    return cap;
+}
+
+bool box_cap_has_room(int class_idx, uint32_t n) {
+    // PRIORITY 1: Bounds check
+    HAK_CHECK_CLASS_IDX(class_idx, "box_cap_has_room");
+
+    uint32_t cap  = box_cap_get(class_idx);
+    uint32_t used = g_tls_sll_count[class_idx];
+
+    // Check if adding N would exceed capacity
+    if (used >= cap) return false;
+    uint32_t avail = cap - used;
+    return (n <= avail);
+}
+
+uint32_t box_cap_avail(int class_idx) {
+    // PRIORITY 1: Bounds check
+    HAK_CHECK_CLASS_IDX(class_idx, "box_cap_avail");
+
+    uint32_t cap  = box_cap_get(class_idx);
+    uint32_t used = g_tls_sll_count[class_idx];
+
+    if (used >= cap) return 0;
+    return (cap - used);
+}
+
+void box_cap_update(int class_idx, uint32_t new_cap) {
+    // PRIORITY 1: Bounds check
+    HAK_CHECK_CLASS_IDX(class_idx, "box_cap_update");
+
+    // Ensure initialized
+    if (!box_cap_is_initialized()) {
+        box_cap_init();
+    }
+
+    // Clamp to max
+    if (new_cap > TINY_TLS_MAG_CAP) {
+        new_cap = TINY_TLS_MAG_CAP;
+    }
+
+    // Update adaptive sizing stats
+    g_tls_cache_stats[class_idx].capacity = new_cap;
+}
diff --git a/core/box/capacity_box.h b/core/box/capacity_box.h
new file mode 100644
index 00000000..311974b1
--- /dev/null
+++ b/core/box/capacity_box.h
@@ -0,0 +1,52 @@
+// capacity_box.h - Box Capacity Manager
+// Priority 1 Box: TLS Cache Capacity Management
+//
+// Purpose:
+//   - Centralize all capacity calculations (adaptive sizing, SLL cap, etc.)
+//   - Prevent initialization order bugs (root cause of prewarm double-free)
+//   - Provide a simple, safe API for capacity queries
+//
+// Design:
+//   - Wraps the adaptive_sizing system
+//   - Idempotent initialization
+//   - Bounds checking built-in
+//   - Thread-safe (uses TLS)
+
+#ifndef HAKMEM_BOX_CAPACITY_H
+#define HAKMEM_BOX_CAPACITY_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+// ============================================================================
+// Box Capacity API
+// ============================================================================
+
+// Initialize capacity system (idempotent - safe to call multiple times)
+// MUST be called before any other box_cap_* functions
+void box_cap_init(void);
+
+// Get current TLS SLL capacity for a class
+// Returns: capacity in blocks, or 0 if not initialized
+// Thread-safe: uses TLS
+uint32_t box_cap_get(int class_idx);
+
+// Check if TLS SLL has room for N blocks
+// Returns: true if N blocks can be added, false otherwise
+// Thread-safe: uses TLS
+bool box_cap_has_room(int class_idx, uint32_t n);
+
+// Get available space in TLS SLL
+// Returns: number of blocks that can be added
+// Thread-safe: uses TLS
+uint32_t box_cap_avail(int class_idx);
+
+// Update capacity (adaptive sizing hook)
+// Note: Normally called by the adaptive sizing system, not manually
+void box_cap_update(int class_idx, uint32_t new_cap);
+
+// Check if capacity system is initialized
+// Returns: true if box_cap_init() was called
+bool box_cap_is_initialized(void);
+
+#endif // HAKMEM_BOX_CAPACITY_H
diff --git a/core/box/carve_push_box.c b/core/box/carve_push_box.c
new file mode 100644
index 00000000..d0c7c8ab
--- /dev/null
+++ b/core/box/carve_push_box.c
@@ -0,0 +1,223 @@
+// carve_push_box.c - Box Carve-And-Push Implementation
+#include <stdint.h>
+#include <stddef.h>
+#include <stdio.h>
+#include "../hakmem_tiny.h"            // MUST BE FIRST: Base types
+#include "../tiny_tls.h"               // TinyTLSSlab type definition
+#include "../hakmem_tiny_config.h"     // TINY_NUM_CLASSES
+#include "../hakmem_tiny_superslab.h"  // ss_active_add(), SuperSlab
+#include "../hakmem_tiny_integrity.h"  // HAK_CHECK_CLASS_IDX
+#include
"carve_push_box.h" +#include "capacity_box.h" // box_cap_has_room() +#include "tls_sll_box.h" // tls_sll_push() +#include "tiny_next_ptr_box.h" // tiny_next_write() +#include "../tiny_refill_opt.h" // TinyRefillChain, trc_linear_carve() +#include "../tiny_box_geometry.h" // tiny_stride_for_class(), tiny_slab_base_for_geometry() + +// External declarations +extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; +extern __thread void* g_tls_sll_head[TINY_NUM_CLASSES]; +extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES]; + +// ============================================================================ +// Internal Helpers +// ============================================================================ + +// Rollback: return carved blocks to freelist +static void rollback_carved_blocks(int class_idx, TinySlabMeta* meta, + void* head, uint32_t count) { + // Walk the chain and prepend to freelist + void* node = head; + for (uint32_t i = 0; i < count && node; i++) { + void* next = tiny_next_read(class_idx, node); + // Prepend to freelist + tiny_next_write(class_idx, node, meta->freelist); + meta->freelist = node; + node = next; + } + // Rollback metadata counters + meta->carved = (uint16_t)((uint32_t)meta->carved - count); + meta->used = (uint16_t)((uint32_t)meta->used - count); +} + +// ============================================================================ +// Box Carve-Push API Implementation +// ============================================================================ + +uint32_t box_carve_and_push(int class_idx, uint32_t want) { + // PRIORITY 1: Bounds check + HAK_CHECK_CLASS_IDX(class_idx, "box_carve_and_push"); + + if (want == 0) return 0; + + // Step 1: Check TLS SLL capacity + if (!box_cap_has_room(class_idx, want)) { + // Not enough room in TLS SLL + return 0; + } + + // Step 2: Get TLS slab + TinyTLSSlab* tls = &g_tls_slabs[class_idx]; + if (!tls->ss || !tls->meta) { + // No SuperSlab available + return 0; + } + + TinySlabMeta* meta = tls->meta; + + // Step 3: Check if slab has enough uncarved blocks + uint32_t available = (meta->capacity > meta->carved) + ? (meta->capacity - meta->carved) : 0; + if (available < want) { + // Not enough uncarved blocks + // Note: Could try superslab_refill() here, but keeping it simple for now + return 0; + } + + // Step 4: Get stride and slab base + size_t bs = tiny_stride_for_class(class_idx); + uint8_t* slab_base = tls->slab_base ? 
tls->slab_base + : tiny_slab_base_for_geometry(tls->ss, tls->slab_idx); + + // Step 5: Carve blocks (builds a linked chain) + TinyRefillChain chain; + trc_linear_carve(slab_base, bs, meta, want, class_idx, &chain); + + // Sanity check + if (chain.count != want) { + // Carve failed to produce expected count + // This should not happen, but handle defensively + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[BOX_CARVE_PUSH] WARN: carved %u blocks but expected %u\n", + chain.count, want); + #endif + // Rollback metadata (carved/used already updated by trc_linear_carve) + meta->carved = (uint16_t)((uint32_t)meta->carved - chain.count); + meta->used = (uint16_t)((uint32_t)meta->used - chain.count); + return 0; + } + + // Step 6: Push all blocks to TLS SLL (with rollback on failure) + uint32_t sll_cap = box_cap_get(class_idx); + uint32_t pushed = 0; + void* node = chain.head; + + for (uint32_t i = 0; i < want && node; i++) { + void* next = tiny_next_read(class_idx, node); + + if (!tls_sll_push(class_idx, node, sll_cap)) { + // Push failed (SLL full or other error) + // Rollback: pop all pushed blocks and return to freelist + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[BOX_CARVE_PUSH] Push failed at block %u/%u, rolling back\n", + i, want); + #endif + + // Pop the blocks we just pushed + for (uint32_t j = 0; j < pushed; j++) { + void* popped; + if (tls_sll_pop(class_idx, &popped)) { + // Return to freelist + tiny_next_write(class_idx, popped, meta->freelist); + meta->freelist = popped; + } + } + + // Return remaining unpushed blocks to freelist + while (node) { + void* next_unpushed = tiny_next_read(class_idx, node); + tiny_next_write(class_idx, node, meta->freelist); + meta->freelist = node; + node = next_unpushed; + } + + // Rollback metadata counters + meta->carved = (uint16_t)((uint32_t)meta->carved - want); + meta->used = (uint16_t)((uint32_t)meta->used - want); + + return 0; // All-or-nothing: return 0 on failure + } + + pushed++; + node = next; + } + + // Step 7: Update active counter (all blocks successfully pushed) + ss_active_add(tls->ss, want); + + return want; // Success: all blocks pushed +} + +uint32_t box_carve_and_push_with_freelist(int class_idx, uint32_t want) { + // PRIORITY 1: Bounds check + HAK_CHECK_CLASS_IDX(class_idx, "box_carve_and_push_with_freelist"); + + if (want == 0) return 0; + + // Step 1: Check capacity + if (!box_cap_has_room(class_idx, want)) { + return 0; + } + + // Step 2: Get TLS slab + TinyTLSSlab* tls = &g_tls_slabs[class_idx]; + if (!tls->ss || !tls->meta) { + return 0; + } + + TinySlabMeta* meta = tls->meta; + uint32_t sll_cap = box_cap_get(class_idx); + uint32_t pushed = 0; + + // Step 3: Try freelist first + while (pushed < want && meta->freelist) { + void* p = meta->freelist; + meta->freelist = tiny_next_read(class_idx, p); + meta->used++; + + if (!tls_sll_push(class_idx, p, sll_cap)) { + // Rollback + tiny_next_write(class_idx, p, meta->freelist); + meta->freelist = p; + meta->used--; + + // Rollback all pushed + for (uint32_t j = 0; j < pushed; j++) { + void* popped; + if (tls_sll_pop(class_idx, &popped)) { + tiny_next_write(class_idx, popped, meta->freelist); + meta->freelist = popped; + meta->used--; + } + } + return 0; + } + + ss_active_add(tls->ss, 1); + pushed++; + } + + // Step 4: If still need more, try carving + if (pushed < want) { + uint32_t need = want - pushed; + uint32_t carved = box_carve_and_push(class_idx, need); + + if (carved < need) { + // Partial failure: rollback freelist pushes + for (uint32_t j = 0; j < pushed; j++) { + void* 
popped; + if (tls_sll_pop(class_idx, &popped)) { + tiny_next_write(class_idx, popped, meta->freelist); + meta->freelist = popped; + meta->used--; + ss_active_add(tls->ss, -1); + } + } + return 0; + } + + pushed += carved; + } + + return pushed; +} diff --git a/core/box/carve_push_box.h b/core/box/carve_push_box.h new file mode 100644 index 00000000..fae98eaa --- /dev/null +++ b/core/box/carve_push_box.h @@ -0,0 +1,51 @@ +// carve_push_box.h - Box Carve-And-Push +// Priority 2 Box: Atomic Block Carving and TLS SLL Push +// +// Purpose: +// - Prevent rollback bugs (root cause of 20-carved-but-16-pushed issue) +// - Atomic operation: carve + header + push (all-or-nothing) +// - Eliminate partial failures that leave orphaned blocks +// +// Design: +// - Wraps trc_linear_carve() + tls_sll_push() +// - Rollback on any failure +// - Active counter management built-in +// - Clear error reporting + +#ifndef HAKMEM_BOX_CARVE_PUSH_H +#define HAKMEM_BOX_CARVE_PUSH_H + +#include +#include + +// ============================================================================ +// Box Carve-Push API +// ============================================================================ + +// Carve N blocks from current TLS slab and atomically push to TLS SLL +// +// Guarantees: +// - All-or-nothing: either all N blocks are pushed, or none +// - No orphaned blocks (carved but not pushed) +// - Headers written correctly before push +// - Active counters updated atomically +// +// Returns: actual count pushed +// - On success: want (all blocks pushed) +// - On failure: 0 (rolled back, no blocks pushed) +// +// Failure cases: +// - No SuperSlab available +// - Slab exhausted (capacity reached) +// - TLS SLL capacity exceeded +// - Invalid class_idx +// +// Thread-safe: uses TLS +uint32_t box_carve_and_push(int class_idx, uint32_t want); + +// Variant: carve and push with freelist fallback +// If slab is exhausted, tries to pop from freelist first +// Same guarantees as box_carve_and_push() +uint32_t box_carve_and_push_with_freelist(int class_idx, uint32_t want); + +#endif // HAKMEM_BOX_CARVE_PUSH_H diff --git a/core/box/prewarm_box.c b/core/box/prewarm_box.c new file mode 100644 index 00000000..808fea66 --- /dev/null +++ b/core/box/prewarm_box.c @@ -0,0 +1,89 @@ +// prewarm_box.c - Box Prewarm Implementation +#include +#include +#include "../hakmem_tiny.h" // MUST BE FIRST: Base types +#include "../tiny_tls.h" // TinyTLSSlab type definition +#include "../hakmem_tiny_config.h" // TINY_NUM_CLASSES +#include "../hakmem_tiny_superslab.h" // SuperSlab +#include "../hakmem_tiny_integrity.h" // HAK_CHECK_CLASS_IDX +#include "prewarm_box.h" +#include "capacity_box.h" // box_cap_init(), box_cap_avail() +#include "carve_push_box.h" // box_carve_and_push() + +// External declarations +extern __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; +extern __thread uint32_t g_tls_sll_count[TINY_NUM_CLASSES]; +extern SuperSlab* superslab_refill(int class_idx); + +// ============================================================================ +// Box Prewarm API Implementation +// ============================================================================ + +int box_prewarm_tls(int class_idx, int count) { + // PRIORITY 1: Bounds check + HAK_CHECK_CLASS_IDX(class_idx, "box_prewarm_tls"); + + if (count <= 0) return 0; + + // Step 1: Ensure capacity system is initialized + // This is critical to prevent the double-free bug + box_cap_init(); + + // Step 2: Check available capacity + uint32_t avail = box_cap_avail(class_idx); + if (avail == 
0) { + // TLS SLL already at capacity + return 0; + } + + // Limit count to available capacity + uint32_t want = (uint32_t)count; + if (want > avail) { + want = avail; + } + + // Step 3: Ensure SuperSlab is available + TinyTLSSlab* tls = &g_tls_slabs[class_idx]; + if (!tls->ss) { + // Try to allocate SuperSlab + if (superslab_refill(class_idx) == NULL) { + #if !HAKMEM_BUILD_RELEASE + fprintf(stderr, "[BOX_PREWARM] Failed to allocate SuperSlab for class %d\n", + class_idx); + #endif + return 0; + } + // Reload tls pointer after superslab_refill + tls = &g_tls_slabs[class_idx]; + } + + // Step 4: Atomically carve and push blocks + // This uses Box Carve-Push which guarantees no orphaned blocks + uint32_t pushed = box_carve_and_push(class_idx, want); + + #if !HAKMEM_BUILD_RELEASE + if (pushed < want) { + fprintf(stderr, "[BOX_PREWARM] Partial prewarm: requested=%u pushed=%u class=%d\n", + want, pushed, class_idx); + } + #endif + + return (int)pushed; +} + +int box_prewarm_needed(int class_idx, int target_count) { + // PRIORITY 1: Bounds check + HAK_CHECK_CLASS_IDX(class_idx, "box_prewarm_needed"); + + if (target_count <= 0) return 0; + + // Check current count + uint32_t current = g_tls_sll_count[class_idx]; + if (current >= (uint32_t)target_count) { + // Already at or above target + return 0; + } + + // Return how many more blocks needed + return (target_count - (int)current); +} diff --git a/core/box/prewarm_box.h b/core/box/prewarm_box.h new file mode 100644 index 00000000..98d6f1b3 --- /dev/null +++ b/core/box/prewarm_box.h @@ -0,0 +1,54 @@ +// prewarm_box.h - Box Prewarm +// Priority 3 Box: Safe TLS Cache Pre-warming +// +// Purpose: +// - Simple, safe API for pre-warming TLS caches +// - Hides complex initialization dependencies +// - Uses Box Capacity Manager + Box Carve-Push for safety +// - Prevents double-free bugs from initialization order issues +// +// Design: +// - Wraps capacity_box + carve_push_box +// - Handles SuperSlab allocation automatically +// - Idempotent: safe to call multiple times +// - Clear success/failure reporting + +#ifndef HAKMEM_BOX_PREWARM_H +#define HAKMEM_BOX_PREWARM_H + +#include +#include + +// ============================================================================ +// Box Prewarm API +// ============================================================================ + +// Pre-warm TLS SLL cache for a class with N blocks +// +// What it does: +// 1. Ensures capacity system is initialized +// 2. Checks/allocates SuperSlab if needed +// 3. 
Atomically carves and pushes N blocks to TLS SLL
+//
+// Returns: actual count pushed
+//   - On success: count (or less if capacity limit reached)
+//   - On failure: 0
+//
+// Safety guarantees:
+//   - No orphaned blocks (all-or-nothing carve-push)
+//   - Correct initialization order
+//   - Active counters updated atomically
+//   - No double-free risk
+//
+// Thread-safe: uses TLS
+// Idempotent: safe to call multiple times (subsequent calls are no-op if already full)
+//
+// Example:
+//   box_prewarm_tls(5, 128);  // Pre-warm class 5 (256B) with 128 blocks
+int box_prewarm_tls(int class_idx, int count);
+
+// Check if prewarm is needed (TLS SLL is empty or below threshold)
+// Returns: number of blocks to prewarm, or 0 if already warmed
+int box_prewarm_needed(int class_idx, int target_count);
+
+#endif // HAKMEM_BOX_PREWARM_H
diff --git a/core/box/tiny_next_ptr_box.h b/core/box/tiny_next_ptr_box.h
new file mode 100644
index 00000000..a745c916
--- /dev/null
+++ b/core/box/tiny_next_ptr_box.h
@@ -0,0 +1,134 @@
+#ifndef TINY_NEXT_PTR_BOX_H
+#define TINY_NEXT_PTR_BOX_H
+
+/**
+ * πŸ“¦ Box: Next Pointer Operations (Lowest-Level API)
+ *
+ * Phase E1-CORRECT: Unified next pointer read/write API for ALL classes (C0-C7)
+ *
+ * This Box provides a structural guarantee that ALL next pointer operations
+ * use consistent offset calculation, eliminating scattered direct-pointer-access bugs.
+ *
+ * Design:
+ * - With HAKMEM_TINY_HEADER_CLASSIDX=1: next pointer stored at base+1 (classes 1-6)
+ *   or at base+0 (classes 0 and 7, which overwrite the header while on a freelist)
+ * - Without headers: next pointer stored at base+0
+ * - Inline expansion ensures ZERO performance cost
+ *
+ * Usage:
+ *   void* next = tiny_next_read(class_idx, base_ptr);  // Read next pointer
+ *   tiny_next_write(class_idx, base_ptr, new_next);    // Write next pointer
+ *
+ * Critical:
+ * - ALL freelist operations MUST use this API
+ * - Direct access like *(void**)ptr is PROHIBITED
+ * - Grep can detect violations: grep -rn '\*\(void\*\*\)' core/
+ */
+
+#include <stdint.h>
+#include <stdio.h>      // For debug fprintf
+#include <stdatomic.h>  // For _Atomic
+#include <stdlib.h>     // For abort()
+
+/**
+ * Write next pointer to freelist node
+ *
+ * @param class_idx   Size class index (0-7)
+ * @param base        Base pointer (NOT user pointer)
+ * @param next_value  Next pointer to store (or NULL for list terminator)
+ *
+ * CRITICAL FIX: Class 0 (8B block) cannot fit an 8B pointer at offset 1!
+ * - Class 0:   8B total = [1B header][7B data] β†’ pointer at base+0 (overwrites header when free)
+ * - Class 1-6: next at base+1 (after header)
+ * - Class 7:   next at base+0 (no header in original design, kept for compatibility)
+ *
+ * NOTE: We take class_idx as a parameter (NOT read from the header) because:
+ * - Linearly carved blocks don't have headers yet (uninitialized memory)
+ * - Class 0/7 overwrite the header with the next pointer when on a freelist
+ */
+static inline void tiny_next_write(int class_idx, void* base, void* next_value) {
+#if HAKMEM_TINY_HEADER_CLASSIDX
+    // Phase E1-CORRECT FIX: Use class_idx parameter (NOT header byte!)
+    // Reading uninitialized header bytes causes random offset calculation
+    size_t next_offset = (class_idx == 0 || class_idx == 7) ?
0 : 1; + + // πŸ› DEBUG: Log writes for debugging (Class 1-6 only - Class 0/7 overwrite header) + #if !HAKMEM_BUILD_RELEASE + static _Atomic uint64_t g_write_count = 0; + uint64_t write_num = atomic_fetch_add(&g_write_count, 1); + + // Log first 20 writes for debugging + if (write_num < 20) { + fprintf(stderr, "[BOX_WRITE #%lu] class=%d base=%p next=%p offset=%zu\n", + write_num, class_idx, base, next_value, next_offset); + fflush(stderr); + } + + // Verify header for Class 1-6 (Class 0/7 have no valid header on freelist) + if (next_offset != 0) { + uint8_t header_before = *(uint8_t*)base; + *(void**)((uint8_t*)base + next_offset) = next_value; + uint8_t header_after = *(uint8_t*)base; + + if (header_after != header_before) { + fprintf(stderr, "\nπŸ› BUG DETECTED: Header corruption!\n"); + fprintf(stderr, "Class: %d, Base: %p, Header before: 0x%02x, after: 0x%02x\n", + class_idx, base, header_before, header_after); + fflush(stderr); + abort(); + } + } else { + // Class 0/7: Just write, no header validation + *(void**)((uint8_t*)base + next_offset) = next_value; + } + #else + // Release: Direct write + *(void**)((uint8_t*)base + next_offset) = next_value; + #endif +#else + // No headers: Next pointer at base + *(void**)base = next_value; +#endif +} + +/** + * Read next pointer from freelist node + * + * @param class_idx Size class index (0-7) + * @param base Base pointer (NOT user pointer) + * @return Next pointer (or NULL if end of list) + */ +static inline void* tiny_next_read(int class_idx, const void* base) { +#if HAKMEM_TINY_HEADER_CLASSIDX + // Phase E1-CORRECT FIX: Use class_idx parameter (NOT header byte!) + size_t next_offset = (class_idx == 0 || class_idx == 7) ? 0 : 1; + + // πŸ› DEBUG: Check if we're about to read a corrupted next pointer (Class 1-6 only) + #if !HAKMEM_BUILD_RELEASE + void* next_val = *(void**)((const uint8_t*)base + next_offset); + + // For Class 1-6 (offset=1), check if next pointer looks corrupted (starts with 0xa0-0xa7) + // This means someone wrote to offset 0, overwriting the header + if (next_offset == 1 && next_val != NULL) { + uintptr_t next_addr = (uintptr_t)next_val; + uint8_t high_byte = (next_addr >> 56) & 0xFF; + + if (high_byte >= 0xa0 && high_byte <= 0xa7) { + fprintf(stderr, "\nπŸ› BUG DETECTED: Corrupted next pointer!\n"); + fprintf(stderr, "Class: %d, Base: %p, Next: %p (high byte: 0x%02x)\n", + class_idx, base, next_val, high_byte); + fprintf(stderr, "This means next pointer was written at OFFSET 0!\n"); + fflush(stderr); + abort(); + } + } + #endif + + return *(void**)((const uint8_t*)base + next_offset); +#else + // No headers: Next pointer at base + return *(void**)base; +#endif +} + +#endif // TINY_NEXT_PTR_BOX_H diff --git a/core/box/tls_sll_box.h b/core/box/tls_sll_box.h index 30b511cf..6a0c1dc0 100644 --- a/core/box/tls_sll_box.h +++ b/core/box/tls_sll_box.h @@ -31,6 +31,7 @@ #include "../tiny_region_id.h" // HEADER_MAGIC / HEADER_CLASS_MASK #include "../hakmem_tiny_integrity.h" // PRIORITY 2: Freelist integrity checks #include "../ptr_track.h" // Pointer tracking for debugging header corruption +#include "tiny_next_ptr_box.h" // Box API: Next pointer read/write // Debug guard: validate base pointer before SLL ops (Debug only) #if !HAKMEM_BUILD_RELEASE @@ -81,11 +82,7 @@ static inline bool tls_sll_push(int class_idx, void* ptr, uint32_t capacity) { // PRIORITY 1: Bounds check BEFORE any array access HAK_CHECK_CLASS_IDX(class_idx, "tls_sll_push"); - // CRITICAL: C7 (1KB) is headerless - MUST NOT use TLS SLL - // Reason: SLL stores 
next pointer in first 8 bytes (user data for C7) - if (__builtin_expect(class_idx == 7, 0)) { - return false; // C7 rejected - } + // Phase E1-CORRECT: All classes including C7 can now use TLS SLL // Capacity check if (g_tls_sll_count[class_idx] >= capacity) { @@ -246,9 +243,10 @@ static inline bool tls_sll_pop(int class_idx, void** out) { #endif // Pop from SLL (reads next from base) - // Phase 7: Read next pointer at header-safe offset + // Phase E1-CORRECT FIX: Class 0 must use offset 0 (8B block can't fit 8B pointer at offset 1) #if HAKMEM_TINY_HEADER_CLASSIDX - const size_t next_offset = (class_idx == 7) ? 0 : 1; + // CRITICAL: Use class_idx argument (NOT header byte) because Class 0/7 overwrite header with next pointer! + const size_t next_offset = (class_idx == 0 || class_idx == 7) ? 0 : 1; #else const size_t next_offset = 0; #endif @@ -272,8 +270,9 @@ static inline bool tls_sll_pop(int class_idx, void** out) { // βœ… FIX #12: VALIDATION - Detect header corruption at the moment it's injected // This is the CRITICAL validation point: we validate the header BEFORE reading next pointer. // If the header is corrupted here, we know corruption happened BEFORE this pop (during push/splice/carve). + // Phase E1-CORRECT: Class 1-6 have headers, Class 0/7 overwrite header with next pointer #if HAKMEM_TINY_HEADER_CLASSIDX - if (class_idx != 7) { + if (class_idx != 0 && class_idx != 7) { // Read byte 0 (should be header = HEADER_MAGIC | class_idx) uint8_t byte0 = *(uint8_t*)base; PTR_TRACK_TLS_POP(base, class_idx); // Track POP operation @@ -315,7 +314,7 @@ static inline bool tls_sll_pop(int class_idx, void** out) { fflush(stderr); abort(); // Immediate crash with backtrace } - } + } // end if (class_idx != 0 && class_idx != 7) #endif // DEBUG: Log read operation for crash investigation @@ -390,40 +389,36 @@ static inline bool tls_sll_pop(int class_idx, void** out) { // - C0-C6 (header): next at base+1 (offset 1) - **WAS NOT CLEARED** ← BUG! // // Previous WRONG assumption: "C0-C6 header hides next" - FALSE! - // Header is 1 byte at base, next is 8 bytes at base+1 (user-accessible memory!) 
+ // Phase E1-CORRECT: All classes have 1-byte header at base, next is at base+1 // // Cost: 1 store instruction (~1 cycle) for all classes #if HAKMEM_TINY_HEADER_CLASSIDX - if (class_idx == 7) { - *(void**)base = NULL; // C7: clear at base (offset 0) - } else { - // DEBUG: Verify header is intact BEFORE clearing next pointer - if (class_idx == 2) { - uint8_t header_before_clear = *(uint8_t*)base; - if (header_before_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) { - extern _Atomic uint64_t malloc_count; - uint64_t call_num = atomic_load(&malloc_count); - fprintf(stderr, "[POP_HEADER_CHECK] call=%lu cls=%d base=%p header=0x%02x BEFORE clear_next!\n", - call_num, class_idx, base, header_before_clear); - fflush(stderr); - } + // DEBUG: Verify header is intact BEFORE clearing next pointer + if (class_idx == 2) { + uint8_t header_before_clear = *(uint8_t*)base; + if (header_before_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) { + extern _Atomic uint64_t malloc_count; + uint64_t call_num = atomic_load(&malloc_count); + fprintf(stderr, "[POP_HEADER_CHECK] call=%lu cls=%d base=%p header=0x%02x BEFORE clear_next!\n", + call_num, class_idx, base, header_before_clear); + fflush(stderr); } + } - *(void**)((uint8_t*)base + 1) = NULL; // C0-C6: clear at base+1 (offset 1) + tiny_next_write(class_idx, base, NULL); // All classes: clear next pointer - // DEBUG: Verify header is STILL intact AFTER clearing next pointer - if (class_idx == 2) { - uint8_t header_after_clear = *(uint8_t*)base; - if (header_after_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) { - extern _Atomic uint64_t malloc_count; - uint64_t call_num = atomic_load(&malloc_count); - fprintf(stderr, "[POP_HEADER_CORRUPTED] call=%lu cls=%d base=%p header=0x%02x AFTER clear_next!\n", - call_num, class_idx, base, header_after_clear); - fprintf(stderr, "[POP_HEADER_CORRUPTED] This means clear_next OVERWROTE the header!\n"); - fprintf(stderr, "[POP_HEADER_CORRUPTED] Bug: next_offset calculation is WRONG!\n"); - fflush(stderr); - abort(); - } + // DEBUG: Verify header is STILL intact AFTER clearing next pointer + if (class_idx == 2) { + uint8_t header_after_clear = *(uint8_t*)base; + if (header_after_clear != (HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK))) { + extern _Atomic uint64_t malloc_count; + uint64_t call_num = atomic_load(&malloc_count); + fprintf(stderr, "[POP_HEADER_CORRUPTED] call=%lu cls=%d base=%p header=0x%02x AFTER clear_next!\n", + call_num, class_idx, base, header_after_clear); + fprintf(stderr, "[POP_HEADER_CORRUPTED] This means clear_next OVERWROTE the header!\n"); + fprintf(stderr, "[POP_HEADER_CORRUPTED] Bug: next_offset calculation is WRONG!\n"); + fflush(stderr); + abort(); } } #else @@ -452,14 +447,37 @@ static inline bool tls_sll_pop(int class_idx, void** out) { // // Performance: ~5 cycles + O(count) for chain traversal static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t count, uint32_t capacity) { - // CRITICAL: C7 (1KB) is headerless - MUST NOT splice to TLS SLL - if (__builtin_expect(class_idx == 7, 0)) { - return 0; // C7 rejected + // Phase E1-CORRECT: All classes including C7 can now use splice + + // πŸ› DEBUG: UNCONDITIONAL log to verify function is called + #if !HAKMEM_BUILD_RELEASE + { + static _Atomic int g_once = 0; + if (atomic_fetch_add(&g_once, 1) == 0) { + fprintf(stderr, "[SPLICE_ENTRY] First call to tls_sll_splice()! 
cls=%d count=%u capacity=%u\n", + class_idx, count, capacity); + fflush(stderr); + } } + #endif // Calculate available capacity uint32_t available = (capacity > g_tls_sll_count[class_idx]) ? (capacity - g_tls_sll_count[class_idx]) : 0; + + // πŸ› DEBUG: Log ALL splice inputs to diagnose truncation + #if !HAKMEM_BUILD_RELEASE + { + static _Atomic uint64_t g_splice_log_count = 0; + uint64_t splice_num = atomic_fetch_add(&g_splice_log_count, 1); + if (splice_num < 10) { // Log first 10 splices + fprintf(stderr, "[SPLICE_DEBUG #%lu] cls=%d count=%u capacity=%u sll_count=%u available=%u\n", + splice_num, class_idx, count, capacity, g_tls_sll_count[class_idx], available); + fflush(stderr); + } + } + #endif + if (available == 0 || count == 0 || !chain_head) { return 0; // No space or empty chain } @@ -499,7 +517,7 @@ static inline uint32_t tls_sll_splice(int class_idx, void* chain_head, uint32_t } // Move to next node - void* next = *(void**)((uint8_t*)node + next_offset); + void* next = tiny_next_read(class_idx, node); node = next; restored_count++; } diff --git a/core/hakmem_tiny.c b/core/hakmem_tiny.c index 59d9b6ea..dcdf70fa 100644 --- a/core/hakmem_tiny.c +++ b/core/hakmem_tiny.c @@ -7,6 +7,7 @@ #include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD) #include "hakmem_tiny_magazine.h" #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection +#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write // Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition) #include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine #include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG) @@ -33,17 +34,18 @@ extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c // ============================================================================ // Size class table (Box 3 dependency) // ============================================================================ -// Definition for g_tiny_class_sizes (declared in hakmem_tiny_config.h) -// Used by Box 3 (tiny_box_geometry.h) for stride calculations +// Phase E1-CORRECT: ALL classes have 1-byte header +// These sizes represent TOTAL BLOCK SIZE (stride) = [Header 1B][Data N-1B] +// Usable data = stride - 1 (implicit) const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = { - 8, // Class 0: 8 bytes - 16, // Class 1: 16 bytes - 32, // Class 2: 32 bytes - 64, // Class 3: 64 bytes - 128, // Class 4: 128 bytes - 256, // Class 5: 256 bytes - 512, // Class 6: 512 bytes - 1024 // Class 7: 1024 bytes + 8, // Class 0: 8B total = [Header 1B][Data 7B] + 16, // Class 1: 16B total = [Header 1B][Data 15B] + 32, // Class 2: 32B total = [Header 1B][Data 31B] + 64, // Class 3: 64B total = [Header 1B][Data 63B] + 128, // Class 4: 128B total = [Header 1B][Data 127B] + 256, // Class 5: 256B total = [Header 1B][Data 255B] + 512, // Class 6: 512B total = [Header 1B][Data 511B] + 1024 // Class 7: 1024B total = [Header 1B][Data 1023B] }; // ============================================================================ @@ -153,12 +155,9 @@ static inline void tiny_debug_track_alloc_ret(int cls, void* ptr); #if HAKMEM_TINY_HEADER_CLASSIDX #if HAKMEM_BUILD_RELEASE - // Phase 3: Release - Ultra-fast inline macro (3-4 instructions) - // Eliminates function call overhead, NULL check, guard check, tracking + // Phase E1-CORRECT: ALL classes have 1-byte headers (including C7) + // Ultra-fast inline macro (3-4 instructions) #define HAK_RET_ALLOC(cls, base_ptr) do { \ 
- if (__builtin_expect((cls) == 7, 0)) { \ - return (base_ptr); \ - } \ *(uint8_t*)(base_ptr) = HEADER_MAGIC | ((cls) & HEADER_CLASS_MASK); \ return (void*)((uint8_t*)(base_ptr) + 1); \ } while(0) @@ -215,7 +214,7 @@ static void tiny_apply_mem_diet(void); // Phase 6.23: SuperSlab allocation forward declaration static inline void* hak_tiny_alloc_superslab(int class_idx); static inline void* superslab_tls_bump_fast(int class_idx); -static SuperSlab* superslab_refill(int class_idx); +SuperSlab* superslab_refill(int class_idx); static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx); static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap); // Forward decl: used by tiny_spec_pop_path before its definition @@ -245,7 +244,7 @@ static void tiny_remote_drain_locked(struct TinySlab* slab); __attribute__((always_inline)) static inline void* hak_tiny_alloc_wrapper(int class_idx); // Helpers for SuperSlab active block accounting (atomic, saturating dec) -static inline __attribute__((always_inline)) void ss_active_add(SuperSlab* ss, uint32_t n) { +void ss_active_add(SuperSlab* ss, uint32_t n) { atomic_fetch_add_explicit(&ss->total_active_blocks, n, memory_order_relaxed); } static inline __attribute__((always_inline)) void ss_active_inc(SuperSlab* ss) { @@ -502,7 +501,7 @@ static _Atomic uint32_t g_ss_partial_epoch = 0; // Phase 6.24: Unified TLS slab cache (Medium fix) // Reduces TLS reads from 3 to 1 (cache-line aligned for performance) -static __thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; +__thread TinyTLSSlab g_tls_slabs[TINY_NUM_CLASSES]; static _Atomic uint32_t g_tls_target_cap[TINY_NUM_CLASSES]; static _Atomic uint32_t g_tls_target_refill[TINY_NUM_CLASSES]; static _Atomic uint32_t g_tls_target_spill[TINY_NUM_CLASSES]; @@ -1196,7 +1195,7 @@ typedef struct __attribute__((aligned(64))) { static __thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES]; static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend) // SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER) -static int g_sll_multiplier = 2; +int g_sll_multiplier = 2; // Cached thread id (uint32) to avoid repeated pthread_self() in hot paths static __thread uint32_t g_tls_tid32; static __thread int g_tls_tid32_inited; @@ -1236,7 +1235,7 @@ static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) { // tiny_mmap_gate.h already included at top #include "tiny_publish.h" -static int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_SLL_CAP_C{0..7} +int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // HAKMEM_TINY_SLL_CAP_C{0..7} // Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1) static int g_tiny_prefetch = 0; @@ -1290,15 +1289,8 @@ static __thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via void hak_tiny_prewarm_tls_cache(void) { // Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks // This reduces the first-allocation miss penalty by populating TLS cache + // Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) { - // CRITICAL: C7 (1KB) is headerless - skip TLS SLL refill, but create SuperSlab - if (class_idx == 7) { - // Create C7 SuperSlab explicitly (refill functions skip C7) - // Note: superslab_refill is already declared in hakmem_tiny_refill.inc.h - (void)superslab_refill(class_idx); - continue; - } - int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class // Trigger refill to populate 
TLS cache diff --git a/core/hakmem_tiny_init.inc b/core/hakmem_tiny_init.inc index 4fda6402..cc148555 100644 --- a/core/hakmem_tiny_init.inc +++ b/core/hakmem_tiny_init.inc @@ -1,6 +1,7 @@ // hakmem_tiny_init.inc // Note: uses TLS ops inline helpers for prewarm when class5 hotpath is enabled #include "hakmem_tiny_tls_ops.h" +#include "box/prewarm_box.h" // Box Prewarm API (Priority 3) // Phase 2D-2: Initialization function extraction // // This file contains the hak_tiny_init() function extracted from hakmem_tiny.c @@ -127,17 +128,27 @@ void hak_tiny_init(void) { if (pw && *pw) prewarm = atoi(pw); if (prewarm < 0) prewarm = 0; if (prewarm > (int)tls5->cap) prewarm = (int)tls5->cap; + if (prewarm > 0) { - (void)tls_refill_from_tls_slab(5, tls5, (uint32_t)prewarm); + // βœ… NEW: Use Box Prewarm API (safe, simple, handles all initialization) + // Box Prewarm guarantees: + // - Correct initialization order (capacity system initialized first) + // - No orphaned blocks (atomic carve-and-push) + // - No double-free risk (all-or-nothing semantics) + // - Clear error handling + int taken = box_prewarm_tls(5, prewarm); + + #if !HAKMEM_BUILD_RELEASE + // Debug logging (optional) + fprintf(stderr, "[PREWARM] class=5 requested=%d taken=%d\n", prewarm, taken); + #endif + (void)taken; // Suppress unused warning in release builds } } if (mem_diet_enabled) { tiny_apply_mem_diet(); } - // Phase 2b: Initialize adaptive TLS cache sizing - adaptive_sizing_init(); - // Enable signal-triggered stats dump if requested (SIGUSR1) hak_tiny_enable_signal_dump(); diff --git a/core/hakmem_tiny_refill.inc.h b/core/hakmem_tiny_refill.inc.h index 946b3963..f8cddff0 100644 --- a/core/hakmem_tiny_refill.inc.h +++ b/core/hakmem_tiny_refill.inc.h @@ -27,6 +27,7 @@ #include "superslab/superslab_inline.h" // For slab_index_for/ss_slabs_capacity (Debug validation) #include "box/tls_sll_box.h" // Box TLS-SLL: Safe SLL operations API #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection +#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write #include #include #include @@ -86,10 +87,10 @@ static inline void* tiny_fast_pop(int class_idx); static inline int tiny_fast_push(int class_idx, void* ptr); static inline int tls_refill_from_tls_slab(int class_idx, TinyTLSList* tls, uint32_t want); static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap); -static SuperSlab* superslab_refill(int class_idx); +SuperSlab* superslab_refill(int class_idx); static void* slab_data_start(SuperSlab* ss, int slab_idx); static inline uint8_t* tiny_slab_base_for(SuperSlab* ss, int slab_idx); -static inline void ss_active_add(SuperSlab* ss, uint32_t n); +void ss_active_add(SuperSlab* ss, uint32_t n); static inline void ss_active_inc(SuperSlab* ss); static TinySlab* allocate_new_slab(int class_idx); static void move_to_full_list(int class_idx, struct TinySlab* target_slab); @@ -180,16 +181,11 @@ static inline void* tiny_fast_refill_and_take(int class_idx, TinyTLSList* tls) { } void* ret = batch_head; -#if HAKMEM_TINY_HEADER_CLASSIDX - const size_t next_off_tls = (class_idx == 7) ? 0 : 1; -#else - const size_t next_off_tls = 0; -#endif - void* node = *(void**)((uint8_t*)ret + next_off_tls); + void* node = tiny_next_read(class_idx, ret); uint32_t remaining = (taken > 0u) ? 
(taken - 1u) : 0u; while (node && remaining > 0u) { - void* next = *(void**)((uint8_t*)node + next_off_tls); + void* next = tiny_next_read(class_idx, node); int pushed = 0; if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) { // Headerless array stack for hottest tiny classes @@ -297,10 +293,7 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) { HAK_CHECK_CLASS_IDX(class_idx, "sll_refill_small_from_ss"); atomic_fetch_add(&g_integrity_check_class_bounds, 1); - // CRITICAL: C7 (1KB) is headerless - incompatible with TLS SLL refill - if (__builtin_expect(class_idx == 7, 0)) { - return 0; // C7 uses slow path exclusively - } + // Phase E1-CORRECT: C7 now has headers, can use small refill if (!g_use_superslab || max_take <= 0) return 0; // ランタむムA/B: P0γ‚’ζœ‰εŠΉεŒ–γ—γ¦γ„γ‚‹ε ΄εˆγ―γƒγƒƒγƒrefillへ委譲 @@ -353,14 +346,12 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) { meta->carved++; meta->used++; - // βœ… FIX #11B: Restore header BEFORE tls_sll_push + // Phase E1-CORRECT: Restore header BEFORE tls_sll_push // ROOT CAUSE: Simple refill path carves blocks but doesn't write headers. - // tls_sll_push() expects headers at base for C0-C6 to write next at base+1. - // Without header, base+1 contains garbage β†’ chain corruption β†’ SEGV! + // tls_sll_push() expects headers at base to write next at base+1. + // ALL classes (including C7) need headers restored! #if HAKMEM_TINY_HEADER_CLASSIDX - if (class_idx != 7) { - *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); - } + *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); #endif // CRITICAL: Use Box TLS-SLL API (C7-safe, no race) @@ -376,22 +367,24 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) { // Freelist fallback if (__builtin_expect(meta->freelist != NULL, 0)) { void* p = meta->freelist; - meta->freelist = *(void**)p; + // BUG FIX: Use Box API to read next pointer at correct offset + void* next = tiny_next_read(class_idx, p); + meta->freelist = next; meta->used++; - // βœ… FIX #11B: Restore header BEFORE tls_sll_push (same as Fix #11 for freelist) + // Phase E1-CORRECT: Restore header BEFORE tls_sll_push // Freelist stores next at base (offset 0), overwriting header. // Must restore header so tls_sll_push can write next at base+1 correctly. + // ALL classes (including C7) need headers restored! 
#if HAKMEM_TINY_HEADER_CLASSIDX - if (class_idx != 7) { - *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); - } + *(uint8_t*)p = HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK); #endif // CRITICAL: Use Box TLS-SLL API (C7-safe, no race) if (!tls_sll_push(class_idx, p, sll_cap)) { // SLL full (should not happen, room was checked) - *(void**)p = meta->freelist; // Rollback freelist + // BUG FIX: Use Box API to write rollback next pointer + tiny_next_write(class_idx, p, next); // Rollback freelist meta->freelist = p; meta->used--; break; @@ -421,7 +414,8 @@ static inline int sll_refill_small_from_ss(int class_idx, int max_take) { while (taken < take) { void* p = NULL; if (__builtin_expect(meta->freelist != NULL, 0)) { - p = meta->freelist; meta->freelist = *(void**)p; meta->used++; + // BUG FIX: Use Box API to read next pointer at correct offset + p = meta->freelist; meta->freelist = tiny_next_read(class_idx, p); meta->used++; // Track active blocks reserved into TLS SLL ss_active_inc(tls->ss); } else if (__builtin_expect(meta->carved < meta->capacity, 1)) { diff --git a/core/tiny_free_fast_v2.inc.h b/core/tiny_free_fast_v2.inc.h index b76735d8..e9895f0c 100644 --- a/core/tiny_free_fast_v2.inc.h +++ b/core/tiny_free_fast_v2.inc.h @@ -51,32 +51,29 @@ extern void hak_tiny_free(void* ptr); // Fallback for non-header allocations static inline int hak_tiny_free_fast_v2(void* ptr) { if (__builtin_expect(!ptr, 0)) return 0; - // CRITICAL: C7 (1KB) is headerless and CANNOT use fast path - // Reading ptr-1 for C7 causes SIGBUS (accesses previous allocation or unmapped page) - // Solution: Check for 1KB alignment and delegate to slow path - // Note: This heuristic has ~0.1% false positive rate (other allocations at 1KB boundaries) - // but is necessary for C7 safety. Slow path handles all cases correctly. - if (__builtin_expect(((uintptr_t)ptr & 0x3FF) == 0, 0)) { - // Pointer is 1KB-aligned β†’ likely C7 or page boundary allocation - // Use slow path for safety (slow path has proper C7 handling) - return 0; - } + // Phase E3-1: Remove registry lookup (50-100 cycles overhead) + // Reason: Phase E1 added headers to C7, making this check redundant + // Header magic validation (2-3 cycles) is now sufficient for all classes + // Expected: 9M β†’ 30-50M ops/s recovery (+226-443%) - // CRITICAL: Check if header is accessible + // CRITICAL: Check if header is accessible before reading void* header_addr = (char*)ptr - 1; -#if defined(HAKMEM_POOL_TLS_PHASE1) && HAKMEM_TINY_SAFE_FREE - // Strict mode: validate header address with mincore() on every free +#if !HAKMEM_BUILD_RELEASE + // Debug: Always validate header accessibility (strict safety check) + // Cost: ~634 cycles per free (mincore syscall) + // Benefit: Catch all SEGV cases (100% safe) extern int hak_is_memory_readable(void* addr); if (!hak_is_memory_readable(header_addr)) { return 0; // Header not accessible - not a Tiny allocation } #else - // Pool TLS disabled: Optimize for common case (99.9% hit rate) + // Release: Optimize for common case (99.9% hit rate) // Strategy: Only check page boundaries (ptr & 0xFFF == 0) // - Page boundary check: 1-2 cycles // - mincore() syscall: ~634 cycles (only if page-aligned) // - Result: 99.9% of frees avoid mincore() β†’ 317-634x faster! 
+ // - Safety: Page-aligned allocations are rare, most Tiny blocks are interior if (__builtin_expect(((uintptr_t)ptr & 0xFFF) == 0, 0)) { extern int hak_is_memory_readable(void* addr); if (!hak_is_memory_readable(header_addr)) { @@ -116,30 +113,23 @@ static inline int hak_tiny_free_fast_v2(void* ptr) { } atomic_fetch_add(&g_integrity_check_class_bounds, 1); - // 2. Check TLS freelist capacity (optional, for bounded cache) - // Note: Can be disabled in release for maximum speed -#if !HAKMEM_BUILD_RELEASE - // Debug-only: simple capacity guard to avoid unbounded TLS growth + // 2. Check TLS freelist capacity (defense in depth - ALWAYS ENABLED) + // CRITICAL: Enable in both debug and release to prevent corruption accumulation + // Reason: If C7 slips through magic validation, capacity limit prevents unbounded growth + // Cost: 1 comparison (~1 cycle, predict-not-taken) + // Benefit: Fail-safe against TLS SLL pollution from false positives uint32_t cap = (uint32_t)TINY_TLS_MAG_CAP; if (__builtin_expect(g_tls_sll_count[class_idx] >= cap, 0)) { - return 0; // Route to slow path for spill + return 0; // Route to slow path for spill (Front Gate will catch corruption) } -#endif // 3. Push base to TLS freelist (4 instructions, 5-7 cycles) // Must push base (block start) not user pointer! - // Classes 0-6: Allocation returns base+1 (after header) β†’ Free must compute base = ptr-1 - // Class 7 (C7): Headerless, allocation returns base β†’ Free uses ptr as-is - void* base; - if (__builtin_expect(class_idx == 7, 0)) { - // C7 is headerless - ptr IS the base (no adjustment needed) - base = ptr; - } else { - // Normal classes have 1-byte header - base is ptr-1 - base = (char*)ptr - 1; - } + // Phase E1: ALL classes (C0-C7) have 1-byte header β†’ base = ptr-1 + void* base = (char*)ptr - 1; - // Use Box TLS-SLL API (C7-safe) + // REVERT E3-2: Use Box TLS-SLL for all builds (testing hypothesis) + // Hypothesis: Box TLS-SLL acts as verification layer, masking underlying bugs if (!tls_sll_push(class_idx, base, UINT32_MAX)) { // C7 rejected or capacity exceeded - route to slow path return 0; diff --git a/core/tiny_superslab_alloc.inc.h b/core/tiny_superslab_alloc.inc.h index cdcb324e..9bb9a182 100644 --- a/core/tiny_superslab_alloc.inc.h +++ b/core/tiny_superslab_alloc.inc.h @@ -9,6 +9,7 @@ // - hak_tiny_alloc_superslab(): Main SuperSlab allocation entry point #include "box/superslab_expansion_box.h" // Box E: Expansion with TLS state guarantee +#include "box/tiny_next_ptr_box.h" // Box API: Next pointer read/write // ============================================================================ // Phase 6.23: SuperSlab Allocation Helpers @@ -152,7 +153,7 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) { } } - meta->freelist = *(void**)block; // Pop from freelist + meta->freelist = tiny_next_read(ss->size_class, block); // Pop from freelist meta->used++; if (__builtin_expect(tiny_refill_failfast_level() >= 2, 0)) { @@ -196,7 +197,7 @@ static inline int adopt_bind_if_safe(TinyTLSSlab* tls, SuperSlab* ss, int slab_i } // Phase 6.24 & 7.6: Refill TLS SuperSlab (with unified TLS cache + deferred allocation) -static SuperSlab* superslab_refill(int class_idx) { +SuperSlab* superslab_refill(int class_idx) { #if HAKMEM_DEBUG_COUNTERS g_superslab_refill_calls_dbg[class_idx]++; #endif @@ -713,7 +714,7 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) { return NULL; } } - void* next = *(void**)block; + void* next = tiny_next_read(class_idx, block); meta->freelist = next; 
meta->used++; // Optional: clear freelist bit when becomes empty @@ -770,21 +771,6 @@ static inline void* hak_tiny_alloc_superslab(int class_idx) { // } meta->used++; - // Debug: Log first C7 alloc for path verification (debug-only) -#if HAKMEM_DEBUG_VERBOSE - if (class_idx == 7) { - static _Atomic int c7_alloc_count = 0; - int count = atomic_fetch_add_explicit(&c7_alloc_count, 1, memory_order_relaxed); - if (count == 0) { - void* next = NULL; - // C7 has no header, next pointer is at base - if (block && ss->size_class == 7) { - next = *(void**)block; - } - fprintf(stderr, "[C7_FIRST_ALLOC] ptr=%p next=%p slab_idx=%d\n", block, next, slab_idx); - } - } -#endif // Track active blocks in SuperSlab for conservative reclamation ss_active_inc(ss);
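
Usage notes (illustrative, not part of the patch). The three new Boxes are meant to be layered: the capacity box answers "how many blocks may this class's TLS SLL hold", the carve-push box moves blocks into it all-or-nothing, and the prewarm box drives both. The sketch below shows the intended call order; it assumes the allocator's TLS state is already set up by hak_tiny_init(), and the include paths are guessed from the tree layout above.

#include <stdio.h>
#include "core/box/capacity_box.h"
#include "core/box/prewarm_box.h"

// Warm class 5 (256B) up to a 128-block target, mirroring the path that
// hakmem_tiny_init.inc now takes through box_prewarm_tls().
static void warm_class5_example(void) {
    box_cap_init();                              // idempotent, safe to repeat

    int missing = box_prewarm_needed(5, 128);    // blocks still needed to reach the target
    if (missing > 0) {
        int taken = box_prewarm_tls(5, missing); // carve + push, all-or-nothing per batch
        fprintf(stderr, "prewarm class 5: requested=%d pushed=%d room left=%u\n",
                missing, taken, box_cap_avail(5));
    }
}

Passing the raw target instead of `missing` behaves the same way, since box_prewarm_tls() already clamps the request to box_cap_avail().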
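
For reference, the capacity policy that box_cap_get() applies reduces to a small pure function: an explicit override wins, hot classes (0-3) scale the adaptive base by g_sll_multiplier, mid-large classes (4-7) halve it, and everything is clamped. The mirror below is an illustration with concrete numbers, not the allocator's code; EXAMPLE_MAG_CAP stands in for TINY_TLS_MAG_CAP, whose real value is build-specific.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_MAG_CAP 256u   // stand-in for TINY_TLS_MAG_CAP (assumed value)

static uint32_t example_cap(int class_idx, uint32_t adaptive_base,
                            int override_cap, int multiplier) {
    if (override_cap > 0) {                      // override path wins outright
        uint32_t cap = (uint32_t)override_cap;
        return cap > EXAMPLE_MAG_CAP ? EXAMPLE_MAG_CAP : cap;
    }
    uint64_t cap = adaptive_base;
    if (class_idx <= 3) cap *= (multiplier > 0 ? (uint32_t)multiplier : 1u);  // hot classes
    else                cap = (cap > 1 ? cap / 2 : 1);                        // mid-large classes
    return cap > EXAMPLE_MAG_CAP ? EXAMPLE_MAG_CAP : (uint32_t)cap;
}

int main(void) {
    printf("class 2, base 64, x2   -> %u\n", example_cap(2, 64, 0, 2));    // 128
    printf("class 5, base 64       -> %u\n", example_cap(5, 64, 0, 2));    // 32
    printf("class 1, override 500  -> %u\n", example_cap(1, 500, 500, 2)); // clamped to 256
    return 0;
}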
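
The offset rule that tiny_next_ptr_box.h centralizes is the part that was repeatedly gotten wrong, so here is a standalone illustration (plain C, not allocator code) of where the next pointer lives relative to the 1-byte header: classes 1-6 keep the header at base and store next at base+1, while classes 0 and 7 reuse base+0 and sacrifice the header while the block sits on a freelist. The 0xa0 header magic is inferred from the corruption check in tiny_next_read(); treat it as an assumption about HEADER_MAGIC.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static size_t next_offset_for(int class_idx) {
    return (class_idx == 0 || class_idx == 7) ? 0 : 1;   // same rule as the Box API
}

int main(void) {
    unsigned char block[32];            // pretend this is a class-2 (32B) block
    void *next = (void *)0x1000;        // dummy next pointer for the demo

    block[0] = 0xa0 | 2;                // header byte: magic | class (0xa0 assumed)
    memcpy(block + next_offset_for(2), &next, sizeof next);  // next at base+1, header intact

    printf("class 2: header=0x%02x, next stored at offset %zu\n",
           block[0], next_offset_for(2));
    printf("class 7: next stored at offset %zu (header is overwritten while free)\n",
           next_offset_for(7));
    return 0;
}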