diff --git a/core/box/hak_alloc_api.inc.h b/core/box/hak_alloc_api.inc.h index 6d9b6800..1992e818 100644 --- a/core/box/hak_alloc_api.inc.h +++ b/core/box/hak_alloc_api.inc.h @@ -1,8 +1,10 @@ // hak_alloc_api.inc.h — Box: hak_alloc_at() implementation +// Phase 2 Update: Lane-based allocation routing (Single Source of Truth) #ifndef HAK_ALLOC_API_INC_H #define HAK_ALLOC_API_INC_H -#include "../hakmem_tiny.h" // For tiny_get_max_size() (Phase 16) +#include "../hakmem_tiny.h" // For tiny_get_max_size() + hak_lane_classify.inc.h +#include "../hakmem_pool.h" // Phase 2: For hak_pool_try_alloc() (Pool lane 1025B-52KB) #include "../hakmem_smallmid.h" // For Small-Mid Front Box (Phase 17-1) #ifdef HAKMEM_POOL_TLS_PHASE1 @@ -106,15 +108,29 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { hkm_size_hist_record(size); + // ========================================================================= + // Phase 2: Pool Lane (LANE_POOL: 1025B-52KB) + // ========================================================================= + // Key fix: Route 1025-52KB to Pool BEFORE ACE + // This eliminates the "unmanaged zone" (1025-2047B) that caused libc fragmentation + // + // Pool has 2KB as smallest class, so 1025-2047B requests use 2KB class + // (internal fragmentation ~48%, but better than libc fragmentation!) + + if (HAK_LANE_IS_POOL(size)) { #ifdef HAKMEM_POOL_TLS_PHASE1 - // Phase 1: Ultra-fast Pool TLS for 8KB-52KB range - if (size >= 8192 && size <= 53248) { - void* pool_ptr = pool_alloc(size); - // PERF_OPT: likely hint - pool allocations usually succeed - if (__builtin_expect(pool_ptr != NULL, 1)) return pool_ptr; - // Fall through to existing Mid allocator as fallback - } + // Pool TLS fast path (8KB-52KB only, pool_tls.c classes) + if (size >= 8192 && size <= 53248) { + void* pool_ptr = pool_alloc(size); + if (__builtin_expect(pool_ptr != NULL, 1)) return pool_ptr; + } #endif + // Pool API path (1025B-52KB, hakmem_pool.c classes including 2KB) + // This catches 1025-8191B range that Pool TLS doesn't handle + void* pool_try = hak_pool_try_alloc(size, site_id); + if (__builtin_expect(pool_try != NULL, 1)) return pool_try; + // Fall through to ACE if Pool fails + } #if HAKMEM_FEATURE_EVOLUTION if (g_evo_sample_mask > 0) { @@ -155,7 +171,13 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { #endif } - if (size > TINY_MAX_SIZE && size < threshold) { + // ========================================================================= + // Phase 2: ACE Lane (LANE_ACE: 52KB-2MB) + HUGE Lane (2MB+) + // ========================================================================= + // ACE handles sizes between Pool max (52KB) and huge threshold (2MB) + // Sizes > 2MB go directly to mmap (LANE_HUGE) + + if (HAK_LANE_IS_ACE(size) || size > LANE_POOL_MAX) { const FrozenPolicy* pol = hkm_policy_get(); #if HAKMEM_DEBUG_TIMING HKM_TIME_START(t_ace); @@ -167,46 +189,41 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) { if (l1) return l1; } - // PHASE 7 CRITICAL FIX: Handle allocation gap (1KB-8KB) when ACE is disabled - // Size range: - // 0-1024: Tiny allocator - // 1025-8191: Gap! 
(Mid starts at 8KB, ACE often disabled) - // 8KB-32KB: Mid allocator - // 32KB-2MB: ACE (if enabled, otherwise mmap) - // 2MB+: mmap - // - // Solution: Use mmap for gap when ACE failed (ACE disabled or OOM) + // ========================================================================= + // Phase 2: Final Fallback (mmap) - should be rare after Pool fix + // ========================================================================= + // With Phase 2 Pool extension, 1025-52KB should be handled by Pool + // This fallback is for: + // - LANE_HUGE (2MB+): Normal mmap path + // - Pool/ACE failures: Emergency fallback + // - LANE_TINY failures: Should not happen (design bug) - // Track final fallback mmaps globally extern _Atomic uint64_t g_final_fallback_mmap_count; void* ptr; - if (size >= threshold) { - // Large allocation (>= 2MB default): descend via single boundary + if (HAK_LANE_IS_HUGE(size)) { + // LANE_HUGE: Normal path for 2MB+ allocations atomic_fetch_add(&g_final_fallback_mmap_count, 1); ptr = hak_os_map_boundary(size, site_id); - } else if (size >= TINY_MAX_SIZE) { - // Mid-range allocation (1KB-2MB): try mmap as final fallback - // This handles the gap when ACE is disabled or failed + } else if (size > LANE_TINY_MAX) { + // Pool or ACE failed for 1025B-2MB range - emergency mmap fallback atomic_fetch_add(&g_final_fallback_mmap_count, 1); static _Atomic int gap_alloc_count = 0; int count = atomic_fetch_add(&gap_alloc_count, 1); - #if HAKMEM_DEBUG_VERBOSE - if (count < 3) fprintf(stderr, "[HAKMEM] INFO: mid-gap fallback size=%zu\n", size); + #if !HAKMEM_BUILD_RELEASE + if (count < 5) { + fprintf(stderr, "[HAKMEM] Phase 2 WARN: Pool/ACE fallback size=%zu (should be rare)\n", size); + } #endif ptr = hak_os_map_boundary(size, site_id); } else { - // Should never reach here (size <= TINY_MAX_SIZE should be handled by Tiny) + // LANE_TINY failed - this is a design bug! + HAK_LANE_ASSERT_NO_FALLBACK(LANE_FALLBACK, size); static _Atomic int oom_count = 0; int count = atomic_fetch_add(&oom_count, 1); if (count < 10) { - fprintf(stderr, "[HAKMEM] OOM: Unexpected allocation path for size=%zu, returning NULL\n", size); - fprintf(stderr, "[HAKMEM] (OOM count: %d) This should not happen!\n", count + 1); + fprintf(stderr, "[HAKMEM] BUG: Tiny lane failed for size=%zu (should not happen)\n", size); } -#if HAKMEM_DEBUG_TIMING - HKM_TIME_START(t_malloc); - HKM_TIME_END(HKM_CAT_FALLBACK_MALLOC, t_malloc); // Keep timing for compatibility -#endif errno = ENOMEM; return NULL; } diff --git a/core/box/hak_lane_classify.inc.h b/core/box/hak_lane_classify.inc.h new file mode 100644 index 00000000..69e36b8c --- /dev/null +++ b/core/box/hak_lane_classify.inc.h @@ -0,0 +1,265 @@ +/** + * hak_lane_classify.inc.h - Phase 2: Lane Classification Box + * + * Box: Allocation Lane Classification (Single Source of Truth) + * Responsibility: Centralized size-to-lane mapping with unified boundary definitions + * Contract: All allocator boundaries defined here; no hardcoded values elsewhere + * + * Design Principles (Box Pattern): + * 1. Single Source of Truth: All lane boundaries defined in ONE place + * 2. Normalize-then-Classify: Always use normalized size for classification + * 3. Clear Invariants: POOL_MIN = TINY_MAX + 1 (no gaps) + * 4. Observable: Debug helpers for lane inspection + * 5. Safe: LANE_FALLBACK catches design bugs + * + * Problem Solved: + * - Before: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!) + * - Before: Hardcoded 8192 in Pool TLS, 1024 in Tiny, etc. 
+ * - Result: 1025-2047B "unmanaged zone" causing libc fragmentation + * + * Solution: + * - Define all boundaries as LANE_* constants + * - hak_classify_size() is THE authority for routing + * - Existing code uses compatibility wrappers + * + * Lane Architecture: + * LANE_TINY: [0, LANE_TINY_MAX] = 0-1024B SuperSlab + * LANE_POOL: (LANE_TINY_MAX, LANE_POOL_MAX] = 1025-52KB Pool per-thread + * LANE_ACE: (LANE_POOL_MAX, LANE_ACE_MAX] = 52KB-2MB ACE learning + * LANE_HUGE: (LANE_ACE_MAX, ∞) = 2MB+ mmap direct + * + * Created: 2025-12-02 (Phase 2-1) + * License: MIT + */ + +#ifndef HAK_LANE_CLASSIFY_INC_H +#define HAK_LANE_CLASSIFY_INC_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// ============================================================================ +// Lane Boundary Definitions (Single Source of Truth) +// ============================================================================ +// +// CRITICAL: These are the ONLY authoritative boundary values. +// All other code MUST reference these constants (not hardcode numbers). +// +// Invariant: Each lane's MIN = previous lane's MAX + 1 (no gaps!) + +#define LANE_TINY_MAX 1024 // Tiny handles [0, 1024] +#define LANE_POOL_MIN (LANE_TINY_MAX + 1) // Pool handles [1025, ...] (invariant!) +#define LANE_POOL_MAX (52 * 1024) // Pool handles [..., 52KB] +#define LANE_ACE_MIN (LANE_POOL_MAX + 1) // ACE handles [52KB+1, ...] +#define LANE_ACE_MAX (2 * 1024 * 1024) // ACE handles [..., 2MB] +#define LANE_HUGE_MIN (LANE_ACE_MAX + 1) // Huge handles [2MB+1, ...] + +// ============================================================================ +// Pool Internal: Request Size vs Block Size (separate concepts!) +// ============================================================================ +// +// POOL_MIN_REQUEST_SIZE: Smallest user request Pool will accept (= LANE_POOL_MIN) +// POOL_MIN_CLASS_SIZE: Smallest block class Pool actually allocates +// +// Example: request=1056B -> class=2048B (internal fragmentation ~48%, acceptable) + +#define POOL_MIN_REQUEST_SIZE LANE_POOL_MIN // 1025 (boundary) +#define POOL_MIN_CLASS_SIZE (2 * 1024) // 2048 (block size) + +// ============================================================================ +// Lane Enumeration +// ============================================================================ + +typedef enum { + LANE_TINY, // SuperSlab-based, 0-1024B, TLS cache + LANE_POOL, // Pool per-thread, 1025-52KB, site-sharded + LANE_ACE, // ACE learning layer, 52KB-2MB + LANE_HUGE, // Direct mmap, 2MB+ + LANE_FALLBACK // Bug detection only (should never happen) +} hak_lane_t; + +// ============================================================================ +// Size Normalization +// ============================================================================ +// +// Purpose: Convert user-requested size to internal allocation size +// Rule: All lane classification uses normalized size for consistency +// +// Note: HEADER_SIZE and alignment are allocator-specific. +// This function provides a generic template; actual allocators may have +// their own normalization based on their header requirements. 
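The "no gaps" invariant stated above can be checked at compile time. A minimal sketch (not part of the patch), assuming a C11 compiler and placement after the boundary definitions in this header:

/* Compile-time check of the lane boundary invariants (illustrative only). */
_Static_assert(LANE_POOL_MIN == LANE_TINY_MAX + 1, "gap between Tiny and Pool lanes");
_Static_assert(LANE_ACE_MIN  == LANE_POOL_MAX + 1, "gap between Pool and ACE lanes");
_Static_assert(LANE_HUGE_MIN == LANE_ACE_MAX + 1,  "gap between ACE and Huge lanes");
_Static_assert(POOL_MIN_CLASS_SIZE >= POOL_MIN_REQUEST_SIZE,
               "smallest Pool class must cover the smallest accepted request");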
+
+#ifndef HAK_LANE_HEADER_SIZE
+#define HAK_LANE_HEADER_SIZE 16   // Default header size (override if needed)
+#endif
+
+#ifndef HAK_LANE_ALIGN
+#define HAK_LANE_ALIGN 16         // Default alignment (override if needed)
+#endif
+
+/**
+ * hak_normalize_size - Convert user size to internal allocation size
+ *
+ * @param user_size Size requested by user (malloc argument)
+ * @return Size used for lane classification (currently user_size unchanged)
+ *
+ * This ensures consistent boundary checking across all allocators.
+ * An allocator-specific normalization would add its header and round up
+ * (e.g. user_size=1000, header=16, align=16 -> 1024); lane boundaries,
+ * however, are defined in user-visible sizes, so this stays the identity.
+ */
+__attribute__((always_inline))
+static inline size_t hak_normalize_size(size_t user_size) {
+    size_t n = user_size;
+    // For lane classification, we use user_size directly since each
+    // allocator (Tiny/Pool/ACE) handles its own header internally.
+    // The boundaries are defined in terms of user-visible sizes.
+    return n;
+}
+
+// ============================================================================
+// Lane Classification (THE Authority)
+// ============================================================================
+
+/**
+ * hak_classify_size - Determine which lane handles this allocation
+ *
+ * @param size User-requested size (not normalized)
+ * @return Lane enumeration value
+ *
+ * CRITICAL: This is THE single point of truth for allocation routing.
+ * All allocation paths MUST use this function (or the HAK_LANE_IS_* macros).
+ *
+ * Boundaries are EXCLUSIVE on the lower side, INCLUSIVE on the upper:
+ *   LANE_TINY: size <= LANE_TINY_MAX
+ *   LANE_POOL: LANE_TINY_MAX < size <= LANE_POOL_MAX
+ *   LANE_ACE:  LANE_POOL_MAX < size <= LANE_ACE_MAX
+ *   LANE_HUGE: size > LANE_ACE_MAX
+ */
+__attribute__((always_inline, pure))
+static inline hak_lane_t hak_classify_size(size_t size) {
+    if (__builtin_expect(size <= LANE_TINY_MAX, 1)) {
+        return LANE_TINY;   // Hot path: most allocations are small
+    }
+    if (size <= LANE_POOL_MAX) {
+        return LANE_POOL;   // 1025-52KB
+    }
+    if (size <= LANE_ACE_MAX) {
+        return LANE_ACE;    // 52KB-2MB
+    }
+    return LANE_HUGE;       // 2MB+ (direct mmap)
+    // Note: LANE_FALLBACK is never returned here; it's for error detection
+}
+
+// ============================================================================
+// Convenience Macros for Routing
+// ============================================================================
+
+/**
+ * HAK_LANE_IS_TINY - Check if size belongs to Tiny lane
+ */
+#define HAK_LANE_IS_TINY(size) ((size) <= LANE_TINY_MAX)
+
+/**
+ * HAK_LANE_IS_POOL - Check if size belongs to Pool lane
+ */
+#define HAK_LANE_IS_POOL(size) ((size) > LANE_TINY_MAX && (size) <= LANE_POOL_MAX)
+
+/**
+ * HAK_LANE_IS_ACE - Check if size belongs to ACE lane
+ */
+#define HAK_LANE_IS_ACE(size) ((size) > LANE_POOL_MAX && (size) <= LANE_ACE_MAX)
+
+/**
+ * HAK_LANE_IS_HUGE - Check if size belongs to Huge lane
+ */
+#define HAK_LANE_IS_HUGE(size) ((size) > LANE_ACE_MAX)
+
+// ============================================================================
+// Compatibility Wrappers (for existing code migration)
+// ============================================================================
+//
+// These allow gradual migration from old constants to new LANE_* values.
+// TODO: Remove these after all code is migrated to use LANE_* directly.
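A minimal standalone check of hak_classify_size() at each lane edge — illustrative only, not part of the patch. It assumes this header compiles on its own and that the include path used below resolves (both are assumptions).

/* lane_boundary_test.c (hypothetical file): exercise each lane edge. */
#include <assert.h>
#include <stdio.h>
#include "hak_lane_classify.inc.h"   /* assumed include path */

int main(void) {
    assert(hak_classify_size(0)             == LANE_TINY);
    assert(hak_classify_size(LANE_TINY_MAX) == LANE_TINY);   /* 1024B  */
    assert(hak_classify_size(LANE_POOL_MIN) == LANE_POOL);   /* 1025B  */
    assert(hak_classify_size(LANE_POOL_MAX) == LANE_POOL);   /* 52KB   */
    assert(hak_classify_size(LANE_ACE_MIN)  == LANE_ACE);    /* 52KB+1 */
    assert(hak_classify_size(LANE_ACE_MAX)  == LANE_ACE);    /* 2MB    */
    assert(hak_classify_size(LANE_HUGE_MIN) == LANE_HUGE);   /* 2MB+1  */
    puts("lane boundaries OK");
    return 0;
}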
+ +// Tiny compatibility +#ifndef TINY_MAX_SIZE +#define TINY_MAX_SIZE LANE_TINY_MAX +#endif + +// Pool compatibility (request boundary, not class size) +// Note: POOL_MIN_SIZE historically meant "minimum request size Pool accepts" +#ifndef POOL_MIN_SIZE_COMPAT +#define POOL_MIN_SIZE_COMPAT POOL_MIN_REQUEST_SIZE +#endif + +// ============================================================================ +// Debug / Observability +// ============================================================================ + +#if !defined(HAKMEM_BUILD_RELEASE) || !HAKMEM_BUILD_RELEASE + +/** + * hak_lane_name - Get human-readable lane name + */ +static inline const char* hak_lane_name(hak_lane_t lane) { + switch (lane) { + case LANE_TINY: return "TINY"; + case LANE_POOL: return "POOL"; + case LANE_ACE: return "ACE"; + case LANE_HUGE: return "HUGE"; + case LANE_FALLBACK: return "FALLBACK"; + default: return "UNKNOWN"; + } +} + +/** + * hak_lane_debug - Print lane classification for debugging + */ +static inline void hak_lane_debug(size_t size) { + hak_lane_t lane = hak_classify_size(size); + fprintf(stderr, "[LANE] size=%zu -> %s\n", size, hak_lane_name(lane)); +} + +/** + * hak_lane_config_report - Print lane configuration + */ +static inline void hak_lane_config_report(void) { + fprintf(stderr, "[LANE_CONFIG] Boundaries:\n"); + fprintf(stderr, " TINY: [0, %d]\n", LANE_TINY_MAX); + fprintf(stderr, " POOL: [%d, %d] (class_min=%d)\n", + LANE_POOL_MIN, LANE_POOL_MAX, POOL_MIN_CLASS_SIZE); + fprintf(stderr, " ACE: [%d, %d]\n", LANE_ACE_MIN, LANE_ACE_MAX); + fprintf(stderr, " HUGE: [%d, ...]\n", LANE_HUGE_MIN); +} + +#endif // !HAKMEM_BUILD_RELEASE + +// ============================================================================ +// Fallback Detection Guard +// ============================================================================ + +/** + * HAK_LANE_ASSERT_NO_FALLBACK - Assert that FALLBACK lane is never reached + * + * Usage: Place in allocation paths where LANE_FALLBACK indicates a bug. + * In release builds, this compiles to nothing. 
+ */ +#if !defined(HAKMEM_BUILD_RELEASE) || !HAKMEM_BUILD_RELEASE +#define HAK_LANE_ASSERT_NO_FALLBACK(lane, size) do { \ + if (__builtin_expect((lane) == LANE_FALLBACK, 0)) { \ + fprintf(stderr, "[HAKMEM] BUG: LANE_FALLBACK reached for size=%zu\n", (size_t)(size)); \ + abort(); \ + } \ +} while (0) +#else +#define HAK_LANE_ASSERT_NO_FALLBACK(lane, size) ((void)0) +#endif + +#ifdef __cplusplus +} +#endif + +#endif // HAK_LANE_CLASSIFY_INC_H diff --git a/core/box/hak_wrappers.inc.h b/core/box/hak_wrappers.inc.h index ace72c73..5ec082b3 100644 --- a/core/box/hak_wrappers.inc.h +++ b/core/box/hak_wrappers.inc.h @@ -34,6 +34,8 @@ void* realloc(void* ptr, size_t size) { #include "../front/malloc_tiny_fast.h" // Phase 26: Front Gate Unification #include "tiny_front_config_box.h" // Phase 4-Step3: Compile-time config for dead code elimination #include "wrapper_env_box.h" // Wrapper env cache (step trace / LD safe / free trace) +#include // write for diagnostics +#include // strlen for diagnostics // malloc wrapper - intercepts system malloc() calls __thread uint64_t g_malloc_total_calls = 0; @@ -52,6 +54,32 @@ extern int g_jemalloc_loaded; // Cached during hak_init_impl(), defined in hakm // Defined here, accessed from tls_sll_box.h for corruption detection _Atomic uint64_t malloc_count = 0; +// Lightweight fallback diagnostics (enabled with HAKMEM_WRAP_DIAG=1) +typedef enum { + FB_INIT_WAIT_FAIL = 0, + FB_INIT_LD_WAIT_FAIL, + FB_FORCE_LIBC, + FB_LD_SAFE, + FB_JEMALLOC_BLOCK, + FB_LOCKDEPTH, + FB_NOT_OWNED, + FB_OTHER, + FB_REASON_COUNT +} wrapper_fb_reason_t; +static _Atomic uint64_t g_fb_counts[FB_REASON_COUNT]; +static _Atomic int g_fb_log_count[FB_REASON_COUNT]; + +static inline void wrapper_record_fallback(wrapper_fb_reason_t reason, const char* msg) { + atomic_fetch_add_explicit(&g_fb_counts[reason], 1, memory_order_relaxed); + const wrapper_env_cfg_t* wcfg = wrapper_env_cfg(); + if (__builtin_expect(wcfg->wrap_diag, 0)) { + int n = atomic_fetch_add_explicit(&g_fb_log_count[reason], 1, memory_order_relaxed); + if (n < 4 && msg) { + write(2, msg, strlen(msg)); + } + } +} + void* malloc(size_t size) { uint64_t count = atomic_fetch_add(&malloc_count, 1); @@ -84,6 +112,7 @@ void* malloc(size_t size) { // Guard against recursion during initialization int init_wait = hak_init_wait_for_ready(); if (__builtin_expect(init_wait <= 0, 0)) { + wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc malloc: init_wait\n"); g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); if (size == 33000) write(2, "RET:Initializing\n", 17); @@ -99,6 +128,7 @@ void* malloc(size_t size) { } if (__builtin_expect(hak_force_libc_alloc(), 0)) { + wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc malloc: force_libc\n"); g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); if (wcfg->step_trace && size == 33000) write(2, "RET:ForceLibc\n", 14); @@ -109,7 +139,10 @@ void* malloc(size_t size) { int ld_mode = hak_ld_env_mode(); if (ld_mode) { if (wcfg->step_trace && size == 33000) write(2, "STEP:3 LD Mode\n", 15); - if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { + // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback + // Only fallback if jemalloc is ACTUALLY loaded (> 0) + if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { + wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc malloc: jemalloc block\n"); g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); if (wcfg->step_trace && size == 33000) write(2, "RET:Jemalloc\n", 13); @@ -118,6 +151,7 @@ void* malloc(size_t size) { 
if (!g_initialized) { hak_init(); } int ld_init_wait = hak_init_wait_for_ready(); if (__builtin_expect(ld_init_wait <= 0, 0)) { + wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc malloc: ld init_wait\n"); g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); if (wcfg->step_trace && size == 33000) write(2, "RET:Init2\n", 10); @@ -125,6 +159,7 @@ void* malloc(size_t size) { } // Cache HAKMEM_LD_SAFE to avoid repeated getenv on hot path if (wcfg->ld_safe_mode >= 2) { + wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc malloc: ld_safe\n"); g_hakmem_lock_depth--; extern void* __libc_malloc(size_t); if (wcfg->step_trace && size == 33000) write(2, "RET:LDSafe\n", 11); @@ -284,11 +319,13 @@ void free(void* ptr) { // Unknown pointer or non-HAKMEM: fall back to libc free(ptr) extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_lockdepth"); + wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc free: lockdepth\n"); __libc_free(ptr); return; } int free_init_wait = hak_init_wait_for_ready(); if (__builtin_expect(free_init_wait <= 0, 0)) { + wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc free: init_wait\n"); #if !HAKMEM_BUILD_RELEASE uint64_t count = atomic_fetch_add_explicit(&fg_libc_bypass_count, 1, memory_order_relaxed); if (count < 10) { @@ -302,10 +339,11 @@ void free(void* ptr) { } if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_force"); __libc_free(ptr); return; } if (hak_ld_env_mode()) { - if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_jemalloc"); __libc_free(ptr); return; } + // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback + if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_jemalloc"); __libc_free(ptr); return; } if (!g_initialized) { hak_init(); } int free_ld_wait = hak_init_wait_for_ready(); - if (__builtin_expect(free_ld_wait <= 0, 0)) { extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; } + if (__builtin_expect(free_ld_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc free: ld init_wait\n"); extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_ld_init"); __libc_free(ptr); return; } } // Phase 15: Box Separation - Domain check to distinguish hakmem vs external pointers @@ -342,6 +380,7 @@ void free(void* ptr) { // No valid hakmem header → external pointer (BenchMeta, libc allocation, etc.) 
extern void __libc_free(void*); ptr_trace_dump_now("wrap_libc_external_nomag"); + wrapper_record_fallback(FB_NOT_OWNED, "[wrap] libc free: not_owned\n"); __libc_free(ptr); return; } @@ -361,6 +400,7 @@ void* calloc(size_t nmemb, size_t size) { if (g_hakmem_lock_depth > 1) { g_hakmem_lock_depth--; extern void* __libc_calloc(size_t, size_t); + wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc calloc: lockdepth\n"); return __libc_calloc(nmemb, size); } @@ -368,6 +408,7 @@ void* calloc(size_t nmemb, size_t size) { if (__builtin_expect(calloc_init_wait <= 0, 0)) { g_hakmem_lock_depth--; extern void* __libc_calloc(size_t, size_t); + wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc calloc: init_wait\n"); return __libc_calloc(nmemb, size); } @@ -386,9 +427,11 @@ void* calloc(size_t nmemb, size_t size) { int ld_mode = hak_ld_env_mode(); if (ld_mode) { - if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { + // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback + if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { g_hakmem_lock_depth--; extern void* __libc_calloc(size_t, size_t); + wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc calloc: jemalloc block\n"); return __libc_calloc(nmemb, size); } if (!g_initialized) { hak_init(); } @@ -396,6 +439,7 @@ void* calloc(size_t nmemb, size_t size) { if (__builtin_expect(calloc_ld_wait <= 0, 0)) { g_hakmem_lock_depth--; extern void* __libc_calloc(size_t, size_t); + wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc calloc: ld init_wait\n"); return __libc_calloc(nmemb, size); } // Reuse cached ld_safe_mode from malloc (same static variable scope won't work, use inline function instead) @@ -409,6 +453,7 @@ void* calloc(size_t nmemb, size_t size) { if (ld_safe_mode_calloc >= 2 || total > TINY_MAX_SIZE) { g_hakmem_lock_depth--; extern void* __libc_calloc(size_t, size_t); + if (ld_safe_mode_calloc >= 2) wrapper_record_fallback(FB_LD_SAFE, "[wrap] libc calloc: ld_safe\n"); return __libc_calloc(nmemb, size); } } @@ -421,16 +466,17 @@ void* calloc(size_t nmemb, size_t size) { } void* realloc(void* ptr, size_t size) { - if (g_hakmem_lock_depth > 0) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } + if (g_hakmem_lock_depth > 0) { wrapper_record_fallback(FB_LOCKDEPTH, "[wrap] libc realloc: lockdepth\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } int realloc_init_wait = hak_init_wait_for_ready(); - if (__builtin_expect(realloc_init_wait <= 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } - if (__builtin_expect(hak_force_libc_alloc(), 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } + if (__builtin_expect(realloc_init_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_WAIT_FAIL, "[wrap] libc realloc: init_wait\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } + if (__builtin_expect(hak_force_libc_alloc(), 0)) { wrapper_record_fallback(FB_FORCE_LIBC, "[wrap] libc realloc: force_libc\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } int ld_mode = hak_ld_env_mode(); if (ld_mode) { - if (hak_ld_block_jemalloc() && g_jemalloc_loaded) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } + // BUG FIX: g_jemalloc_loaded == -1 (unknown) should not trigger fallback + if (hak_ld_block_jemalloc() && g_jemalloc_loaded > 0) { wrapper_record_fallback(FB_JEMALLOC_BLOCK, "[wrap] libc realloc: jemalloc 
block\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } if (!g_initialized) { hak_init(); } int realloc_ld_wait = hak_init_wait_for_ready(); - if (__builtin_expect(realloc_ld_wait <= 0, 0)) { extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } + if (__builtin_expect(realloc_ld_wait <= 0, 0)) { wrapper_record_fallback(FB_INIT_LD_WAIT_FAIL, "[wrap] libc realloc: ld init_wait\n"); extern void* __libc_realloc(void*, size_t); return __libc_realloc(ptr, size); } } if (ptr == NULL) { return malloc(size); } if (size == 0) { free(ptr); return NULL; } diff --git a/core/box/wrapper_env_box.c b/core/box/wrapper_env_box.c index ecf87115..c829edc8 100644 --- a/core/box/wrapper_env_box.c +++ b/core/box/wrapper_env_box.c @@ -3,7 +3,7 @@ #include #include -wrapper_env_cfg_t g_wrapper_env = {.inited = 0, .step_trace = 0, .ld_safe_mode = 1, .free_wrap_trace = 0}; +wrapper_env_cfg_t g_wrapper_env = {.inited = 0, .step_trace = 0, .ld_safe_mode = 1, .free_wrap_trace = 0, .wrap_diag = 0}; static inline int env_flag(const char* name, int def) { const char* e = getenv(name); @@ -39,6 +39,7 @@ void wrapper_env_init_once(void) { g_wrapper_env.step_trace = env_flag("HAKMEM_STEP_TRACE", 0); g_wrapper_env.ld_safe_mode = env_int("HAKMEM_LD_SAFE", 1); g_wrapper_env.free_wrap_trace = env_flag("HAKMEM_FREE_WRAP_TRACE", 0); + g_wrapper_env.wrap_diag = env_flag("HAKMEM_WRAP_DIAG", 0); // Mark as initialized last with memory barrier atomic_store_explicit(&g_wrapper_env.inited, 1, memory_order_release); diff --git a/core/box/wrapper_env_box.h b/core/box/wrapper_env_box.h index a1048512..b9bc0713 100644 --- a/core/box/wrapper_env_box.h +++ b/core/box/wrapper_env_box.h @@ -9,6 +9,7 @@ typedef struct { int step_trace; // HAKMEM_STEP_TRACE (default: 0) int ld_safe_mode; // HAKMEM_LD_SAFE (default: 1) int free_wrap_trace; // HAKMEM_FREE_WRAP_TRACE (default: 0) + int wrap_diag; // HAKMEM_WRAP_DIAG (default: 0) - log first few libc fallbacks } wrapper_env_cfg_t; extern wrapper_env_cfg_t g_wrapper_env; diff --git a/core/hakmem_pool.h b/core/hakmem_pool.h index 3486fe15..9d82fe76 100644 --- a/core/hakmem_pool.h +++ b/core/hakmem_pool.h @@ -1,21 +1,26 @@ -// hakmem_pool.h - L2 Hybrid Pool (2-32KiB Mid-Size Allocations) +// hakmem_pool.h - L2 Hybrid Pool (1KB-52KB Mid-Size Allocations) // Purpose: Per-thread pool with site-based sharding for mid-size fast-path // // Design Philosophy: -// - **5 size classes**: 2KiB, 4KiB, 8KiB, 16KiB, 32KiB +// - **7 size classes**: 2KiB, 4KiB, 8KiB, 16KiB, 32KiB, 40KiB, 52KiB // - **64KiB pool pages**: 32 blocks (2KiB), 16 blocks (4KiB), 8 blocks (8KiB), etc. 
// - **per-thread freelist**: Lock-free allocation (mimalloc strategy) // - **O(1) site→shard mapping**: `shard = (pc >> 4) & (SHARDS-1)` // - **MPSC queue**: Remote-free handling (cross-thread deallocation) // +// Phase 2 Update: +// - Pool now accepts requests from 1025B (LANE_POOL_MIN) to 52KB +// - Requests 1025-2047B are rounded up to 2KB class (internal fragmentation OK) +// - This eliminates the "unmanaged zone" between Tiny (1024B) and Pool (was 2KB) +// // Target Workloads: // - mir (medium): 2-32KiB allocations → +52% → target +10-20% // - mixed: combination → +66% → target +10-25% // -// Integration: Called by hakmem.c between malloc (< 2KiB) and BigCache (>= 1MB) +// Integration: Called by hakmem.c for sizes > LANE_TINY_MAX (1024B) // // License: MIT -// Date: 2025-10-21 +// Date: 2025-10-21 (Phase 2 Update: 2025-12-02) #ifndef HAKMEM_POOL_H #define HAKMEM_POOL_H @@ -23,15 +28,18 @@ #include #include +// Phase 2: Lane Classification Box (Single Source of Truth for boundaries) +#include "box/hak_lane_classify.inc.h" + // =========================================================================== // Configuration Constants // =========================================================================== -#define POOL_NUM_CLASSES 7 // 2KiB, 4KiB, 8KiB, 16KiB, 32KiB, DYN1, DYN2 (optional) +#define POOL_NUM_CLASSES 7 // 2KiB, 4KiB, 8KiB, 16KiB, 32KiB, 40KiB, 52KiB #define POOL_PAGE_SIZE (64 * 1024) // 64KiB per pool page #define POOL_NUM_SHARDS 64 // Site-based sharding (power of 2) -// Size class boundaries (in bytes) +// Size class boundaries (in bytes) - actual block sizes #define POOL_CLASS_2KB (2 * 1024) #define POOL_CLASS_4KB (4 * 1024) #define POOL_CLASS_8KB (8 * 1024) @@ -40,9 +48,22 @@ #define POOL_CLASS_40KB (40 * 1024) // Phase 6.21: Bridge class 0 #define POOL_CLASS_52KB (52 * 1024) // Phase 6.21: Bridge class 1 -// Minimum/maximum size handled by pool -#define POOL_MIN_SIZE POOL_CLASS_2KB // 2KiB minimum -#define POOL_MAX_SIZE POOL_CLASS_52KB // 52KiB maximum (Phase 6.21: expanded for Bridge classes) +// =========================================================================== +// Phase 2: Request Size vs Block Size (separate concepts!) +// =========================================================================== +// +// POOL_MIN_SIZE: Smallest USER REQUEST Pool accepts (= LANE_POOL_MIN = 1025) +// POOL_MIN_CLASS: Smallest BLOCK SIZE Pool allocates (= 2KB) +// +// Example: request=1056B -> class=2KB (internal fragmentation ~48%, acceptable) +// This is better than libc fragmentation from mmap fallback! 
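How a request in the new 1025B-52KB range maps onto the seven block classes can be sketched as follows. This helper is illustrative only (the real size-to-class routine lives in hakmem_pool.c and may differ) and assumes the POOL_CLASS_* / POOL_NUM_CLASSES constants from this header; pool_class_for_request_sketch is a hypothetical name.

/* Illustrative only: round a poolable request up to its block class. */
static inline size_t pool_class_for_request_sketch(size_t size) {
    static const size_t classes[POOL_NUM_CLASSES] = {
        POOL_CLASS_2KB,  POOL_CLASS_4KB,  POOL_CLASS_8KB, POOL_CLASS_16KB,
        POOL_CLASS_32KB, POOL_CLASS_40KB, POOL_CLASS_52KB
    };
    for (int i = 0; i < POOL_NUM_CLASSES; i++) {
        if (size <= classes[i]) return classes[i];   /* round up to class */
    }
    return 0;   /* > POOL_MAX_SIZE: caller should not ask Pool */
}
/* Example: request=1056B -> 2048B class, waste = (2048 - 1056) / 2048 ~= 48%. */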
+ +// Request boundary (from lane classification - Single Source of Truth) +#define POOL_MIN_SIZE POOL_MIN_REQUEST_SIZE // = 1025 (LANE_TINY_MAX + 1) +#define POOL_MAX_SIZE LANE_POOL_MAX // = 52KB + +// Block class boundary (internal, for size-to-class mapping) +#define POOL_MIN_CLASS POOL_CLASS_2KB // Smallest actual block = 2KB // Remote-free drain threshold #define POOL_REMOTE_DRAIN_THRESHOLD 16 // Drain every N allocs @@ -97,7 +118,8 @@ void hak_pool_extra_metrics_snapshot(uint64_t* trylock_attempts, uint64_t* trylo // Get shard index from site_id (0-63) int hak_pool_get_shard_index(uintptr_t site_id); -// Check if size is poolable (2-32KiB range) +// Check if size is poolable (1025B-52KB range, Phase 2 expanded) +// Phase 2: Now accepts 1025B+ (was 2KB+) to eliminate unmanaged zone static inline int hak_pool_is_poolable(size_t size) { return size >= POOL_MIN_SIZE && size <= POOL_MAX_SIZE; } diff --git a/core/hakmem_tiny.h b/core/hakmem_tiny.h index 4c689328..71b5b34d 100644 --- a/core/hakmem_tiny.h +++ b/core/hakmem_tiny.h @@ -11,6 +11,9 @@ // Include page mini-magazine module (Phase 1: Hybrid optimization) #include "hakmem_tiny_mini_mag.h" +// Phase 2: Lane Classification Box (Single Source of Truth for boundaries) +#include "box/hak_lane_classify.inc.h" + // Forward declaration for initialization guard int hak_is_initializing(void); @@ -23,17 +26,19 @@ int hak_is_initializing(void); #define TINY_NUM_CLASSES 8 #define TINY_SLAB_SIZE (64 * 1024) // 64KB per slab -// Phase E1-CORRECT: All Tiny classes use a 1-byte header. -// C7 stride=1024B → usable 1023B (1024-1). 1024B は Mid allocator に委譲する。 -#define TINY_MAX_SIZE 1024 // Tiny handles up to 1024B (C7 total size) - default + +// Phase 2 FIX: TINY_MAX_SIZE now references LANE_TINY_MAX (Single Source of Truth) +// Previously: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!) +// Now: Both reference LANE_TINY_MAX (1024) from hak_lane_classify.inc.h +#undef TINY_MAX_SIZE // Remove compatibility wrapper if defined +#define TINY_MAX_SIZE LANE_TINY_MAX // = 1024 (authoritative) // Phase 16: Dynamic Tiny max size control (ENV: HAKMEM_TINY_MAX_CLASS) -// Strategy: Reduce Tiny coverage to ~256B, delegate 512/1024B to Mid +// Strategy: Reduce Tiny coverage to ~256B, delegate 512/1024B to Pool // ENV values: // HAKMEM_TINY_MAX_CLASS=5 → Tiny handles up to 255B (C0-C5) -// HAKMEM_TINY_MAX_CLASS=7 → Tiny handles up to 1023B (C0-C7, default) -// Forward declaration (implementation in hakmem_tiny.c) -// Optimized: Inline for hot path (0.95% overhead removal) +// HAKMEM_TINY_MAX_CLASS=7 → Tiny handles up to 1024B (C0-C7, default) +// Phase 2 FIX: sizes[7] = 1024 (was 2047, caused boundary mismatch!) #include #include extern bool smallmid_is_enabled(void); @@ -48,7 +53,9 @@ static inline size_t tiny_get_max_size(void) { if (parsed >= 0 && parsed < TINY_NUM_CLASSES) max_class = parsed; } if (smallmid_is_enabled() && max_class > 5) max_class = 5; - static const size_t sizes[8] = {7, 15, 31, 63, 127, 255, 511, 2047}; + // Phase 2 FIX: sizes[7] = LANE_TINY_MAX (was 2047!) + // This ensures tiny_get_max_size() <= LANE_TINY_MAX always + static const size_t sizes[8] = {7, 15, 31, 63, 127, 255, 511, LANE_TINY_MAX}; g_cached = sizes[max_class]; } return g_cached;
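To confirm that the Tiny→Pool handoff stays gap-free after the sizes[7] fix above, a small sanity check could be run once at init. This is a sketch only, not part of the patch: the function name hak_lane_selftest() is hypothetical, and it assumes core/hakmem_tiny.h and core/hakmem_pool.h are on the include path.

#include <assert.h>
#include "hakmem_tiny.h"
#include "hakmem_pool.h"

static void hak_lane_selftest(void) {
    /* Tiny never claims more than the lane boundary (sizes[7] == LANE_TINY_MAX). */
    assert(tiny_get_max_size() <= (size_t)LANE_TINY_MAX);
    /* Pool accepts everything from the next byte up to LANE_POOL_MAX (52KB)... */
    assert(hak_pool_is_poolable(LANE_TINY_MAX + 1));
    assert(hak_pool_is_poolable(LANE_POOL_MAX));
    /* ...and nothing outside that range. */
    assert(!hak_pool_is_poolable(LANE_TINY_MAX));       /* 1024B  -> Tiny lane */
    assert(!hak_pool_is_poolable(LANE_POOL_MAX + 1));   /* 52KB+1 -> ACE lane  */
}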