/**
* hak_lane_classify.inc.h - Phase 2: Lane Classification Box
*
* Box: Allocation Lane Classification (Single Source of Truth)
* Responsibility: Centralized size-to-lane mapping with unified boundary definitions
* Contract: All allocator boundaries defined here; no hardcoded values elsewhere
*
* Design Principles (Box Pattern):
* 1. Single Source of Truth: All lane boundaries defined in ONE place
* 2. Normalize-then-Classify: Always use normalized size for classification
* 3. Clear Invariants: POOL_MIN = TINY_MAX + 1 (no gaps)
* 4. Observable: Debug helpers for lane inspection
* 5. Safe: LANE_FALLBACK catches design bugs
*
* Problem Solved:
* - Before: TINY_MAX_SIZE=1024 vs tiny_get_max_size()=2047 (inconsistent!)
* - Before: Hardcoded 8192 in Pool TLS, 1024 in Tiny, etc.
* - Result: 1025-2047B "unmanaged zone" causing libc fragmentation
*
* Solution:
* - Define all boundaries as LANE_* constants
* - hak_classify_size() is THE authority for routing
* - Existing code uses compatibility wrappers
*
* Lane Architecture:
* LANE_TINY: [0, LANE_TINY_MAX] = 0-1024B SuperSlab
* LANE_POOL: (LANE_TINY_MAX, LANE_POOL_MAX] = 1025-52KB Pool per-thread
* LANE_ACE: (LANE_POOL_MAX, LANE_ACE_MAX] = 52KB-2MB ACE learning
 * LANE_HUGE: (LANE_ACE_MAX, ...) = 2MB+ mmap direct
*
* Created: 2025-12-02 (Phase 2-1)
* License: MIT
*/
#ifndef HAK_LANE_CLASSIFY_INC_H
#define HAK_LANE_CLASSIFY_INC_H
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
// ============================================================================
// Lane Boundary Definitions (Single Source of Truth)
// ============================================================================
//
// CRITICAL: These are the ONLY authoritative boundary values.
// All other code MUST reference these constants (not hardcode numbers).
//
// Invariant: Each lane's MIN = previous lane's MAX + 1 (no gaps!)
#define LANE_TINY_MAX 1024 // Tiny handles [0, 1024]
#define LANE_POOL_MIN (LANE_TINY_MAX + 1) // Pool handles [1025, ...] (invariant!)
#define LANE_POOL_MAX (52 * 1024) // Pool handles [..., 52KB]
#define LANE_ACE_MIN (LANE_POOL_MAX + 1) // ACE handles [52KB+1, ...]
#define LANE_ACE_MAX (2 * 1024 * 1024) // ACE handles [..., 2MB]
#define LANE_HUGE_MIN (LANE_ACE_MAX + 1) // Huge handles [2MB+1, ...]
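// The no-gap invariant above can be checked at compile time. A minimal
// sketch, assuming a C11 toolchain (compiles away on older compilers):
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L
_Static_assert(LANE_POOL_MIN == LANE_TINY_MAX + 1, "gap between Tiny and Pool lanes");
_Static_assert(LANE_ACE_MIN  == LANE_POOL_MAX + 1, "gap between Pool and ACE lanes");
_Static_assert(LANE_HUGE_MIN == LANE_ACE_MAX + 1, "gap between ACE and Huge lanes");
#endif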
// ============================================================================
// Pool Internal: Request Size vs Block Size (separate concepts!)
// ============================================================================
//
// POOL_MIN_REQUEST_SIZE: Smallest user request Pool will accept (= LANE_POOL_MIN)
// POOL_MIN_CLASS_SIZE: Smallest block class Pool actually allocates
//
// Example: request=1056B -> class=2048B (internal fragmentation ~48%, acceptable)
#define POOL_MIN_REQUEST_SIZE LANE_POOL_MIN // 1025 (boundary)
#define POOL_MIN_CLASS_SIZE (2 * 1024) // 2048 (block size)
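// Illustration of the request-vs-class distinction (hypothetical helper,
// not the real class table, which lives in core/hakmem_pool.h). Simple
// next-power-of-two rounding reproduces the 1056B -> 2048B example above:
static inline size_t hak_pool_class_sketch(size_t request) {
    size_t cls = POOL_MIN_CLASS_SIZE;   // smallest block class: 2048B
    while (cls < request) {
        cls <<= 1;                      // round up to the next power of two
    }
    return cls;                         // e.g. 1056 -> 2048, 3000 -> 4096
}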
// ============================================================================
// Lane Enumeration
// ============================================================================
typedef enum {
    LANE_TINY,      // SuperSlab-based, 0-1024B, TLS cache
    LANE_POOL,      // Pool per-thread, 1025-52KB, site-sharded
    LANE_ACE,       // ACE learning layer, 52KB-2MB
    LANE_HUGE,      // Direct mmap, 2MB+
    LANE_FALLBACK   // Bug detection only (should never happen)
} hak_lane_t;
// ============================================================================
// Size Normalization
// ============================================================================
//
// Purpose: Convert user-requested size to internal allocation size
// Rule: All lane classification uses normalized size for consistency
//
// Note: HEADER_SIZE and alignment are allocator-specific.
// This function provides a generic template; actual allocators may have
// their own normalization based on their header requirements.
#ifndef HAK_LANE_HEADER_SIZE
#define HAK_LANE_HEADER_SIZE 16 // Default header size (override if needed)
#endif
#ifndef HAK_LANE_ALIGN
#define HAK_LANE_ALIGN 16 // Default alignment (override if needed)
#endif
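// Override sketch (hypothetical consumer; the #ifndef defaults above only
// apply when these are not defined before this header is included):
//   #define HAK_LANE_HEADER_SIZE 32
//   #define HAK_LANE_ALIGN       64
//   #include "hak_lane_classify.inc.h"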
/**
 * hak_normalize_size - Map a user-requested size to the size used for
 * lane classification
 *
 * @param user_size Size requested by user (malloc argument)
 * @return Size used for boundary checks (currently user_size, unchanged)
 *
 * Lane boundaries are defined in terms of user-visible sizes, and each
 * allocator (Tiny/Pool/ACE) accounts for its own header internally, so no
 * header/alignment adjustment happens here. The function exists so that a
 * future policy change (e.g. classifying on header-inclusive sizes) has a
 * single place to live.
 */
__attribute__((always_inline))
static inline size_t hak_normalize_size(size_t user_size) {
    return user_size;
}
// ============================================================================
// Lane Classification (THE Authority)
// ============================================================================
/**
* hak_classify_size - Determine which lane handles this allocation
*
* @param size User-requested size (not normalized)
* @return Lane enumeration value
*
* CRITICAL: This is THE single point of truth for allocation routing.
* All allocation paths MUST use this function (or the switch macro).
*
 * Boundaries are EXCLUSIVE on the lower side, INCLUSIVE on the upper:
* LANE_TINY: size <= LANE_TINY_MAX
* LANE_POOL: LANE_TINY_MAX < size <= LANE_POOL_MAX
* LANE_ACE: LANE_POOL_MAX < size <= LANE_ACE_MAX
* LANE_HUGE: size > LANE_ACE_MAX
*/
__attribute__((always_inline, pure))
static inline hak_lane_t hak_classify_size(size_t size) {
    if (__builtin_expect(size <= LANE_TINY_MAX, 1)) {
        return LANE_TINY; // Hot path: most allocations are small
    }
    if (size <= LANE_POOL_MAX) {
        return LANE_POOL; // 1025B-52KB
    }
    if (size <= LANE_ACE_MAX) {
        return LANE_ACE; // 52KB-2MB
    }
    return LANE_HUGE; // 2MB+ (direct mmap)
    // Note: LANE_FALLBACK is never returned here; it's for error detection
}
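// Usage sketch for the router (hypothetical entry points; the actual lane
// dispatch lives in core/box/hak_alloc_api.inc.h):
//
//   void* hak_malloc_route(size_t size) {
//       switch (hak_classify_size(size)) {
//           case LANE_TINY: return tiny_alloc(size);  // SuperSlab
//           case LANE_POOL: return pool_alloc(size);  // per-thread pool
//           case LANE_ACE:  return ace_alloc(size);   // learning layer
//           case LANE_HUGE: return huge_alloc(size);  // direct mmap
//           default:        return NULL;              // LANE_FALLBACK = bug
//       }
//   }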
// ============================================================================
// Convenience Macros for Routing
// ============================================================================
/**
* HAK_LANE_IS_TINY - Check if size belongs to Tiny lane
*/
#define HAK_LANE_IS_TINY(size) ((size) <= LANE_TINY_MAX)
/**
* HAK_LANE_IS_POOL - Check if size belongs to Pool lane
*/
#define HAK_LANE_IS_POOL(size) ((size) > LANE_TINY_MAX && (size) <= LANE_POOL_MAX)
/**
* HAK_LANE_IS_ACE - Check if size belongs to ACE lane
*/
#define HAK_LANE_IS_ACE(size) ((size) > LANE_POOL_MAX && (size) <= LANE_ACE_MAX)
/**
* HAK_LANE_IS_HUGE - Check if size belongs to Huge lane
*/
#define HAK_LANE_IS_HUGE(size) ((size) > LANE_ACE_MAX)
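// Note: each HAK_LANE_IS_* macro may evaluate its argument twice; avoid
// passing expressions with side effects (e.g. HAK_LANE_IS_POOL(next_size())).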
// ============================================================================
// Compatibility Wrappers (for existing code migration)
// ============================================================================
//
// These allow gradual migration from old constants to new LANE_* values.
// TODO: Remove these after all code is migrated to use LANE_* directly.
// Tiny compatibility
#ifndef TINY_MAX_SIZE
#define TINY_MAX_SIZE LANE_TINY_MAX
#endif
// Pool compatibility (request boundary, not class size)
// Note: POOL_MIN_SIZE historically meant "minimum request size Pool accepts"
#ifndef POOL_MIN_SIZE_COMPAT
#define POOL_MIN_SIZE_COMPAT POOL_MIN_REQUEST_SIZE
#endif
// ============================================================================
// Debug / Observability
// ============================================================================
#if !defined(HAKMEM_BUILD_RELEASE) || !HAKMEM_BUILD_RELEASE
#include <stdio.h>
#include <stdlib.h> // abort() used by HAK_LANE_ASSERT_NO_FALLBACK below
/**
* hak_lane_name - Get human-readable lane name
*/
static inline const char* hak_lane_name(hak_lane_t lane) {
    switch (lane) {
        case LANE_TINY:     return "TINY";
        case LANE_POOL:     return "POOL";
        case LANE_ACE:      return "ACE";
        case LANE_HUGE:     return "HUGE";
        case LANE_FALLBACK: return "FALLBACK";
        default:            return "UNKNOWN";
    }
}
/**
* hak_lane_debug - Print lane classification for debugging
*/
static inline void hak_lane_debug(size_t size) {
    hak_lane_t lane = hak_classify_size(size);
    fprintf(stderr, "[LANE] size=%zu -> %s\n", size, hak_lane_name(lane));
}
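// Example: hak_lane_debug(1500) prints "[LANE] size=1500 -> POOL"
// (1500 > LANE_TINY_MAX and <= LANE_POOL_MAX, so it lands in Pool).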
/**
* hak_lane_config_report - Print lane configuration
*/
static inline void hak_lane_config_report(void) {
    fprintf(stderr, "[LANE_CONFIG] Boundaries:\n");
    fprintf(stderr, "  TINY: [0, %d]\n", LANE_TINY_MAX);
    fprintf(stderr, "  POOL: [%d, %d] (class_min=%d)\n",
            LANE_POOL_MIN, LANE_POOL_MAX, POOL_MIN_CLASS_SIZE);
    fprintf(stderr, "  ACE:  [%d, %d]\n", LANE_ACE_MIN, LANE_ACE_MAX);
    fprintf(stderr, "  HUGE: [%d, ...]\n", LANE_HUGE_MIN);
}
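// Sample output with the defaults above (52KB = 53248, 2MB = 2097152):
//   [LANE_CONFIG] Boundaries:
//     TINY: [0, 1024]
//     POOL: [1025, 53248] (class_min=2048)
//     ACE:  [53249, 2097152]
//     HUGE: [2097153, ...]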
#endif // !HAKMEM_BUILD_RELEASE
// ============================================================================
// Fallback Detection Guard
// ============================================================================
/**
* HAK_LANE_ASSERT_NO_FALLBACK - Assert that FALLBACK lane is never reached
*
* Usage: Place in allocation paths where LANE_FALLBACK indicates a bug.
* In release builds, this compiles to nothing.
*/
#if !defined(HAKMEM_BUILD_RELEASE) || !HAKMEM_BUILD_RELEASE
#define HAK_LANE_ASSERT_NO_FALLBACK(lane, size) do { \
    if (__builtin_expect((lane) == LANE_FALLBACK, 0)) { \
        fprintf(stderr, "[HAKMEM] BUG: LANE_FALLBACK reached for size=%zu\n", (size_t)(size)); \
        abort(); \
    } \
} while (0)
#else
#define HAK_LANE_ASSERT_NO_FALLBACK(lane, size) ((void)0)
#endif
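// Usage sketch (hypothetical call site in an allocation path):
//   hak_lane_t lane = hak_classify_size(sz);
//   HAK_LANE_ASSERT_NO_FALLBACK(lane, sz);  // compiles to (void)0 in release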
#ifdef __cplusplus
}
#endif
#endif // HAK_LANE_CLASSIFY_INC_H