Files
hakmem/core/box/hak_alloc_api.inc.h
Moe Charm (CI) 6818e350c4 Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)
IMPLEMENTATION:
===============
Add dynamic boundary adjustment between Tiny and Mid allocators via
HAKMEM_TINY_MAX_CLASS environment variable for performance tuning.

Changes:
--------
1. hakmem_tiny.h/c: Add tiny_get_max_size() - reads ENV and maps class
   to max usable size (default: class 7 = 1023B, can reduce to class 5 = 255B)

2. hakmem_mid_mt.h/c: Add mid_get_min_size() - returns tiny_get_max_size() + 1
   to ensure no size gap between allocators

3. hak_alloc_api.inc.h: Replace static TINY_MAX_SIZE with dynamic
   tiny_get_max_size() call in allocation routing logic

4. Size gap fix: Mid's range now dynamically adjusts based on Tiny's max
   (prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5)

A/B BENCHMARK RESULTS:
======================
Config A (Default, C0-C7, Tiny up to 1023B):
  128B:  6.34M ops/s  |  256B:  6.34M ops/s
  512B:  5.55M ops/s  |  1024B: 5.91M ops/s

Config B (Reduced, C0-C5, Tiny up to 255B):
  128B:  1.38M ops/s (-78%)  |  256B:  1.36M ops/s (-79%)
  512B:  1.33M ops/s (-76%)  |  1024B: 1.37M ops/s (-77%)

FINDINGS:
=========
✅ Size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5
❌ Severe performance degradation (-76% to -79%) when reducing Tiny coverage
❌ Even 128B degraded (should still use Tiny) - possible class filtering issue
⚠️  Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation for small sizes

HYPOTHESIS:
-----------
Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
- Severe internal fragmentation (1024B request → 8KB block = 87% waste)
- Poor cache utilization
- Consistent ~1.3M ops/s across all sizes (same 8KB class)

RECOMMENDATION:
===============
**Keep default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B)**

Reducing Tiny coverage is COUNTERPRODUCTIVE with current Mid allocator design.
To make this viable, Mid would need finer size classes for 256B-8KB range.

ENV USAGE (for future experimentation):
----------------------------------------
export HAKMEM_TINY_MAX_CLASS=7  # Default (C0-C7, up to 1023B)
export HAKMEM_TINY_MAX_CLASS=5  # Reduced (C0-C5, up to 255B) - NOT recommended

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 01:26:48 +09:00

217 lines
7.5 KiB
C

// hak_alloc_api.inc.h — Box: hak_alloc_at() implementation
#ifndef HAK_ALLOC_API_INC_H
#define HAK_ALLOC_API_INC_H
#include "../hakmem_tiny.h" // For tiny_get_max_size() (Phase 16)
#ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h"
#endif
// Single choke point for OS-level mappings, so the call into
// hak_alloc_mmap_impl() lives in exactly one place.
//   size    - byte count forwarded unchanged to hak_alloc_mmap_impl()
//   site_id - call-site identifier; currently ignored (reserved for future
//             accounting/learning)
// Returns whatever hak_alloc_mmap_impl() returns.
static inline void* hak_os_map_boundary(size_t size, uintptr_t site_id) {
    (void)site_id; // not consumed yet; silences unused-parameter warnings
#if HAKMEM_DEBUG_TIMING
    // Timed variant: bracket only the mapping call itself.
    HKM_TIME_START(t_mmap);
    void* mapping = hak_alloc_mmap_impl(size);
    HKM_TIME_END(HKM_CAT_SYSCALL_MMAP, t_mmap);
    return mapping;
#else
    return hak_alloc_mmap_impl(size);
#endif
}
// hak_alloc_at - route one allocation request through the allocator tiers.
//
// Tiers are tried in this order; the first one that yields a pointer wins:
//   1. Tiny      size <= tiny_get_max_size()   (ENV: HAKMEM_TINY_MAX_CLASS)
//   2. Pool TLS  8192..53248 bytes             (only with HAKMEM_POOL_TLS_PHASE1)
//   3. Mid       mid_is_in_range(size)
//   4. BigCache  size >= threshold             (only when the feature is enabled)
//   5. ACE       tiny max < size < threshold
//   6. OS map    final fallback for every size above the Tiny boundary
//
// The Tiny boundary is read once and reused for every routing decision so that
// all stages agree on where Tiny ends, even when the boundary is lowered at
// run time.
//
//   size - requested size in bytes
//   site - call-site identifier; converted to uintptr_t and handed to the
//          lower layers / stored in the allocation header
//
// Returns the allocated pointer, or NULL on failure. errno is set to ENOMEM
// only on the "Tiny-sized request that nobody served" path.
//
// NOTE(review): HKM_TIME_END for t0 is only reached on the final OS-map path;
// the earlier tier returns leave it open, exactly as before.
__attribute__((always_inline))
inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_START(t0);
#endif
    if (!g_initialized) hak_init();
    uintptr_t site_id = (uintptr_t)site;

    // Phase 16: Dynamic Tiny max size (ENV: HAKMEM_TINY_MAX_CLASS).
    // Default: 1023B (C0-C7), can be reduced to 255B (C0-C5) to delegate
    // 512/1024B to Mid. Sampled once so every stage below uses the same value.
    const size_t tiny_max = tiny_get_max_size();

    // ---- Stage 1: Tiny ------------------------------------------------------
    if (__builtin_expect(size <= tiny_max, 1)) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_tiny);
#endif
        void* tiny_ptr = NULL;
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
        tiny_ptr = hak_tiny_alloc_fast_wrapper(size);
#elif defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
        tiny_ptr = hak_tiny_alloc_ultra_simple(size);
#elif defined(HAKMEM_TINY_PHASE6_METADATA)
        tiny_ptr = hak_tiny_alloc_metadata(size);
#else
        tiny_ptr = hak_tiny_alloc(size);
#endif
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_TINY_ALLOC, t_tiny);
#endif
        if (tiny_ptr) { hkm_ace_track_alloc(); return tiny_ptr; }

        // PHASE 7 CRITICAL FIX: no libc malloc fallback for Tiny failures, to
        // avoid mixing HAKMEM and libc allocations. Control simply continues
        // into the later stages (no early return).
        // NOTE(review): the ACE and OS-map stages below only accept
        // size > tiny_max, so unless Pool TLS or Mid takes the request it ends
        // in the NULL/ENOMEM branch at the bottom.
#if !HAKMEM_BUILD_RELEASE
        {
            // Rate-limited diagnostic. Atomic because this function can be
            // entered from several threads at once; under contention a few
            // extra lines may be printed, which is acceptable for a debug log.
            static _Atomic int log_count = 0;
            if (atomic_load(&log_count) < 3) {
#if HAKMEM_TINY_HEADER_CLASSIDX
                fprintf(stderr, "[DEBUG] Phase 7: tiny_alloc(%zu) failed, trying Mid/ACE layers (no malloc fallback)\n", size);
#else
                fprintf(stderr, "[DEBUG] tiny_alloc(%zu) returned NULL, falling back\n", size);
#endif
                atomic_fetch_add(&log_count, 1);
            }
        }
#endif
    }

    hkm_size_hist_record(size);

#ifdef HAKMEM_POOL_TLS_PHASE1
    // ---- Stage 2: Pool TLS (8KB-52KB range) ---------------------------------
    if (size >= 8192 && size <= 53248) {
        void* pool_ptr = pool_alloc(size);
        if (pool_ptr) return pool_ptr;
        // Fall through to the Mid allocator as fallback
    }
#endif

    // ---- Stage 3: Mid -------------------------------------------------------
    if (__builtin_expect(mid_is_in_range(size), 0)) {
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_mid);
#endif
        void* mid_ptr = mid_mt_alloc(size);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_POOL_GET, t_mid);
#endif
        if (mid_ptr) return mid_ptr;
    }

#if HAKMEM_FEATURE_EVOLUTION
    // Sampled evolution tick: only every (mask+1)-th call pays for the clock
    // read; a successful tick refreshes the cached strategy id.
    if (g_evo_sample_mask > 0) {
        static _Atomic uint64_t tick_counter = 0;
        if ((atomic_fetch_add(&tick_counter, 1) & g_evo_sample_mask) == 0) {
            struct timespec now; clock_gettime(CLOCK_MONOTONIC, &now);
            uint64_t now_ns = now.tv_sec * 1000000000ULL + now.tv_nsec;
            if (hak_evo_tick(now_ns)) {
                int new_strategy = hak_elo_select_strategy();
                atomic_store(&g_cached_strategy_id, new_strategy);
            }
        }
    }
#endif

    // Size at or above which a request is treated as "large": taken from the
    // currently cached ELO strategy when learning is enabled, otherwise 2 MiB.
    size_t threshold;
    if (HAK_ENABLED_LEARNING(HAKMEM_FEATURE_ELO)) {
        int strategy_id = atomic_load(&g_cached_strategy_id);
        threshold = hak_elo_get_threshold(strategy_id);
    } else {
        threshold = 2097152;
    }

    // ---- Stage 4: BigCache --------------------------------------------------
    if (HAK_ENABLED_CACHE(HAKMEM_FEATURE_BIGCACHE) && size >= threshold) {
        void* cached_ptr = NULL;
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_bc);
#endif
        if (hak_bigcache_try_get(size, site_id, &cached_ptr)) {
#if HAKMEM_DEBUG_TIMING
            HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
#endif
            return cached_ptr;
        }
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_BIGCACHE_GET, t_bc);
#endif
    }

    // ---- Stage 5: ACE -------------------------------------------------------
    // Uses the same dynamic boundary as stage 1 so that sizes handed off by a
    // reduced Tiny range are still eligible here when Mid did not serve them.
    const int use_ace = (size > tiny_max && size < threshold);

    // 33KB tracing is a debugging aid; keep it out of release builds so the
    // allocation path never writes to stderr there.
#if !HAKMEM_BUILD_RELEASE
    const int trace_33k = (size >= 33000 && size <= 34000);
#else
    const int trace_33k = 0;
#endif
    if (trace_33k) {
        fprintf(stderr, "[ALLOC] 33KB: tiny_max=%zu, threshold=%zu, condition=%d\n",
                tiny_max, threshold, use_ace);
    }
    if (use_ace) {
        if (trace_33k) {
            fprintf(stderr, "[ALLOC] 33KB: Calling hkm_ace_alloc\n");
        }
        const FrozenPolicy* pol = hkm_policy_get();
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_ace);
#endif
        void* l1 = hkm_ace_alloc(size, site_id, pol);
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_END(HKM_CAT_POOL_GET, t_ace);
#endif
        if (trace_33k) {
            fprintf(stderr, "[ALLOC] 33KB: hkm_ace_alloc returned %p\n", l1);
        }
        if (l1) return l1;
    }

    // ---- Stage 6: final fallback via the OS mapping boundary ----------------
    // PHASE 7 CRITICAL FIX: any size above the Tiny boundary that the Pool/Mid/
    // ACE stages did not serve (ACE disabled, OOM, or a range no tier covers)
    // is mapped directly instead of failing.
    //   size >= threshold            : large allocation
    //   tiny_max < size < threshold  : "gap" allocation
    //   size <= tiny_max             : Tiny was responsible; report failure
    // Every fallback mapping is counted globally.
    extern _Atomic uint64_t g_final_fallback_mmap_count;
    void* ptr;
    if (size >= threshold) {
        // Large allocation (>= 2MB default): descend via single boundary
        atomic_fetch_add(&g_final_fallback_mmap_count, 1);
        ptr = hak_os_map_boundary(size, site_id);
    } else if (size > tiny_max) {
        // Gap allocation: map as a last resort when the tiers above failed
        atomic_fetch_add(&g_final_fallback_mmap_count, 1);
#if HAKMEM_DEBUG_VERBOSE
        static _Atomic int gap_alloc_count = 0;
        if (atomic_fetch_add(&gap_alloc_count, 1) < 3) {
            fprintf(stderr, "[HAKMEM] INFO: mid-gap fallback size=%zu\n", size);
        }
#endif
        ptr = hak_os_map_boundary(size, site_id);
    } else {
        // size <= tiny_max but Tiny (and every later stage) produced nothing.
        static _Atomic int oom_count = 0;
        int count = atomic_fetch_add(&oom_count, 1);
        if (count < 10) {
            fprintf(stderr, "[HAKMEM] OOM: Unexpected allocation path for size=%zu, returning NULL\n", size);
            fprintf(stderr, "[HAKMEM] (OOM count: %d) This should not happen!\n", count + 1);
        }
#if HAKMEM_DEBUG_TIMING
        HKM_TIME_START(t_malloc);
        HKM_TIME_END(HKM_CAT_FALLBACK_MALLOC, t_malloc); // Keep timing for compatibility
#endif
        errno = ENOMEM;
        return NULL;
    }
    if (!ptr) return NULL;

    if (g_evo_sample_mask > 0) { hak_evo_record_size(size); }

    // The header is read from HEADER_SIZE bytes before the returned pointer;
    // it is only stamped with site/class info when its magic checks out.
    AllocHeader* hdr = (AllocHeader*)((char*)ptr - HEADER_SIZE);
    if (hdr->magic != HAKMEM_MAGIC) {
        fprintf(stderr, "[hakmem] ERROR: Invalid magic in allocated header!\n");
        return ptr;
    }
    hdr->alloc_site = site_id;
    hdr->class_bytes = (size >= threshold) ? threshold : 0;
#if HAKMEM_DEBUG_TIMING
    HKM_TIME_END(HKM_CAT_HAK_ALLOC, t0);
#endif
    return ptr;
}
#endif // HAK_ALLOC_API_INC_H