Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)
IMPLEMENTATION:
===============
Add dynamic boundary adjustment between the Tiny and Mid allocators via the HAKMEM_TINY_MAX_CLASS environment variable for performance tuning.

Changes:
--------
1. hakmem_tiny.h/c: Add tiny_get_max_size() - reads the ENV and maps the class to its max usable size (default: class 7 = 1023B, can be reduced to class 5 = 255B)
2. hakmem_mid_mt.h/c: Add mid_get_min_size() - returns tiny_get_max_size() + 1 to ensure no size gap between the allocators
3. hak_alloc_api.inc.h: Replace the static TINY_MAX_SIZE with a dynamic tiny_get_max_size() call in the allocation routing logic
4. Size gap fix: Mid's range now adjusts dynamically based on Tiny's max (prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5)

A/B BENCHMARK RESULTS:
======================
Config A (Default, C0-C7, Tiny up to 1023B):
  128B: 6.34M ops/s | 256B: 6.34M ops/s
  512B: 5.55M ops/s | 1024B: 5.91M ops/s

Config B (Reduced, C0-C5, Tiny up to 255B):
  128B: 1.38M ops/s (-78%) | 256B: 1.36M ops/s (-79%)
  512B: 1.33M ops/s (-76%) | 1024B: 1.37M ops/s (-77%)

FINDINGS:
=========
✅ Size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5
❌ Severe performance degradation (-76% to -79%) when reducing Tiny coverage
❌ Even 128B degraded (should still use Tiny) - possible class filtering issue
⚠️ Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation for small sizes

HYPOTHESIS:
-----------
The Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
- Severe internal fragmentation (1024B request → 8KB block = 87% waste)
- Poor cache utilization
- Consistent ~1.3M ops/s across all sizes (same 8KB class)

RECOMMENDATION:
===============
**Keep default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B)**

Reducing Tiny coverage is COUNTERPRODUCTIVE with the current Mid allocator design. To make this viable, Mid would need finer size classes for the 256B-8KB range.
ENV USAGE (for future experimentation):
----------------------------------------
export HAKMEM_TINY_MAX_CLASS=7  # Default (C0-C7, up to 1023B)
export HAKMEM_TINY_MAX_CLASS=5  # Reduced (C0-C5, up to 255B) - NOT recommended

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@ -2,6 +2,8 @@
|
|||||||
#ifndef HAK_ALLOC_API_INC_H
|
#ifndef HAK_ALLOC_API_INC_H
|
||||||
#define HAK_ALLOC_API_INC_H
|
#define HAK_ALLOC_API_INC_H
|
||||||
|
|
||||||
|
#include "../hakmem_tiny.h" // For tiny_get_max_size() (Phase 16)
|
||||||
|
|
||||||
#ifdef HAKMEM_POOL_TLS_PHASE1
|
#ifdef HAKMEM_POOL_TLS_PHASE1
|
||||||
#include "../pool_tls.h"
|
#include "../pool_tls.h"
|
||||||
#endif
|
#endif
|
||||||
@ -29,7 +31,9 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
|||||||
|
|
||||||
uintptr_t site_id = (uintptr_t)site;
|
uintptr_t site_id = (uintptr_t)site;
|
||||||
|
|
||||||
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) {
|
// Phase 16: Dynamic Tiny max size (ENV: HAKMEM_TINY_MAX_CLASS)
|
||||||
|
// Default: 1023B (C0-C7), can be reduced to 255B (C0-C5) to delegate 512/1024B to Mid
|
||||||
|
if (__builtin_expect(size <= tiny_get_max_size(), 1)) {
|
||||||
#if HAKMEM_DEBUG_TIMING
|
#if HAKMEM_DEBUG_TIMING
|
||||||
HKM_TIME_START(t_tiny);
|
HKM_TIME_START(t_tiny);
|
||||||
#endif
|
#endif
|
||||||
@ -49,10 +53,10 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
|
|||||||
if (tiny_ptr) { hkm_ace_track_alloc(); return tiny_ptr; }
|
if (tiny_ptr) { hkm_ace_track_alloc(); return tiny_ptr; }
|
||||||
|
|
||||||
// PHASE 7 CRITICAL FIX: No malloc fallback for Tiny failures
|
// PHASE 7 CRITICAL FIX: No malloc fallback for Tiny failures
|
||||||
// If Tiny fails for size <= TINY_MAX_SIZE, let it flow to Mid/ACE layers
|
// If Tiny fails for size <= tiny_get_max_size(), let it flow to Mid/ACE layers
|
||||||
// This prevents mixed HAKMEM/libc allocation bugs
|
// This prevents mixed HAKMEM/libc allocation bugs
|
||||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||||
if (!tiny_ptr && size <= TINY_MAX_SIZE) {
|
if (!tiny_ptr && size <= tiny_get_max_size()) {
|
||||||
#if !HAKMEM_BUILD_RELEASE
|
#if !HAKMEM_BUILD_RELEASE
|
||||||
// Tiny failed - log and continue to Mid/ACE (no early return!)
|
// Tiny failed - log and continue to Mid/ACE (no early return!)
|
||||||
static int log_count = 0;
|
static int log_count = 0;
|
||||||
|
|||||||
@ -408,8 +408,8 @@ void mid_mt_init(void) {
|
|||||||
* Thread-safe, lock-free (uses TLS)
|
* Thread-safe, lock-free (uses TLS)
|
||||||
*/
|
*/
|
||||||
void* mid_mt_alloc(size_t size) {
|
void* mid_mt_alloc(size_t size) {
|
||||||
// Validate size range
|
// Validate size range (Phase 16: dynamic min size based on Tiny's max)
|
||||||
if (unlikely(size < MID_MIN_SIZE || size > MID_MAX_SIZE)) {
|
if (unlikely(size < mid_get_min_size() || size > MID_MAX_SIZE)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -35,10 +35,17 @@ extern "C" {
|
|||||||
#define MID_NUM_CLASSES 3 // Total number of size classes
|
#define MID_NUM_CLASSES 3 // Total number of size classes
|
||||||
|
|
||||||
// Phase 13: Close Tiny/Mid gap.
|
// Phase 13: Close Tiny/Mid gap.
|
||||||
// Tiny now handles up to 1023B (C7 usable size), so Mid must accept
|
// Phase 16: Dynamic Mid min size - must start where Tiny ends
|
||||||
// 1KB-32KB. We keep size classes at 8/16/32KB; sub-8KB sizes use the
|
// Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS:
|
||||||
// 8KB class with some internal slack.
|
// - HAKMEM_TINY_MAX_CLASS=7 (default) → Tiny up to 1023B → Mid starts at 1024B
|
||||||
#define MID_MIN_SIZE (1024) // 1KB (was 8KB)
|
// - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B
|
||||||
|
#include "hakmem_tiny.h" // For tiny_get_max_size()
|
||||||
|
|
||||||
|
// Smallest request size (in bytes) accepted by the Mid range: exactly one
// byte above the value currently reported by tiny_get_max_size(), so the
// two ranges meet without a gap.
static inline size_t mid_get_min_size(void) {
    const size_t tiny_limit = tiny_get_max_size();
    return tiny_limit + 1;
}
|
||||||
|
|
||||||
|
#define MID_MIN_SIZE_STATIC (1024) // Static fallback (C7 default)
|
||||||
#define MID_MAX_SIZE (32 * 1024) // 32KB
|
#define MID_MAX_SIZE (32 * 1024) // 32KB
|
||||||
#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
|
#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
|
||||||
|
|
||||||
@ -125,9 +132,11 @@ extern MidGlobalRegistry g_mid_registry;
|
|||||||
void mid_mt_init(void);
|
void mid_mt_init(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* mid_mt_alloc - Allocate memory from Mid Range pool (8-32KB)
|
* mid_mt_alloc - Allocate memory from Mid Range pool
|
||||||
*
|
*
|
||||||
* @param size Allocation size (must be MID_MIN_SIZE ≤ size ≤ MID_MAX_SIZE)
|
* @param size Allocation size (must be mid_get_min_size() ≤ size ≤ MID_MAX_SIZE)
|
||||||
|
* Phase 16: Range adjusts dynamically based on Tiny's max size
|
||||||
|
* Default: 1024B-32KB, can expand to 256B-32KB if Tiny reduced to C0-C5
|
||||||
* @return Allocated pointer (aligned to block_size), or NULL on failure
|
* @return Allocated pointer (aligned to block_size), or NULL on failure
|
||||||
*
|
*
|
||||||
* Thread-safety: Lock-free (uses TLS)
|
* Thread-safety: Lock-free (uses TLS)
|
||||||
@ -211,10 +220,12 @@ static inline size_t mid_class_to_size(int class_idx) {
|
|||||||
* mid_is_in_range - Check if size is in Mid Range pool range
|
* mid_is_in_range - Check if size is in Mid Range pool range
|
||||||
*
|
*
|
||||||
* @param size Allocation size
|
* @param size Allocation size
|
||||||
* @return true if 8KB ≤ size ≤ 32KB
|
* @return true if (tiny_max+1) ≤ size ≤ 32KB
|
||||||
|
*
|
||||||
|
* Phase 16: Dynamic range - adjusts based on Tiny's max size
|
||||||
*/
|
*/
|
||||||
static inline bool mid_is_in_range(size_t size) {
|
static inline bool mid_is_in_range(size_t size) {
|
||||||
return (size >= MID_MIN_SIZE && size <= MID_MAX_SIZE);
|
return (size >= mid_get_min_size() && size <= MID_MAX_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@ -48,6 +48,36 @@ const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
|
|||||||
1024 // Class 7: 1024B total = [Header 1B][Data 1023B]
|
1024 // Class 7: 1024B total = [Header 1B][Data 1023B]
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// Phase 16: Dynamic Tiny Max Size (ENV: HAKMEM_TINY_MAX_CLASS)
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// Get dynamic max size for Tiny allocator based on ENV configuration
|
||||||
|
// Default: 1023B (C0-C7), can be reduced to 255B (C0-C5)
|
||||||
|
size_t tiny_get_max_size(void) {
|
||||||
|
static int g_max_class = -1;
|
||||||
|
if (__builtin_expect(g_max_class == -1, 0)) {
|
||||||
|
const char* env = getenv("HAKMEM_TINY_MAX_CLASS");
|
||||||
|
if (env && *env) {
|
||||||
|
int max_class = atoi(env);
|
||||||
|
if (max_class >= 0 && max_class < TINY_NUM_CLASSES) {
|
||||||
|
g_max_class = max_class;
|
||||||
|
} else {
|
||||||
|
g_max_class = 7; // Default: all classes (C0-C7)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
g_max_class = 7; // Default: all classes
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map class to max usable size (stride - 1)
|
||||||
|
// C0=8B, C1=16B, C2=32B, C3=64B, C4=128B, C5=256B, C6=512B, C7=1024B
|
||||||
|
static const size_t class_to_max_size[TINY_NUM_CLASSES] = {
|
||||||
|
7, 15, 31, 63, 127, 255, 511, 1023
|
||||||
|
};
|
||||||
|
return class_to_max_size[g_max_class];
|
||||||
|
}
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// PRIORITY 1-4: Integrity Check Counters
|
// PRIORITY 1-4: Integrity Check Counters
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
|
|||||||
@ -25,7 +25,15 @@ int hak_is_initializing(void);
|
|||||||
#define TINY_SLAB_SIZE (64 * 1024) // 64KB per slab
|
#define TINY_SLAB_SIZE (64 * 1024) // 64KB per slab
|
||||||
// Phase E1-CORRECT: All Tiny classes use a 1-byte header.
|
// Phase E1-CORRECT: All Tiny classes use a 1-byte header.
|
||||||
// C7 stride=1024B → usable 1023B (1024-1). 1024B は Mid allocator に委譲する。
|
// C7 stride=1024B → usable 1023B (1024-1). 1024B は Mid allocator に委譲する。
|
||||||
#define TINY_MAX_SIZE 1023 // Tiny handles up to 1023B (C7 usable size)
|
#define TINY_MAX_SIZE 1023 // Tiny handles up to 1023B (C7 usable size) - default
|
||||||
|
|
||||||
|
// Phase 16: Dynamic Tiny max size control (ENV: HAKMEM_TINY_MAX_CLASS)
|
||||||
|
// Strategy: Reduce Tiny coverage to ~256B, delegate 512/1024B to Mid
|
||||||
|
// ENV values:
|
||||||
|
// HAKMEM_TINY_MAX_CLASS=5 → Tiny handles up to 255B (C0-C5)
|
||||||
|
// HAKMEM_TINY_MAX_CLASS=7 → Tiny handles up to 1023B (C0-C7, default)
|
||||||
|
// Forward declaration (implementation in hakmem_tiny.c)
|
||||||
|
size_t tiny_get_max_size(void);
|
||||||
|
|
||||||
// ============================================================================
|
// ============================================================================
|
||||||
// Size Classes
|
// Size Classes
|
||||||
|
|||||||
Reference in New Issue
Block a user