Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)

IMPLEMENTATION:
===============
Add dynamic boundary adjustment between Tiny and Mid allocators via
HAKMEM_TINY_MAX_CLASS environment variable for performance tuning.

Changes:
--------
1. hakmem_tiny.h/c: Add tiny_get_max_size() - reads ENV and maps class
   to max usable size (default: class 7 = 1023B, can reduce to class 5 = 255B)

2. hakmem_mid_mt.h/c: Add mid_get_min_size() - returns tiny_get_max_size() + 1
   to ensure no size gap between allocators

3. hak_alloc_api.inc.h: Replace static TINY_MAX_SIZE with dynamic
   tiny_get_max_size() call in allocation routing logic

4. Size gap fix: Mid's range now dynamically adjusts based on Tiny's max
   (prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5)

A/B BENCHMARK RESULTS:
======================
Config A (Default, C0-C7, Tiny up to 1023B):
  128B:  6.34M ops/s  |  256B:  6.34M ops/s
  512B:  5.55M ops/s  |  1024B: 5.91M ops/s

Config B (Reduced, C0-C5, Tiny up to 255B):
  128B:  1.38M ops/s (-78%)  |  256B:  1.36M ops/s (-79%)
  512B:  1.33M ops/s (-76%)  |  1024B: 1.37M ops/s (-77%)

FINDINGS:
=========
✅ Size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5
❌ Severe performance degradation (-76% to -79%) when reducing Tiny coverage
❌ Even 128B degraded (should still use Tiny) - possible class filtering issue
⚠️  Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation for small sizes

HYPOTHESIS:
-----------
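Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
- Severe internal fragmentation (1024B request → 8KB block = 87% waste)
- Poor cache utilization
- Consistent ~1.3M ops/s across all sizes (same 8KB class)

Note: this hypothesis does not account for the 128B result. With
HAKMEM_TINY_MAX_CLASS=5 a 128B request is still within Tiny's range
(C5 usable size is 255B), so its regression must have a different cause
(see the class filtering question under FINDINGS) and needs separate
investigation.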

RECOMMENDATION:
===============
**Keep default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B)**

Reducing Tiny coverage is COUNTERPRODUCTIVE with current Mid allocator design.
To make this viable, Mid would need finer size classes for 256B-8KB range.

ENV USAGE (for future experimentation):
----------------------------------------
export HAKMEM_TINY_MAX_CLASS=7  # Default (C0-C7, up to 1023B)
export HAKMEM_TINY_MAX_CLASS=5  # Reduced (C0-C5, up to 255B) - NOT recommended

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Moe Charm (CI)
2025-11-16 01:26:48 +09:00
parent a4ef2fa1f1
commit 6818e350c4
5 changed files with 67 additions and 14 deletions

View File

@ -2,6 +2,8 @@
#ifndef HAK_ALLOC_API_INC_H #ifndef HAK_ALLOC_API_INC_H
#define HAK_ALLOC_API_INC_H #define HAK_ALLOC_API_INC_H
#include "../hakmem_tiny.h" // For tiny_get_max_size() (Phase 16)
#ifdef HAKMEM_POOL_TLS_PHASE1 #ifdef HAKMEM_POOL_TLS_PHASE1
#include "../pool_tls.h" #include "../pool_tls.h"
#endif #endif
@ -29,7 +31,9 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
uintptr_t site_id = (uintptr_t)site; uintptr_t site_id = (uintptr_t)site;
if (__builtin_expect(size <= TINY_MAX_SIZE, 1)) { // Phase 16: Dynamic Tiny max size (ENV: HAKMEM_TINY_MAX_CLASS)
// Default: 1023B (C0-C7); can be reduced to 255B (C0-C5), which routes 256B-1023B requests to Mid
if (__builtin_expect(size <= tiny_get_max_size(), 1)) {
#if HAKMEM_DEBUG_TIMING #if HAKMEM_DEBUG_TIMING
HKM_TIME_START(t_tiny); HKM_TIME_START(t_tiny);
#endif #endif
@ -49,10 +53,10 @@ inline void* hak_alloc_at(size_t size, hak_callsite_t site) {
if (tiny_ptr) { hkm_ace_track_alloc(); return tiny_ptr; } if (tiny_ptr) { hkm_ace_track_alloc(); return tiny_ptr; }
// PHASE 7 CRITICAL FIX: No malloc fallback for Tiny failures // PHASE 7 CRITICAL FIX: No malloc fallback for Tiny failures
// If Tiny fails for size <= TINY_MAX_SIZE, let it flow to Mid/ACE layers // If Tiny fails for size <= tiny_get_max_size(), let it flow to Mid/ACE layers
// This prevents mixed HAKMEM/libc allocation bugs // This prevents mixed HAKMEM/libc allocation bugs
#if HAKMEM_TINY_HEADER_CLASSIDX #if HAKMEM_TINY_HEADER_CLASSIDX
if (!tiny_ptr && size <= TINY_MAX_SIZE) { if (!tiny_ptr && size <= tiny_get_max_size()) {
#if !HAKMEM_BUILD_RELEASE #if !HAKMEM_BUILD_RELEASE
// Tiny failed - log and continue to Mid/ACE (no early return!) // Tiny failed - log and continue to Mid/ACE (no early return!)
static int log_count = 0; static int log_count = 0;

View File

@ -408,8 +408,8 @@ void mid_mt_init(void) {
* Thread-safe, lock-free (uses TLS) * Thread-safe, lock-free (uses TLS)
*/ */
void* mid_mt_alloc(size_t size) { void* mid_mt_alloc(size_t size) {
// Validate size range // Validate size range (Phase 16: dynamic min size based on Tiny's max)
if (unlikely(size < MID_MIN_SIZE || size > MID_MAX_SIZE)) { if (unlikely(size < mid_get_min_size() || size > MID_MAX_SIZE)) {
return NULL; return NULL;
} }

View File

@ -35,10 +35,17 @@ extern "C" {
#define MID_NUM_CLASSES 3 // Total number of size classes #define MID_NUM_CLASSES 3 // Total number of size classes
// Phase 13: Close Tiny/Mid gap. // Phase 13: Close Tiny/Mid gap.
// Tiny now handles up to 1023B (C7 usable size), so Mid must accept // Phase 16: Dynamic Mid min size - must start where Tiny ends
// 1KB-32KB. We keep size classes at 8/16/32KB; sub-8KB sizes use the // Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS:
// 8KB class with some internal slack. // - HAKMEM_TINY_MAX_CLASS=7 (default) → Tiny up to 1023B → Mid starts at 1024B
#define MID_MIN_SIZE (1024) // 1KB (was 8KB) // - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B
#include "hakmem_tiny.h" // For tiny_get_max_size()
/**
 * mid_get_min_size - Smallest request size (bytes) accepted by the Mid allocator
 *
 * Always one byte above tiny_get_max_size(), so the Mid range begins
 * immediately after the Tiny range with no gap in between.
 *
 * @return tiny_get_max_size() + 1
 */
static inline size_t mid_get_min_size(void) {
    const size_t tiny_limit = tiny_get_max_size();
    return tiny_limit + 1;
}
#define MID_MIN_SIZE_STATIC (1024) // Static fallback (C7 default)
#define MID_MAX_SIZE (32 * 1024) // 32KB #define MID_MAX_SIZE (32 * 1024) // 32KB
#define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments) #define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
@ -125,9 +132,11 @@ extern MidGlobalRegistry g_mid_registry;
void mid_mt_init(void); void mid_mt_init(void);
/** /**
* mid_mt_alloc - Allocate memory from Mid Range pool (8-32KB) * mid_mt_alloc - Allocate memory from Mid Range pool
* *
* @param size Allocation size (must be MID_MIN_SIZE ≤ size ≤ MID_MAX_SIZE) * @param size Allocation size (must be mid_get_min_size() ≤ size ≤ MID_MAX_SIZE)
* Phase 16: Range adjusts dynamically based on Tiny's max size
* Default: 1024B-32KB, can expand to 256B-32KB if Tiny reduced to C0-C5
* @return Allocated pointer (aligned to block_size), or NULL on failure * @return Allocated pointer (aligned to block_size), or NULL on failure
* *
* Thread-safety: Lock-free (uses TLS) * Thread-safety: Lock-free (uses TLS)
@ -211,10 +220,12 @@ static inline size_t mid_class_to_size(int class_idx) {
* mid_is_in_range - Check if size is in Mid Range pool range * mid_is_in_range - Check if size is in Mid Range pool range
* *
* @param size Allocation size * @param size Allocation size
* @return true if 8KB ≤ size ≤ 32KB * @return true if (tiny_max+1) ≤ size ≤ 32KB
*
* Phase 16: Dynamic range - adjusts based on Tiny's max size
*/ */
static inline bool mid_is_in_range(size_t size) { static inline bool mid_is_in_range(size_t size) {
return (size >= MID_MIN_SIZE && size <= MID_MAX_SIZE); return (size >= mid_get_min_size() && size <= MID_MAX_SIZE);
} }
// ============================================================================ // ============================================================================

View File

@ -48,6 +48,36 @@ const size_t g_tiny_class_sizes[TINY_NUM_CLASSES] = {
1024 // Class 7: 1024B total = [Header 1B][Data 1023B] 1024 // Class 7: 1024B total = [Header 1B][Data 1023B]
}; };
// ============================================================================
// Phase 16: Dynamic Tiny Max Size (ENV: HAKMEM_TINY_MAX_CLASS)
// ============================================================================
/**
 * tiny_get_max_size - Largest request size (bytes) served by the Tiny allocator
 *
 * Reads HAKMEM_TINY_MAX_CLASS on the first call and caches the selected class
 * in a function-local static; subsequent calls only perform the table lookup.
 *
 * The variable must be a whole decimal integer naming a class index covered by
 * the lookup table (0..7). If it is unset, empty, non-numeric ("abc"), only
 * partially numeric ("5abc") or out of range, the default (highest) class 7
 * is used.
 *
 * @return Max usable size of the selected class (stride - 1):
 *         7, 15, 31, 63, 127, 255, 511 or 1023 bytes
 *
 * NOTE(review): the cache is a plain static int with no synchronization.
 * It is written exactly once per initializing call and every caller computes
 * the same value - confirm this is acceptable for concurrent first calls.
 */
size_t tiny_get_max_size(void) {
    // Map class to max usable size (stride - 1)
    // C0=8B, C1=16B, C2=32B, C3=64B, C4=128B, C5=256B, C6=512B, C7=1024B
    static const size_t class_to_max_size[] = {
        7, 15, 31, 63, 127, 255, 511, 1023
    };
    // Bounds are derived from the table itself so an accepted class index can
    // never read past the end of the array.
    enum {
        CLASS_COUNT = sizeof(class_to_max_size) / sizeof(class_to_max_size[0]),
        DEFAULT_CLASS = CLASS_COUNT - 1   // all classes (C0-C7)
    };

    static int g_max_class = -1;   // -1 = environment not read yet

    if (__builtin_expect(g_max_class == -1, 0)) {
        int selected = DEFAULT_CLASS;
        const char *env = getenv("HAKMEM_TINY_MAX_CLASS");

        if (env && *env) {
            // strtol (unlike atoi) lets us tell "no digits" apart from a real 0
            // and is well-defined on overflow: it saturates to LONG_MIN/LONG_MAX,
            // which the range check below rejects.
            char *end = NULL;
            long parsed = strtol(env, &end, 10);

            if (end != env && *end == '\0' &&
                parsed >= 0 && parsed < (long)CLASS_COUNT) {
                selected = (int)parsed;
            }
        }

        // Single store: the cache never holds an intermediate value.
        g_max_class = selected;
    }

    return class_to_max_size[g_max_class];
}
// ============================================================================ // ============================================================================
// PRIORITY 1-4: Integrity Check Counters // PRIORITY 1-4: Integrity Check Counters
// ============================================================================ // ============================================================================

View File

@ -25,7 +25,15 @@ int hak_is_initializing(void);
#define TINY_SLAB_SIZE (64 * 1024) // 64KB per slab #define TINY_SLAB_SIZE (64 * 1024) // 64KB per slab
// Phase E1-CORRECT: All Tiny classes use a 1-byte header. // Phase E1-CORRECT: All Tiny classes use a 1-byte header.
// C7 stride=1024B → usable 1023B (1024-1). 1024B は Mid allocator に委譲する。 // C7 stride=1024B → usable 1023B (1024-1). 1024B は Mid allocator に委譲する。
#define TINY_MAX_SIZE 1023 // Tiny handles up to 1023B (C7 usable size) #define TINY_MAX_SIZE 1023 // Tiny handles up to 1023B (C7 usable size) - default
// Phase 16: Dynamic Tiny max size control (ENV: HAKMEM_TINY_MAX_CLASS)
// Strategy: Reduce Tiny coverage to ~256B, delegate 512/1024B to Mid
// ENV values:
// HAKMEM_TINY_MAX_CLASS=5 → Tiny handles up to 255B (C0-C5)
// HAKMEM_TINY_MAX_CLASS=7 → Tiny handles up to 1023B (C0-C7, default)
// Forward declaration (implementation in hakmem_tiny.c)
size_t tiny_get_max_size(void);
// ============================================================================ // ============================================================================
// Size Classes // Size Classes