/**
 * hakmem_mid_mt.h
 *
 * Mid Range Multi-threaded Allocator (1KB-32KB)
 * mimalloc-style per-thread segment design for optimal MT performance
 *
 * Part of Hybrid Approach:
 * - ≤1023B: Tiny Pool (header-based, C7 usable size)
 * - 1KB-32KB: Mid MT (this module, mimalloc-style per-thread)
 * - ≥64KB: Large Pool (learning-based, ELO strategies)
 *
 * Created: 2025-11-01
 * Goal: 46M → 100-120M ops/s (2.2-2.6x improvement)
 */
|
|
|
|
|
|
|
|
|
|
#ifndef HAKMEM_MID_MT_H
|
|
|
|
|
#define HAKMEM_MID_MT_H
|
|
|
|
|
|
|
|
|
|
#include <stddef.h>
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdbool.h>
|
|
|
|
|
#include <pthread.h>
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Size Classes
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
// Size class indices. MID_NUM_CLASSES sizes both g_mid_segments[] and the
// block-size table in mid_class_to_size(), so the indices must stay dense
// and start at 0.
#define MID_SIZE_CLASS_8K  0  // 8KB blocks
#define MID_SIZE_CLASS_16K 1  // 16KB blocks
#define MID_SIZE_CLASS_32K 2  // 32KB blocks
#define MID_NUM_CLASSES    3  // Total number of size classes
|
|
|
|
|
|
// Phase 13: Close Tiny/Mid gap.
//
// Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)
//
// Implementation:
//   Dynamic boundary adjustment between the Tiny and Mid allocators via the
//   HAKMEM_TINY_MAX_CLASS environment variable, for performance tuning.
//   1. hakmem_tiny.h/c: tiny_get_max_size() reads the ENV and maps the class
//      to a max usable size (default: class 7 = 1023B, can reduce to
//      class 5 = 255B).
//   2. hakmem_mid_mt.h/c: mid_get_min_size() returns tiny_get_max_size() + 1
//      to ensure there is no size gap between the allocators.
//   3. hak_alloc_api.inc.h: the static TINY_MAX_SIZE is replaced by a dynamic
//      tiny_get_max_size() call in the allocation routing logic.
//   4. Size gap fix: Mid's range now adjusts dynamically based on Tiny's max
//      (prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5).
//
// A/B benchmark results:
//   Config A (default, C0-C7, Tiny up to 1023B):
//     128B: 6.34M ops/s | 256B: 6.34M ops/s
//     512B: 5.55M ops/s | 1024B: 5.91M ops/s
//   Config B (reduced, C0-C5, Tiny up to 255B):
//     128B: 1.38M ops/s (-78%) | 256B: 1.36M ops/s (-79%)
//     512B: 1.33M ops/s (-76%) | 1024B: 1.37M ops/s (-77%)
//
// Findings:
//   - OK: size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5.
//   - PROBLEM: severe performance degradation (-76% to -79%) when reducing
//     Tiny coverage.
//   - PROBLEM: even 128B degraded (should still use Tiny) - possible class
//     filtering issue.
//   - WARNING: Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation
//     for small sizes.
//
// Hypothesis:
//   The Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
//   - Severe internal fragmentation (1024B request → 8KB block = 87% waste)
//   - Poor cache utilization
//   - Consistent ~1.3M ops/s across all sizes (same 8KB class)
//
// Recommendation:
//   Keep the default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B).
//   Reducing Tiny coverage is counterproductive with the current Mid
//   allocator design. To make it viable, Mid would need finer size classes
//   for the 256B-8KB range.
//
// ENV usage (for future experimentation):
//   export HAKMEM_TINY_MAX_CLASS=7   # Default (C0-C7, up to 1023B)
//   export HAKMEM_TINY_MAX_CLASS=5   # Reduced (C0-C5, up to 255B) - NOT recommended
//
// Phase 16: Dynamic Mid min size - must start where Tiny ends
// Tiny max size is configurable via HAKMEM_TINY_MAX_CLASS:
// - HAKMEM_TINY_MAX_CLASS=7 (default) → Tiny up to 1023B → Mid starts at 1024B
// - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B
|
|
|
|
|
#include "hakmem_tiny.h" // For tiny_get_max_size()
|
|
|
|
|
|
|
|
|
|
/**
 * mid_get_min_size - Smallest request size served by the Mid allocator
 *
 * @return tiny_get_max_size() + 1, i.e. the first size past the Tiny range
 *
 * The value is recomputed from tiny_get_max_size() on every call; nothing is
 * cached in this function.
 */
static inline size_t mid_get_min_size(void) {
    return tiny_get_max_size() + 1;  // Mid starts where Tiny ends
}
|
|
|
|
|
|
|
|
|
|
// Static size limits for the Mid range.
// NOTE(review): MID_CHUNK_SIZE (4MB) and MID_DEFAULT_CHUNK_SIZE (64KB, defined
// in the configuration section) both describe a chunk size; confirm which one
// the implementation actually uses.
#define MID_MIN_SIZE_STATIC (1024)             // Static fallback (C7 default)
#define MID_MAX_SIZE        (32 * 1024)        // 32KB
#define MID_CHUNK_SIZE      (4 * 1024 * 1024)  // 4MB chunks (same as mimalloc segments)
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Data Structures
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/**
 * MidThreadSegment - Per-thread segment for lock-free allocation
 *
 * The struct as a whole is aligned to 64 bytes (one cache line). Fields are
 * grouped by how they are used:
 * - Fast path:  free_list, current, end, used_count
 * - Metadata:   chunk_base, chunk_size, block_size, capacity
 * - Statistics: alloc_count, free_count, refill_count
 *
 * NOTE(review): the groups are NOT padded to one cache line each. With 8-byte
 * pointers and size_t, the fast-path and metadata groups are 32 bytes apiece
 * and share the first cache line; the statistics group starts the second.
 * The statistics fields are always present, regardless of MID_ENABLE_STATS.
 */
typedef struct MidThreadSegment {
    // === Fast path ===
    void*    free_list;     // Free objects linked list (NULL if empty)
    void*    current;       // Bump allocation pointer
    void*    end;           // End of current chunk
    uint32_t used_count;    // Number of allocated blocks
    uint32_t padding0;      // Alignment padding

    // === Metadata ===
    void*    chunk_base;    // Base address of current chunk
    size_t   chunk_size;    // Size of current chunk in bytes
    size_t   block_size;    // Size of each block (8KB/16KB/32KB)
    uint32_t capacity;      // Total blocks in chunk
    uint32_t padding1;      // Alignment padding

    // === Statistics ===
    uint64_t alloc_count;   // Total allocations
    uint64_t free_count;    // Total frees
    uint32_t refill_count;  // Number of chunk refills
    uint32_t padding2;      // Alignment padding
} __attribute__((aligned(64))) MidThreadSegment;
|
|
|
|
|
|
|
|
|
|
/**
 * MidSegmentRegistry - One entry of the global registry used by free()
 *
 * Used to find the owning segment when freeing a pointer.
 * Entries are sorted by base address for O(log N) binary search.
 */
typedef struct MidSegmentRegistry {
    void*  base;        // Segment base address
    size_t block_size;  // Block size (8KB/16KB/32KB)
    int    class_idx;   // Size class index (0-2)
    int    padding;     // Alignment padding
} MidSegmentRegistry;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* MidGlobalRegistry - Global registry manager
|
|
|
|
|
*
|
|
|
|
|
* Thread-safety: Protected by pthread_mutex
|
|
|
|
|
* Performance: Lock only during registry operations (low frequency)
|
|
|
|
|
*/
|
|
|
|
|
typedef struct MidGlobalRegistry {
|
|
|
|
|
MidSegmentRegistry* entries; // Dynamic array of registry entries
|
|
|
|
|
uint32_t count; // Number of entries
|
|
|
|
|
uint32_t capacity; // Array capacity
|
|
|
|
|
pthread_mutex_t lock; // Registry lock
|
|
|
|
|
} MidGlobalRegistry;
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Global Variables
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
// TLS: Each thread has its own segments (lock-free!)
|
|
|
|
|
extern __thread MidThreadSegment g_mid_segments[MID_NUM_CLASSES];
|
|
|
|
|
|
|
|
|
|
// Global registry (protected by lock)
|
|
|
|
|
extern MidGlobalRegistry g_mid_registry;
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// API Functions
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/**
 * mid_mt_init - Initialize Mid Range MT allocator
 *
 * Call once at startup (thread-safe, idempotent)
 */
void mid_mt_init(void);
|
|
|
|
|
|
|
|
|
|
/**
 * mid_mt_alloc - Allocate memory from Mid Range pool
 *
 * @param size Allocation size (must be mid_get_min_size() ≤ size ≤ MID_MAX_SIZE)
 *             Phase 16: Range adjusts dynamically based on Tiny's max size
 *             Default: 1024B-32KB, can expand to 256B-32KB if Tiny reduced to C0-C5
 * @return Allocated pointer (aligned to block_size), or NULL on failure
 *
 * Thread-safety: Lock-free (uses TLS)
 * Performance: O(1) fast path, O(1) amortized
 *
 * Allocation steps, fastest first:
 * 1. Check free_list (most common, ~4-5 instructions)
 * 2. Bump allocation if free_list empty (~6-8 instructions)
 * 3. Refill chunk if segment exhausted (rare, ~0.1%)
 */
void* mid_mt_alloc(size_t size);
|
|
|
|
|
|
|
|
|
|
/**
 * mid_mt_free - Free memory allocated by mid_mt_alloc
 *
 * @param ptr Pointer to free (must be from mid_mt_alloc)
 * @param size Original allocation size (for size class lookup)
 *
 * Thread-safety: Lock-free if freeing to own thread's segment
 *                Requires registry lock if remote free (cross-thread)
 * Performance: O(1) local free, O(log N) remote free (registry lookup)
 *
 * Note: Phase 1 implementation does not handle remote free (memory leak)
 *       Phase 2 will implement per-segment atomic remote free list
 */
void mid_mt_free(void* ptr, size_t size);
|
|
|
|
|
|
|
|
|
|
/**
 * mid_mt_thread_exit - Cleanup thread-local segments
 *
 * Called on thread exit to release resources
 * Should be registered via pthread_key_create or __attribute__((destructor))
 */
void mid_mt_thread_exit(void);
|
|
|
|
|
|
|
|
|
|
/**
 * mid_registry_lookup - Find segment containing ptr (for free() path)
 *
 * @param ptr Pointer to lookup
 * @param out_block_size Output: block size if found
 * @param out_class_idx Output: size class index if found
 * @return true if found in Mid MT registry, false otherwise
 *
 * Used internally by hak_free_at() to identify Mid MT allocations
 */
bool mid_registry_lookup(void* ptr, size_t* out_block_size, int* out_class_idx);
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Inline Helper Functions
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* mid_size_to_class - Convert size to size class index
|
|
|
|
|
*
|
|
|
|
|
* @param size Allocation size
|
|
|
|
|
* @return Size class index (0-2), or -1 if out of range
|
|
|
|
|
*/
|
|
|
|
|
static inline int mid_size_to_class(size_t size) {
|
|
|
|
|
if (size <= 8192) return MID_SIZE_CLASS_8K;
|
|
|
|
|
if (size <= 16384) return MID_SIZE_CLASS_16K;
|
|
|
|
|
if (size <= 32768) return MID_SIZE_CLASS_32K;
|
|
|
|
|
return -1; // Out of range
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* mid_class_to_size - Convert size class to block size
|
|
|
|
|
*
|
|
|
|
|
* @param class_idx Size class index (0-2)
|
|
|
|
|
* @return Block size in bytes
|
|
|
|
|
*/
|
|
|
|
|
static inline size_t mid_class_to_size(int class_idx) {
|
|
|
|
|
static const size_t sizes[MID_NUM_CLASSES] = {
|
|
|
|
|
8192, // 8KB
|
|
|
|
|
16384, // 16KB
|
|
|
|
|
32768 // 32KB
|
|
|
|
|
};
|
|
|
|
|
return (class_idx >= 0 && class_idx < MID_NUM_CLASSES) ? sizes[class_idx] : 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* mid_is_in_range - Check if size is in Mid Range pool range
|
|
|
|
|
*
|
|
|
|
|
* @param size Allocation size
|
Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)
IMPLEMENTATION:
===============
Add dynamic boundary adjustment between Tiny and Mid allocators via
HAKMEM_TINY_MAX_CLASS environment variable for performance tuning.
Changes:
--------
1. hakmem_tiny.h/c: Add tiny_get_max_size() - reads ENV and maps class
to max usable size (default: class 7 = 1023B, can reduce to class 5 = 255B)
2. hakmem_mid_mt.h/c: Add mid_get_min_size() - returns tiny_get_max_size() + 1
to ensure no size gap between allocators
3. hak_alloc_api.inc.h: Replace static TINY_MAX_SIZE with dynamic
tiny_get_max_size() call in allocation routing logic
4. Size gap fix: Mid's range now dynamically adjusts based on Tiny's max
(prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5)
A/B BENCHMARK RESULTS:
======================
Config A (Default, C0-C7, Tiny up to 1023B):
128B: 6.34M ops/s | 256B: 6.34M ops/s
512B: 5.55M ops/s | 1024B: 5.91M ops/s
Config B (Reduced, C0-C5, Tiny up to 255B):
128B: 1.38M ops/s (-78%) | 256B: 1.36M ops/s (-79%)
512B: 1.33M ops/s (-76%) | 1024B: 1.37M ops/s (-77%)
FINDINGS:
=========
✅ Size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5
❌ Severe performance degradation (-76% to -79%) when reducing Tiny coverage
❌ Even 128B degraded (should still use Tiny) - possible class filtering issue
⚠️ Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation for small sizes
HYPOTHESIS:
-----------
Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
- Severe internal fragmentation (1024B request → 8KB block = 87% waste)
- Poor cache utilization
- Consistent ~1.3M ops/s across all sizes (same 8KB class)
RECOMMENDATION:
===============
**Keep default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B)**
Reducing Tiny coverage is COUNTERPRODUCTIVE with current Mid allocator design.
To make this viable, Mid would need finer size classes for 256B-8KB range.
ENV USAGE (for future experimentation):
----------------------------------------
export HAKMEM_TINY_MAX_CLASS=7 # Default (C0-C7, up to 1023B)
export HAKMEM_TINY_MAX_CLASS=5 # Reduced (C0-C5, up to 255B) - NOT recommended
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 01:26:48 +09:00
|
|
|
* @return true if (tiny_max+1) ≤ size ≤ 32KB
|
|
|
|
|
*
|
|
|
|
|
* Phase 16: Dynamic range - adjusts based on Tiny's max size
|
2025-11-28 18:14:31 +09:00
|
|
|
* PERF_OPT: Force inline to eliminate function call overhead in hot path
|
2025-11-05 12:31:14 +09:00
|
|
|
*/
|
2025-11-28 18:14:31 +09:00
|
|
|
__attribute__((always_inline))
|
2025-11-05 12:31:14 +09:00
|
|
|
static inline bool mid_is_in_range(size_t size) {
|
Phase 16: Dynamic Tiny/Mid Boundary with A/B Testing (ENV-controlled)
IMPLEMENTATION:
===============
Add dynamic boundary adjustment between Tiny and Mid allocators via
HAKMEM_TINY_MAX_CLASS environment variable for performance tuning.
Changes:
--------
1. hakmem_tiny.h/c: Add tiny_get_max_size() - reads ENV and maps class
to max usable size (default: class 7 = 1023B, can reduce to class 5 = 255B)
2. hakmem_mid_mt.h/c: Add mid_get_min_size() - returns tiny_get_max_size() + 1
to ensure no size gap between allocators
3. hak_alloc_api.inc.h: Replace static TINY_MAX_SIZE with dynamic
tiny_get_max_size() call in allocation routing logic
4. Size gap fix: Mid's range now dynamically adjusts based on Tiny's max
(prevents 256-1023B from falling through when HAKMEM_TINY_MAX_CLASS=5)
A/B BENCHMARK RESULTS:
======================
Config A (Default, C0-C7, Tiny up to 1023B):
128B: 6.34M ops/s | 256B: 6.34M ops/s
512B: 5.55M ops/s | 1024B: 5.91M ops/s
Config B (Reduced, C0-C5, Tiny up to 255B):
128B: 1.38M ops/s (-78%) | 256B: 1.36M ops/s (-79%)
512B: 1.33M ops/s (-76%) | 1024B: 1.37M ops/s (-77%)
FINDINGS:
=========
✅ Size gap fixed - no OOM crashes with HAKMEM_TINY_MAX_CLASS=5
❌ Severe performance degradation (-76% to -79%) when reducing Tiny coverage
❌ Even 128B degraded (should still use Tiny) - possible class filtering issue
⚠️ Mid's coarse size classes (8KB/16KB/32KB) cause fragmentation for small sizes
HYPOTHESIS:
-----------
Mid allocator uses 8KB blocks for all 256-1024B allocations, causing:
- Severe internal fragmentation (1024B request → 8KB block = 87% waste)
- Poor cache utilization
- Consistent ~1.3M ops/s across all sizes (same 8KB class)
RECOMMENDATION:
===============
**Keep default HAKMEM_TINY_MAX_CLASS=7 (C0-C7, up to 1023B)**
Reducing Tiny coverage is COUNTERPRODUCTIVE with current Mid allocator design.
To make this viable, Mid would need finer size classes for 256B-8KB range.
ENV USAGE (for future experimentation):
----------------------------------------
export HAKMEM_TINY_MAX_CLASS=7 # Default (C0-C7, up to 1023B)
export HAKMEM_TINY_MAX_CLASS=5 # Reduced (C0-C5, up to 255B) - NOT recommended
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-16 01:26:48 +09:00
|
|
|
return (size >= mid_get_min_size() && size <= MID_MAX_SIZE);
|
2025-11-05 12:31:14 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
// Configuration (compile-time; override with -D<NAME>=<value> or by defining
// the macro before including this header)
// ============================================================================

// Default chunk size (64KB)
#ifndef MID_DEFAULT_CHUNK_SIZE
#define MID_DEFAULT_CHUNK_SIZE (64 * 1024)
#endif

// Initial registry capacity
#ifndef MID_REGISTRY_INITIAL_CAPACITY
#define MID_REGISTRY_INITIAL_CAPACITY 64
#endif

// Enable/disable statistics collection
#ifndef MID_ENABLE_STATS
#define MID_ENABLE_STATS 0  // DISABLED for performance
#endif

// Enable/disable debug logging
#ifndef MID_DEBUG
#define MID_DEBUG 0  // DISABLE for performance testing
#endif

// MID_LOG(fmt, ...) - printf-style debug trace to stderr, prefixed with
// "[MID_MT] " and terminated with a newline. Expands to a no-op expression
// when MID_DEBUG is 0, so its arguments are not evaluated in that case.
// fmt must be a string literal (it is concatenated with the prefix).
#if MID_DEBUG
#include <stdio.h>
#define MID_LOG(fmt, ...) fprintf(stderr, "[MID_MT] " fmt "\n", ##__VA_ARGS__)
#else
#define MID_LOG(fmt, ...) ((void)0)
#endif
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Statistics (Debug/Profiling)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
#if MID_ENABLE_STATS
|
|
|
|
|
|
|
|
|
|
/**
 * MidStats - Global statistics for profiling
 *
 * Only declared when MID_ENABLE_STATS is non-zero. All counters are plain
 * 64-bit integers.
 */
typedef struct MidStats {
    uint64_t total_allocs;      // Total allocations
    uint64_t total_frees;       // Total frees
    uint64_t total_refills;     // Total chunk refills
    uint64_t local_frees;       // Local frees (same thread)
    uint64_t remote_frees;      // Remote frees (cross-thread)
    uint64_t registry_lookups;  // Registry lookups
} MidStats;
|
|
|
|
|
|
|
|
|
|
extern MidStats g_mid_stats;
|
|
|
|
|
|
|
|
|
|
void mid_mt_print_stats(void);
|
|
|
|
|
|
|
|
|
|
#endif // MID_ENABLE_STATS
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#endif // HAKMEM_MID_MT_H
|