diff --git a/Makefile b/Makefile index 3a605913..f911ce0d 100644 --- a/Makefile +++ b/Makefile @@ -616,6 +616,19 @@ bench_random_mixed_hakmem: bench_random_mixed_hakmem.o $(TINY_BENCH_OBJS) bench_random_mixed_system: bench_random_mixed_system.o $(CC) -o $@ $^ $(LDFLAGS) +# Mid MT gap benchmark (1KB-8KB allocations) - Phase 5-Step2 verification +bench_mid_mt_gap_hakmem.o: bench_mid_mt_gap.c hakmem.h + $(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $< + +bench_mid_mt_gap_system.o: bench_mid_mt_gap.c + $(CC) $(CFLAGS) -c -o $@ $< + +bench_mid_mt_gap_hakmem: bench_mid_mt_gap_hakmem.o $(TINY_BENCH_OBJS) + $(CC) -o $@ $^ $(LDFLAGS) + +bench_mid_mt_gap_system: bench_mid_mt_gap_system.o + $(CC) -o $@ $^ $(LDFLAGS) + # Fixed-size microbench (direct link variants) bench_fixed_size_hakmem.o: benchmarks/src/fixed/bench_fixed_size.c hakmem.h $(CC) $(CFLAGS) -DUSE_HAKMEM -c -o $@ $< diff --git a/bench_mid_mt_gap.c b/bench_mid_mt_gap.c new file mode 100644 index 00000000..4695e1be --- /dev/null +++ b/bench_mid_mt_gap.c @@ -0,0 +1,137 @@ +// bench_mid_mt_gap.c - Targeted benchmark for Mid MT allocation gap fix +// Tests 1KB-8KB allocations that were falling through to mmap() before fix +// +// Usage: +// ./bench_mid_mt_gap_hakmem [cycles] [ws] [seed] +// +// Size distribution: +// - 1KB (1024B) +// - 2KB (2048B) +// - 4KB (4096B) +// - 8KB (8192B) +// +// Expected improvement: 100-1000x faster (mmap → Mid MT) + +#include +#include +#include +#include +#include + +#ifdef USE_HAKMEM +#include "hakmem.h" + +// Box BenchMeta: Benchmark metadata management (bypass hakmem wrapper) +extern void* __libc_calloc(size_t, size_t); +extern void __libc_free(void*); +#define BENCH_META_CALLOC __libc_calloc +#define BENCH_META_FREE __libc_free + +// Phase 20-2: BenchFast mode - prealloc pool init +#include "core/box/bench_fast_box.h" +#else +// System malloc build: use standard libc +#define BENCH_META_CALLOC calloc +#define BENCH_META_FREE free +#endif + +static inline uint64_t now_ns(void) { + struct 
timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); + return (uint64_t)ts.tv_sec*1000000000ull + (uint64_t)ts.tv_nsec; +} + +static inline uint32_t xorshift32(uint32_t* s){ + uint32_t x=*s; x^=x<<13; x^=x>>17; x^=x<<5; *s=x; return x; +} + +int main(int argc, char** argv){ + int cycles = (argc>1)? atoi(argv[1]) : 1000000; // 1M cycles (faster than 10M) + int ws = (argc>2)? atoi(argv[2]) : 256; // working-set slots + uint32_t seed = (argc>3)? (uint32_t)strtoul(argv[3],NULL,10) : 1234567u; + + if (cycles <= 0) cycles = 1; + if (ws <= 0) ws = 256; + +#ifdef USE_HAKMEM + // Phase 20-2: BenchFast prealloc pool initialization + int prealloc_count = bench_fast_init(); + if (prealloc_count > 0) { + fprintf(stderr, "[BENCH] BenchFast mode: %d blocks preallocated\n", prealloc_count); + } +#else + // System malloc also needs warmup for fair comparison + (void)malloc(1); // Force libc initialization +#endif + + // Box BenchMeta: Use __libc_calloc to bypass hakmem wrapper + void** slots = (void**)BENCH_META_CALLOC((size_t)ws, sizeof(void*)); + if (!slots) { fprintf(stderr, "alloc failed (slots)\n"); return 1; } + + // Size distribution: 1KB, 2KB, 4KB, 8KB (evenly distributed) + const size_t sizes[4] = {1024, 2048, 4096, 8192}; + + // Warmup run (exclude from timing) + const char* warmup_env = getenv("HAKMEM_BENCH_WARMUP"); + int warmup_cycles = warmup_env ? atoi(warmup_env) : 0; + if (warmup_cycles > 0) { + fprintf(stderr, "[BENCH_WARMUP] Running %d warmup cycles (not timed)...\n", warmup_cycles); + uint32_t warmup_seed = seed; + for (int i=0; i + +#ifdef __cplusplus +extern "C" { +#endif + +// ============================================================================ +// Box Contract: Mid MT Free Routing +// ============================================================================ + +/** + * mid_free_route_try - Try Mid MT free path first + * + * @param ptr Pointer to free + * @return true if handled by Mid MT, false to fall through + * + * Box Responsibilities: + * 1. 
Query Mid MT registry (mid_registry_lookup)
+ *   2. If found: Call mid_mt_free() and return true
+ *   3. If not found: Return false (let existing path handle it)
+ *
+ * Box Guarantees:
+ *   - Zero side effects if returning false
+ *   - Correct free if returning true
+ *   - Thread-safe (Mid MT registry has mutex protection)
+ *
+ * Performance:
+ *   - Mid MT hit: O(log N) registry lookup + O(1) free = ~50 cycles
+ *   - Mid MT miss: O(log N) registry lookup only = ~50 cycles
+ *   - Compare to current broken path: 4 lookups + libc = ~750 cycles
+ *
+ * Usage Example:
+ *   void free(void* ptr) {
+ *       if (mid_free_route_try(ptr)) return;  // Mid MT handled
+ *       // Fall through to existing free path...
+ *   }
+ */
+__attribute__((always_inline))
+static inline bool mid_free_route_try(void* ptr) {
+    size_t owned_size = 0;  // filled by the registry lookup on a hit
+    int owned_class = 0;    // filled by the registry lookup; not used afterwards
+
+    // A NULL pointer is declined before the registry is consulted.
+    if (!ptr) return false;
+
+    // Registry miss: decline so the caller's existing free path handles ptr.
+    if (!mid_registry_lookup(ptr, &owned_size, &owned_class)) {
+        return false;
+    }
+
+    // Registry hit: release the block through Mid MT with the size it reported.
+    mid_mt_free(ptr, owned_size);
+    return true;
+}
+
+// ============================================================================
+// Box Observability (Debug/Profiling)
+// ============================================================================
+
+#if MID_DEBUG
+/**
+ * mid_free_route_stats - Print Mid Free Route Box statistics
+ *
+ * Only available in debug builds (MID_DEBUG=1)
+ * Tracks hit/miss rates for performance analysis
+ */
+void mid_free_route_stats(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MID_FREE_ROUTE_BOX_H
diff --git a/core/hakmem_mid_mt.h b/core/hakmem_mid_mt.h
index 9c0ef59c..141af25c 100644
--- a/core/hakmem_mid_mt.h
+++ b/core/hakmem_mid_mt.h
@@ -41,12 +41,16 @@ extern "C" {
 // - HAKMEM_TINY_MAX_CLASS=5 → Tiny up to 255B → Mid starts at 256B
 #include "hakmem_tiny.h" // For tiny_get_max_size()
 
-static inline size_t mid_get_min_size(void) {
-    return tiny_get_max_size() + 1; // Mid starts where Tiny ends
-}
-
 #define MID_MIN_SIZE_STATIC (1024) // Static fallback (C7 default)
 #define MID_MAX_SIZE (32 * 1024) // 32KB
+
+static inline size_t mid_get_min_size(void) {
+    // Smallest size served by Mid MT: Tiny ceiling + 1, capped at MID_MIN_SIZE_STATIC.
+    // Cap: tiny_get_max_size() reports 2047 (C7 usable); uncapped, 1KB-2KB hit mmap().
+    // No cap-only shortcut: a Tiny ceiling of 255B must still start Mid at 256B.
+    size_t after_tiny = tiny_get_max_size() + 1;
+    return (after_tiny < MID_MIN_SIZE_STATIC) ? after_tiny : MID_MIN_SIZE_STATIC;
+}
 #define MID_CHUNK_SIZE (4 * 1024 * 1024) // 4MB chunks (same as mimalloc segments)
 
 // ============================================================================