Files
hakmem/bench_mid_mt_gap.c

138 lines
4.1 KiB
C
Raw Normal View History

// bench_mid_mt_gap.c - Targeted benchmark for Mid MT allocation gap fix
// Tests 1KB-8KB allocations that were falling through to mmap() before fix
//
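// Usage:
// ./bench_mid_mt_gap_hakmem [cycles] [ws] [seed]
// Defaults: cycles=1000000, ws=256 (working-set slots), seed=1234567.
// Set HAKMEM_BENCH_WARMUP=<n> in the environment to run n untimed warmup cycles first.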
//
// Size distribution:
// - 1KB (1024B)
// - 2KB (2048B)
// - 4KB (4096B)
// - 8KB (8192B)
//
// Expected improvement: 100-1000x faster (mmap → Mid MT)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>
#include <string.h>
#ifdef USE_HAKMEM
#include "hakmem.h"
// Box BenchMeta: Benchmark metadata management (bypass hakmem wrapper)
extern void* __libc_calloc(size_t, size_t);
extern void __libc_free(void*);
#define BENCH_META_CALLOC __libc_calloc
#define BENCH_META_FREE __libc_free
// Phase 20-2: BenchFast mode - prealloc pool init
#include "core/box/bench_fast_box.h"
#else
// System malloc build: use standard libc
#define BENCH_META_CALLOC calloc
#define BENCH_META_FREE free
#endif
// Read CLOCK_MONOTONIC and return the reading as a single nanosecond count.
// The return value of clock_gettime() is not checked.
static inline uint64_t now_ns(void) {
    struct timespec t;
    clock_gettime(CLOCK_MONOTONIC, &t);

    const uint64_t whole_seconds = (uint64_t)t.tv_sec;
    const uint64_t fraction_ns = (uint64_t)t.tv_nsec;
    return whole_seconds * 1000000000ull + fraction_ns;
}
// Advance a 32-bit xorshift generator by one step (shift triple 13, 17, 5).
// Reads the current state from *s, writes the new state back to *s, and
// returns that new state. A state of 0 stays 0, since every step is a
// xor of shifted copies of the state.
static inline uint32_t xorshift32(uint32_t* s){
    uint32_t state = *s;

    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);

    *s = state;
    return state;
}
// Benchmark entry point.
//
// Positional arguments (all optional):
//   argv[1]  cycles - number of timed alloc/free operations (default 1000000;
//                     values <= 0 become 1)
//   argv[2]  ws     - number of working-set slots (default 256; values <= 0
//                     become 256)
//   argv[3]  seed   - xorshift32 seed (default 1234567; 0 falls back to the
//                     default because xorshift32 never leaves state 0)
// Environment:
//   HAKMEM_BENCH_WARMUP - number of untimed warmup cycles run before the
//                         timed loop (default 0 = no warmup)
//
// Every cycle draws one pseudo-random value and uses it to pick a slot:
// an occupied slot is freed, an empty slot gets a new 1KB/2KB/4KB/8KB block
// whose first byte is written. Throughput goes to stdout, counters to stderr.
//
// Returns 0 on success, 1 if the slot table cannot be allocated.
int main(int argc, char** argv){
    const uint32_t default_seed = 1234567u;

    int cycles = (argc>1)? atoi(argv[1]) : 1000000; // 1M cycles (faster than 10M)
    int ws = (argc>2)? atoi(argv[2]) : 256; // working-set slots
    uint32_t seed = (argc>3)? (uint32_t)strtoul(argv[3],NULL,10) : default_seed;
    if (cycles <= 0) cycles = 1;
    if (ws <= 0) ws = 256;
    if (seed == 0) {
        // xorshift32 maps 0 to 0 forever: every cycle would hit slot 0 with
        // the 1KB size and the reported throughput would be meaningless.
        // A non-numeric seed argument also lands here (strtoul yields 0).
        fprintf(stderr, "[BENCH] seed 0 is not usable with xorshift32; using %u\n",
                (unsigned)default_seed);
        seed = default_seed;
    }
#ifdef USE_HAKMEM
    // Phase 20-2: BenchFast prealloc pool initialization
    int prealloc_count = bench_fast_init();
    if (prealloc_count > 0) {
        fprintf(stderr, "[BENCH] BenchFast mode: %d blocks preallocated\n", prealloc_count);
    }
#else
    // System malloc also needs warmup for fair comparison
    void* libc_warm = malloc(1); // Force libc initialization
    free(libc_warm);             // release it so leak checkers stay quiet
#endif
    // Box BenchMeta: the slot table is allocated through BENCH_META_CALLOC so
    // that, in the USE_HAKMEM build, it bypasses the hakmem wrapper.
    void** slots = (void**)BENCH_META_CALLOC((size_t)ws, sizeof(void*));
    if (!slots) { fprintf(stderr, "alloc failed (slots)\n"); return 1; }

    // Size distribution: 1KB, 2KB, 4KB, 8KB (evenly distributed).
    // Slot index (r % ws) and size index (r % 4) come from the same draw, so
    // when ws is a multiple of 4 a given slot always receives the same size.
    const size_t sizes[4] = {1024, 2048, 4096, 8192};

    // Warmup run (exclude from timing)
    const char* warmup_env = getenv("HAKMEM_BENCH_WARMUP");
    int warmup_cycles = warmup_env ? atoi(warmup_env) : 0;
    if (warmup_cycles > 0) {
        fprintf(stderr, "[BENCH_WARMUP] Running %d warmup cycles (not timed)...\n", warmup_cycles);
        // Work on a copy so the timed run still starts from the original seed.
        uint32_t warmup_seed = seed;
        for (int i=0; i<warmup_cycles; i++){
            uint32_t r = xorshift32(&warmup_seed);
            int idx = (int)(r % (uint32_t)ws);
            if (slots[idx]){
                free(slots[idx]);
                slots[idx] = NULL;
            } else {
                // Pick size from 1KB, 2KB, 4KB, 8KB
                size_t sz = sizes[r % 4];
                void* p = malloc(sz);
                if (p) {
                    // Touch the block so the allocation is actually used.
                    ((unsigned char*)p)[0] = (unsigned char)r;
                    slots[idx] = p;
                }
            }
        }
        // Drain warmup allocations
        for (int i=0;i<ws;i++){ if (slots[i]) { free(slots[i]); slots[i]=NULL; } }
        fprintf(stderr, "[BENCH_WARMUP] Warmup completed. Starting timed run...\n");
    }

    // Timed section: only the alloc/free loop sits between the two clock reads.
    uint64_t start = now_ns();
    int frees = 0, allocs = 0;
    for (int i=0; i<cycles; i++){
        uint32_t r = xorshift32(&seed);
        int idx = (int)(r % (uint32_t)ws);
        if (slots[idx]){
            free(slots[idx]);
            slots[idx] = NULL;
            frees++;
        } else {
            // Pick size from 1KB, 2KB, 4KB, 8KB (25% each)
            size_t sz = sizes[r % 4];
            void* p = malloc(sz);
            if (p) {
                ((unsigned char*)p)[0] = (unsigned char)r;
                slots[idx] = p;
                allocs++;
            }
        }
    }
    uint64_t end = now_ns();

    // Drain remaining allocations
    for (int i=0; i<ws; i++){
        if (slots[i]) { free(slots[i]); slots[i]=NULL; }
    }

    // Throughput counts every cycle as one operation. Guard the division:
    // a very short run can finish within one clock tick (elapsed == 0).
    double secs = (double)(end - start) / 1e9;
    double ops_per_s = (secs > 0.0) ? (double)cycles / secs : 0.0;
    printf("Throughput = %.2f M operations per second, relative time: %.6f s.\n",
           ops_per_s / 1e6, secs);
    fprintf(stderr, "[BENCH] Cycles=%d, Allocs=%d, Frees=%d, WS=%d\n",
            cycles, allocs, frees, ws);
    BENCH_META_FREE(slots);
    return 0;
}