138 lines
4.1 KiB
C
138 lines
4.1 KiB
C
|
|
// bench_mid_mt_gap.c - Targeted benchmark for Mid MT allocation gap fix
|
||
|
|
// Tests 1KB-8KB allocations that were falling through to mmap() before fix
|
||
|
|
//
|
||
|
|
// Usage:
|
||
|
|
// ./bench_mid_mt_gap_hakmem [cycles] [ws] [seed]
|
||
|
|
//
|
||
|
|
// Size distribution:
|
||
|
|
// - 1KB (1024B)
|
||
|
|
// - 2KB (2048B)
|
||
|
|
// - 4KB (4096B)
|
||
|
|
// - 8KB (8192B)
|
||
|
|
//
|
||
|
|
// Expected improvement: 100-1000x faster (mmap → Mid MT)
|
||
|
|
|
||
|
|
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
|
||
|
|
|
||
|
|
// Box BenchMeta: allocator used for the benchmark's own bookkeeping
// (the slot table), selected at compile time.
#if !defined(USE_HAKMEM)
// System malloc build: bookkeeping goes through the standard libc calls.
#define BENCH_META_FREE free
#define BENCH_META_CALLOC calloc
#else
#include "hakmem.h"

// HAKMEM build: bookkeeping calls the __libc_* entry points directly so it
// bypasses the hakmem wrapper.
extern void __libc_free(void* ptr);
extern void* __libc_calloc(size_t nmemb, size_t size);
#define BENCH_META_FREE __libc_free
#define BENCH_META_CALLOC __libc_calloc

// Phase 20-2: BenchFast mode - prealloc pool init
#include "core/box/bench_fast_box.h"
#endif
|
||
|
|
|
||
|
|
// Returns the current CLOCK_MONOTONIC reading converted to nanoseconds.
// If clock_gettime() reports failure, returns 0 rather than reading a
// timespec that was never filled in.
static inline uint64_t now_ns(void) {
    struct timespec ts = {0};
    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0;
    }
    return (uint64_t)ts.tv_sec*1000000000ull + (uint64_t)ts.tv_nsec;
}
|
||
|
|
|
||
|
|
// Advances the 32-bit xorshift generator stored in *s by one step
// (shift triple 13, 17, 5) and returns the new state.
// A state of 0 stays 0, so callers must seed with a non-zero value.
static inline uint32_t xorshift32(uint32_t* s){
    uint32_t state = *s;
    state = state ^ (state << 13);
    state = state ^ (state >> 17);
    state = state ^ (state << 5);
    *s = state;
    return state;
}
|
||
|
|
|
||
|
|
// Size distribution: 1KB, 2KB, 4KB, 8KB (25% each).
enum { MID_SIZE_COUNT = 4 };
static const size_t k_mid_sizes[MID_SIZE_COUNT] = {1024, 2048, 4096, 8192};

// Default PRNG seed; also used when the requested seed is 0.
#define BENCH_DEFAULT_SEED 1234567u

// Parses a base-10 integer, clamping the result to [INT_MIN, INT_MAX].
// Returns 0 when text has no leading number (the value atoi() gave for such
// input), so the caller's "<= 0 means use the fallback" checks still apply.
// Trailing non-digit characters are ignored, as with atoi().
static int parse_int_arg(const char* text){
    char* end = NULL;
    long v = strtol(text, &end, 10);
    if (end == text) return 0;
    if (v > INT_MAX) return INT_MAX;   // also covers strtol saturating at LONG_MAX
    if (v < INT_MIN) return INT_MIN;
    return (int)v;
}

// Runs `cycles` alloc/free toggles over the slot table.
// Each step draws one PRNG value: an occupied slot is freed, an empty slot
// receives a new block whose first byte is written. A failed malloc leaves
// the slot empty and is not counted.
// allocs_out / frees_out receive the operation counts and may be NULL.
//
// NOTE: slot index and size class come from the same draw (r % ws, r % 4).
// When ws is a multiple of 4 (as with the default 256) they share their low
// two bits, so a given slot always receives the same size. Kept as-is so
// results stay comparable with earlier runs.
static void churn_slots(void** slots, int ws, int cycles, uint32_t* seed,
                        int* allocs_out, int* frees_out){
    int allocs = 0, frees = 0;
    for (int i=0; i<cycles; i++){
        uint32_t r = xorshift32(seed);
        int idx = (int)(r % (uint32_t)ws);
        if (slots[idx]){
            free(slots[idx]);
            slots[idx] = NULL;
            frees++;
        } else {
            size_t sz = k_mid_sizes[r % MID_SIZE_COUNT];
            void* p = malloc(sz);
            if (p) {
                ((unsigned char*)p)[0] = (unsigned char)r;
                slots[idx] = p;
                allocs++;
            }
        }
    }
    if (allocs_out) *allocs_out = allocs;
    if (frees_out) *frees_out = frees;
}

// Frees every block still held in the slot table and clears the slots.
static void drain_slots(void** slots, int ws){
    for (int i=0; i<ws; i++){
        if (slots[i]) { free(slots[i]); slots[i]=NULL; }
    }
}

// Benchmark entry point.
//   argv[1] cycles  - number of timed alloc/free steps (default 1000000)
//   argv[2] ws      - working-set slots (default 256)
//   argv[3] seed    - PRNG seed (default 1234567; 0 is replaced by the default)
// Environment: HAKMEM_BENCH_WARMUP=<n> runs n untimed warmup steps first.
// Prints throughput on stdout and counters on stderr.
// Returns 0 on success, 1 if the slot table cannot be allocated.
int main(int argc, char** argv){
    int cycles = (argc>1)? parse_int_arg(argv[1]) : 1000000; // 1M cycles (faster than 10M)
    int ws = (argc>2)? parse_int_arg(argv[2]) : 256;         // working-set slots
    uint32_t seed = (argc>3)? (uint32_t)strtoul(argv[3],NULL,10) : BENCH_DEFAULT_SEED;

    if (cycles <= 0) cycles = 1;
    if (ws <= 0) ws = 256;
    // 0 is a fixed point of xorshift32: every draw would be 0 and the run
    // would only ever toggle slot 0 with a single size.
    if (seed == 0) seed = BENCH_DEFAULT_SEED;

#ifdef USE_HAKMEM
    // Phase 20-2: BenchFast prealloc pool initialization
    int prealloc_count = bench_fast_init();
    if (prealloc_count > 0) {
        fprintf(stderr, "[BENCH] BenchFast mode: %d blocks preallocated\n", prealloc_count);
    }
#else
    // System malloc also needs warmup for fair comparison.
    // The volatile pointer keeps the compiler from discarding the call as
    // dead code, and the block is released so leak checkers stay quiet.
    void* volatile libc_warm = malloc(1); // Force libc initialization
    free(libc_warm);
#endif

    // Box BenchMeta: slot table is allocated outside the allocator under test
    void** slots = (void**)BENCH_META_CALLOC((size_t)ws, sizeof(void*));
    if (!slots) { fprintf(stderr, "alloc failed (slots)\n"); return 1; }

    // Warmup run (exclude from timing)
    const char* warmup_env = getenv("HAKMEM_BENCH_WARMUP");
    int warmup_cycles = warmup_env ? parse_int_arg(warmup_env) : 0;
    if (warmup_cycles > 0) {
        fprintf(stderr, "[BENCH_WARMUP] Running %d warmup cycles (not timed)...\n", warmup_cycles);
        // Warmup works on a copy so the timed run starts from the original seed.
        uint32_t warmup_seed = seed;
        churn_slots(slots, ws, warmup_cycles, &warmup_seed, NULL, NULL);
        // Drain warmup allocations
        drain_slots(slots, ws);
        fprintf(stderr, "[BENCH_WARMUP] Warmup completed. Starting timed run...\n");
    }

    int frees = 0, allocs = 0;
    uint64_t start = now_ns();
    churn_slots(slots, ws, cycles, &seed, &allocs, &frees);
    uint64_t end = now_ns();

    // Drain remaining allocations
    drain_slots(slots, ws);

    double ns = (double)(end - start);
    if (ns <= 0.0) ns = 1.0;   // avoid dividing by zero when the clock did not advance
    double ops_per_s = (double)cycles / (ns / 1e9);

    printf("Throughput = %.2f M operations per second, relative time: %.6f s.\n",
           ops_per_s / 1e6, ns / 1e9);
    fprintf(stderr, "[BENCH] Cycles=%d, Allocs=%d, Frees=%d, WS=%d\n",
            cycles, allocs, frees, ws);

    BENCH_META_FREE(slots);
    return 0;
}
|