// bench_random_mixed.c — Random mixed small allocations (16–1024B) // Usage (direct-link builds via Makefile): // ./bench_random_mixed_hakmem [cycles] [ws] [seed] // ./bench_random_mixed_system [cycles] [ws] [seed] // // Default: 10M cycles for steady-state measurement (use 100K for quick smoke test) // Recommended: Run 10 times and calculate mean/median/stddev for accurate results // // Prints: "Throughput = operations per second, relative time: ." #include #include #include #include #include #ifdef USE_HAKMEM #include "hakmem.h" // Box BenchMeta: Benchmark metadata management (bypass hakmem wrapper) // Phase 15: Separate BenchMeta (slots array) from CoreAlloc (user workload) extern void* __libc_calloc(size_t, size_t); extern void __libc_free(void*); #define BENCH_META_CALLOC __libc_calloc #define BENCH_META_FREE __libc_free // Phase 20-2: BenchFast mode - prealloc pool init #include "core/box/bench_fast_box.h" #else // System malloc build: use standard libc #define BENCH_META_CALLOC calloc #define BENCH_META_FREE free #endif static inline uint64_t now_ns(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (uint64_t)ts.tv_sec*1000000000ull + (uint64_t)ts.tv_nsec; } static inline uint32_t xorshift32(uint32_t* s){ uint32_t x=*s; x^=x<<13; x^=x>>17; x^=x<<5; *s=x; return x; } int main(int argc, char** argv){ int cycles = (argc>1)? atoi(argv[1]) : 10000000; // total ops (10M for steady-state measurement) int ws = (argc>2)? atoi(argv[2]) : 8192; // working-set slots uint32_t seed = (argc>3)? (uint32_t)strtoul(argv[3],NULL,10) : 1234567u; if (cycles <= 0) cycles = 1; if (ws <= 0) ws = 1024; #ifdef USE_HAKMEM // Phase 20-2: BenchFast prealloc pool initialization // Must be called BEFORE main benchmark loop to avoid recursion int prealloc_count = bench_fast_init(); if (prealloc_count > 0) { fprintf(stderr, "[BENCH] BenchFast mode: %d blocks preallocated\n", prealloc_count); } #else // System malloc also needs warmup for fair comparison (void)malloc(1); // Force libc initialization #endif // Box BenchMeta: Use __libc_calloc to bypass hakmem wrapper void** slots = (void**)BENCH_META_CALLOC((size_t)ws, sizeof(void*)); if (!slots) { fprintf(stderr, "alloc failed (slots)\n"); return 1; } // Warmup run (exclude from timing) - HAKMEM_BENCH_WARMUP=N const char* warmup_env = getenv("HAKMEM_BENCH_WARMUP"); int warmup_cycles = warmup_env ? atoi(warmup_env) : 0; if (warmup_cycles > 0) { fprintf(stderr, "[BENCH_WARMUP] Running %d warmup cycles (not timed)...\n", warmup_cycles); uint32_t warmup_seed = seed; for (int i=0; i= 66000 || (i > 28000 && i % 1000 == 0))) { // DISABLED for perf fprintf(stderr, "[TEST] Iteration %d (allocs=%d frees=%d)\n", i, allocs, frees); } uint32_t r = xorshift32(&seed); int idx = (int)(r % (uint32_t)ws); if (slots[idx]){ if (0 && i > 28300) { // DISABLED (Phase 2 perf) fprintf(stderr, "[FREE] i=%d ptr=%p idx=%d\n", i, slots[idx], idx); fflush(stderr); } free(slots[idx]); if (0 && i > 28300) { // DISABLED (Phase 2 perf) fprintf(stderr, "[FREE_DONE] i=%d\n", i); fflush(stderr); } slots[idx] = NULL; frees++; } else { // 16..1024 bytes (power-of-two-ish skew) size_t sz = 16u + (r & 0x3FFu); // 16..1040 (approx 16..1024) if (0 && i > 28300) { // DISABLED (Phase 2 perf) fprintf(stderr, "[MALLOC] i=%d sz=%zu idx=%d\n", i, sz, idx); fflush(stderr); } void* p = malloc(sz); if (0 && i > 28300) { // DISABLED (Phase 2 perf) fprintf(stderr, "[MALLOC_DONE] i=%d p=%p\n", i, p); fflush(stderr); } if (!p) continue; // touch first byte to avoid optimizer artifacts ((unsigned char*)p)[0] = (unsigned char)r; slots[idx] = p; allocs++; } } // drain fprintf(stderr, "[TEST] Main loop completed. Starting drain phase...\n"); for (int i=0;i0.0?sec:1e-9); // Include params in output to avoid confusion about test conditions printf("Throughput = %9.0f ops/s [iter=%d ws=%d] time=%.3fs\n", tput, cycles, ws, sec); (void)allocs; (void)frees; // Box BenchMeta: Use __libc_free to bypass hakmem wrapper BENCH_META_FREE(slots); #ifdef USE_HAKMEM // Phase 20-2: Print BenchFast stats (verify pool wasn't exhausted) bench_fast_stats(); // Phase 21-1: Ring cache - DELETED (A/B test: OFF is faster) // extern void ring_cache_print_stats(void); // ring_cache_print_stats(); // Phase 27: UltraHeap front statistics (experimental, UltraHeap ビルドのみ) // ENV: HAKMEM_TINY_ULTRA_HEAP_DUMP=1 で出力有効化 #if HAKMEM_TINY_ULTRA_HEAP { const char* dump = getenv("HAKMEM_TINY_ULTRA_HEAP_DUMP"); if (dump && *dump && *dump != '0') { extern void tiny_ultra_heap_stats_snapshot(uint64_t hit[8], uint64_t refill[8], uint64_t fallback[8], int reset); uint64_t hit[8] = {0}, refill[8] = {0}, fallback[8] = {0}; tiny_ultra_heap_stats_snapshot(hit, refill, fallback, 0); fprintf(stderr, "[ULTRA_HEAP_STATS] class hit refill fallback\n"); for (int c = 0; c < 8; c++) { if (hit[c] || refill[c] || fallback[c]) { fprintf(stderr, " C%d: %llu %llu %llu\n", c, (unsigned long long)hit[c], (unsigned long long)refill[c], (unsigned long long)fallback[c]); } } } } #endif #endif return 0; }