Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
363 lines
12 KiB
C
363 lines
12 KiB
C
// Redis-style workload benchmark for HAKMEM vs mimalloc comparison
|
|
// Tests small string allocations (16B-1KB) typical in Redis workloads
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <pthread.h>
|
|
#include <unistd.h>
|
|
#include <getopt.h>
|
|
|
|
#define DEFAULT_ITERATIONS 1000000
|
|
#define DEFAULT_THREADS 4
|
|
#define DEFAULT_CYCLES 100
|
|
#define DEFAULT_OPS_PER_CYCLE 1000
|
|
#define MAX_SIZE 1024
|
|
#define MIN_SIZE 16
|
|
|
|
typedef struct {
|
|
size_t size;
|
|
char data[MAX_SIZE];
|
|
} RedisString;
|
|
|
|
static inline double now_ns(void) {
|
|
struct timespec ts;
|
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
|
return (ts.tv_sec * 1e9 + ts.tv_nsec);
|
|
}
|
|
|
|
// Redis-style operations
|
|
typedef enum {
|
|
REDIS_SET = 0, // SET key value (alloc + free)
|
|
REDIS_GET = 1, // GET key (read-only, minimal alloc)
|
|
REDIS_LPUSH = 2, // LPUSH key value (alloc)
|
|
REDIS_LPOP = 3, // LPOP key (free)
|
|
REDIS_SADD = 4, // SADD key member (alloc)
|
|
REDIS_SREM = 5, // SREM key member (free)
|
|
REDIS_RANDOM = 6 // Random mixed operations
|
|
} RedisOp;
|
|
|
|
const char* op_names[] = {"SET", "GET", "LPUSH", "LPOP", "SADD", "SREM", "RANDOM"};
|
|
|
|
// Thread data structure
|
|
typedef struct {
|
|
RedisString** strings;
|
|
int capacity;
|
|
int count;
|
|
} StringPool;
|
|
|
|
typedef struct {
|
|
int thread_id;
|
|
RedisOp operation;
|
|
int iterations;
|
|
int cycles;
|
|
int ops_per_cycle;
|
|
size_t min_size;
|
|
size_t max_size;
|
|
double result_time;
|
|
size_t total_allocated;
|
|
} ThreadData;
|
|
|
|
// Thread-local string pool
|
|
__thread StringPool pool;
|
|
|
|
void pool_init(int capacity) {
|
|
pool.capacity = capacity;
|
|
pool.count = 0;
|
|
pool.strings = calloc(capacity, sizeof(RedisString*));
|
|
}
|
|
|
|
void pool_cleanup() {
|
|
for (int i = 0; i < pool.count; i++) {
|
|
if (pool.strings[i]) {
|
|
free(pool.strings[i]);
|
|
}
|
|
}
|
|
free(pool.strings);
|
|
pool.count = 0;
|
|
pool.capacity = 0;
|
|
}
|
|
|
|
RedisString* pool_alloc(size_t size) {
|
|
if (pool.count < pool.capacity) {
|
|
RedisString* str = malloc(sizeof(RedisString));
|
|
if (str) {
|
|
str->size = size;
|
|
snprintf(str->data, size > 16 ? 16 : size, "key%d", pool.count);
|
|
pool.strings[pool.count++] = str;
|
|
return str;
|
|
}
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
void pool_free(RedisString* str) {
|
|
if (!str) return;
|
|
|
|
// Find and remove from pool
|
|
for (int i = 0; i < pool.count; i++) {
|
|
if (pool.strings[i] == str) {
|
|
pool.strings[i] = pool.strings[--pool.count];
|
|
free(str);
|
|
return;
|
|
}
|
|
}
|
|
// Not found in pool, free directly
|
|
free(str);
|
|
}
|
|
|
|
// Redis-style workload simulation
|
|
void* redis_worker(void* arg) {
|
|
ThreadData* data = (ThreadData*)arg;
|
|
double total_time = 0.0;
|
|
|
|
pool_init(data->ops_per_cycle * 2);
|
|
|
|
for (int cycle = 0; cycle < data->cycles; cycle++) {
|
|
double start = now_ns();
|
|
|
|
switch (data->operation) {
|
|
case REDIS_SET: {
|
|
// SET key value: alloc + free pattern
|
|
for (int i = 0; i < data->ops_per_cycle; i++) {
|
|
size_t size = data->min_size + (rand() % (data->max_size - data->min_size));
|
|
RedisString* str = pool_alloc(size);
|
|
if (str) {
|
|
// Simulate SET operation
|
|
data->total_allocated += size;
|
|
pool_free(str);
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_GET: {
|
|
// GET key: read-heavy, minimal alloc
|
|
for (int i = 0; i < data->ops_per_cycle; i++) {
|
|
if (pool.count > 0) {
|
|
RedisString* str = pool.strings[rand() % pool.count];
|
|
if (str) {
|
|
// Simulate GET operation (read data)
|
|
volatile size_t len = strlen(str->data);
|
|
(void)len; // Prevent optimization
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_LPUSH: {
|
|
// LPUSH: alloc-heavy
|
|
for (int i = 0; i < data->ops_per_cycle; i++) {
|
|
size_t size = data->min_size + (rand() % (data->max_size - data->min_size));
|
|
RedisString* str = pool_alloc(size);
|
|
if (str) {
|
|
data->total_allocated += size;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_LPOP: {
|
|
// LPOP: free-heavy
|
|
for (int i = 0; i < data->ops_per_cycle && pool.count > 0; i++) {
|
|
pool_free(pool.strings[0]);
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_SADD: {
|
|
// SADD: similar to SET but for sets
|
|
for (int i = 0; i < data->ops_per_cycle; i++) {
|
|
size_t size = data->min_size + (rand() % (data->max_size - data->min_size));
|
|
RedisString* str = pool_alloc(size);
|
|
if (str) {
|
|
snprintf(str->data, 16, "member%d", i);
|
|
data->total_allocated += size;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_SREM: {
|
|
// SREM: remove from set
|
|
for (int i = 0; i < data->ops_per_cycle && pool.count > 0; i++) {
|
|
pool_free(pool.strings[rand() % pool.count]);
|
|
}
|
|
break;
|
|
}
|
|
case REDIS_RANDOM: {
|
|
// Random mix of operations (70% GET, 20% SET, 5% LPUSH, 5% LPOP)
|
|
for (int i = 0; i < data->ops_per_cycle; i++) {
|
|
int r = rand() % 100;
|
|
if (r < 70) { // GET
|
|
if (pool.count > 0) {
|
|
RedisString* str = pool.strings[rand() % pool.count];
|
|
if (str) {
|
|
volatile size_t len = strlen(str->data);
|
|
(void)len;
|
|
}
|
|
}
|
|
} else if (r < 90) { // SET
|
|
size_t size = data->min_size + (rand() % (data->max_size - data->min_size));
|
|
RedisString* str = pool_alloc(size);
|
|
if (str) {
|
|
data->total_allocated += size;
|
|
pool_free(str);
|
|
}
|
|
} else if (r < 95) { // LPUSH
|
|
size_t size = data->min_size + (rand() % (data->max_size - data->min_size));
|
|
RedisString* str = pool_alloc(size);
|
|
if (str) {
|
|
data->total_allocated += size;
|
|
}
|
|
} else { // LPOP
|
|
if (pool.count > 0) {
|
|
pool_free(pool.strings[0]);
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
double end = now_ns();
|
|
total_time += (end - start);
|
|
}
|
|
|
|
data->result_time = total_time / data->cycles; // Average time per cycle
|
|
pool_cleanup();
|
|
|
|
return NULL;
|
|
}
|
|
|
|
void run_benchmark(const char* allocator_name, RedisOp op, int threads, int cycles, int ops_per_cycle, size_t min_size, size_t max_size) {
|
|
printf("\n=== %s - %s ===\n", allocator_name, op_names[op]);
|
|
printf("Threads: %d, Cycles: %d, Ops/cycle: %d\n", threads, cycles, ops_per_cycle);
|
|
printf("Size range: %zu-%zu bytes\n", min_size, max_size);
|
|
printf("=====================================\n");
|
|
|
|
pthread_t* thread_ids = malloc(sizeof(pthread_t) * threads);
|
|
ThreadData* thread_data = malloc(sizeof(ThreadData) * threads);
|
|
|
|
double total_time = 0.0;
|
|
size_t total_allocated = 0;
|
|
|
|
// Create and start threads
|
|
for (int i = 0; i < threads; i++) {
|
|
thread_data[i].thread_id = i;
|
|
thread_data[i].operation = op;
|
|
thread_data[i].iterations = ops_per_cycle * cycles;
|
|
thread_data[i].cycles = cycles;
|
|
thread_data[i].ops_per_cycle = ops_per_cycle;
|
|
thread_data[i].min_size = min_size;
|
|
thread_data[i].max_size = max_size;
|
|
thread_data[i].result_time = 0.0;
|
|
thread_data[i].total_allocated = 0;
|
|
|
|
pthread_create(&thread_ids[i], NULL, redis_worker, &thread_data[i]);
|
|
}
|
|
|
|
// Wait for completion and collect results
|
|
for (int i = 0; i < threads; i++) {
|
|
pthread_join(thread_ids[i], NULL);
|
|
total_time += thread_data[i].result_time;
|
|
total_allocated += thread_data[i].total_allocated;
|
|
}
|
|
|
|
double avg_time_per_cycle = total_time / threads;
|
|
double ops_per_sec = (threads * ops_per_cycle) / (avg_time_per_cycle / 1e9);
|
|
double mops_per_sec = ops_per_sec / 1e6;
|
|
|
|
printf("Average time per cycle: %.2f ms\n", avg_time_per_cycle / 1e6);
|
|
printf("Throughput: %.2f M ops/sec\n", mops_per_sec);
|
|
printf("Total allocated: %.2f MB\n", total_allocated / (1024.0 * 1024.0));
|
|
printf("=====================================\n");
|
|
|
|
free(thread_ids);
|
|
free(thread_data);
|
|
}
|
|
|
|
void print_usage(const char* prog) {
|
|
printf("Usage: %s [options]\n", prog);
|
|
printf("Options:\n");
|
|
printf(" -t, --threads N Number of threads (default: %d)\n", DEFAULT_THREADS);
|
|
printf(" -c, --cycles N Number of cycles (default: %d)\n", DEFAULT_CYCLES);
|
|
printf(" -o, --ops N Operations per cycle (default: %d)\n", DEFAULT_OPS_PER_CYCLE);
|
|
printf(" -m, --min-size N Minimum allocation size (default: %d)\n", MIN_SIZE);
|
|
printf(" -M, --max-size N Maximum allocation size (default: %d)\n", MAX_SIZE);
|
|
printf(" -a, --allocators Compare all allocators\n");
|
|
printf(" -h, --help Show this help\n");
|
|
printf("\nRedis operations:\n");
|
|
for (int i = 0; i < 7; i++) {
|
|
printf(" %d: %s\n", i, op_names[i]);
|
|
}
|
|
}
|
|
|
|
int main(int argc, char** argv) {
|
|
int threads = DEFAULT_THREADS;
|
|
int cycles = DEFAULT_CYCLES;
|
|
int ops_per_cycle = DEFAULT_OPS_PER_CYCLE;
|
|
size_t min_size = MIN_SIZE;
|
|
size_t max_size = MAX_SIZE;
|
|
int compare_all = 0;
|
|
RedisOp operation = REDIS_RANDOM;
|
|
|
|
static struct option long_options[] = {
|
|
{"threads", required_argument, 0, 't'},
|
|
{"cycles", required_argument, 0, 'c'},
|
|
{"ops", required_argument, 0, 'o'},
|
|
{"min-size", required_argument, 0, 'm'},
|
|
{"max-size", required_argument, 0, 'M'},
|
|
{"allocators", no_argument, 0, 'a'},
|
|
{"help", no_argument, 0, 'h'},
|
|
{"operation", required_argument, 0, 'r'},
|
|
{0, 0, 0, 0}
|
|
};
|
|
|
|
int opt;
|
|
while ((opt = getopt_long(argc, argv, "t:c:o:m:M:ahr:", long_options, NULL)) != -1) {
|
|
switch (opt) {
|
|
case 't': threads = atoi(optarg); break;
|
|
case 'c': cycles = atoi(optarg); break;
|
|
case 'o': ops_per_cycle = atoi(optarg); break;
|
|
case 'm': min_size = (size_t)atoi(optarg); break;
|
|
case 'M': max_size = (size_t)atoi(optarg); break;
|
|
case 'a': compare_all = 1; break;
|
|
case 'r': operation = (RedisOp)atoi(optarg); break;
|
|
case 'h':
|
|
default:
|
|
print_usage(argv[0]);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
if (min_size > max_size) {
|
|
printf("Error: min_size cannot be greater than max_size\n");
|
|
return 1;
|
|
}
|
|
|
|
if (min_size < 16 || max_size > MAX_SIZE) {
|
|
printf("Error: size range must be between 16 and %d bytes\n", MAX_SIZE);
|
|
return 1;
|
|
}
|
|
|
|
printf("Redis-style Memory Allocator Benchmark\n");
|
|
printf("=====================================\n");
|
|
|
|
if (compare_all) {
|
|
// Compare all allocators with all operations
|
|
const char* allocators[] = {"System", "HAKMEM", "mimalloc"};
|
|
for (int op = 0; op < 7; op++) {
|
|
for (int i = 0; i < 3; i++) {
|
|
run_benchmark(allocators[i], (RedisOp)op, threads, cycles, ops_per_cycle, min_size, max_size);
|
|
}
|
|
}
|
|
} else {
|
|
// Run single operation with current allocator
|
|
const char* allocator = "System"; // Default, can be overridden via LD_PRELOAD
|
|
#ifdef USE_HAKMEM
|
|
allocator = "HAKMEM";
|
|
#endif
|
|
run_benchmark(allocator, operation, threads, cycles, ops_per_cycle, min_size, max_size);
|
|
}
|
|
|
|
return 0;
|
|
}
|