Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
171 lines
5.8 KiB
C
171 lines
5.8 KiB
C
#include <stdio.h>
|
||
#include <stdint.h>
|
||
#include <pthread.h>
|
||
#include <stdatomic.h>
|
||
|
||
// Reproduce the exact structures from hakmem_tiny.h
|
||
|
||
#define TINY_NUM_CLASSES 8
|
||
#define TINY_SLAB_SIZE (64 * 1024)
|
||
#define SLAB_REGISTRY_SIZE 1024
|
||
#define TINY_TLS_MAG_CAP 2048
|
||
|
||
// Mini-mag structure
|
||
typedef struct {
|
||
void* next;
|
||
} MiniMagBlock;
|
||
|
||
typedef struct {
|
||
MiniMagBlock* head;
|
||
uint16_t count;
|
||
uint16_t capacity;
|
||
} PageMiniMag;
|
||
|
||
// Slab structure
|
||
typedef struct TinySlab {
|
||
void* base;
|
||
uint64_t* bitmap;
|
||
uint16_t free_count;
|
||
uint16_t total_count;
|
||
uint8_t class_idx;
|
||
uint8_t _padding[3];
|
||
struct TinySlab* next;
|
||
atomic_uintptr_t remote_head;
|
||
atomic_uint remote_count;
|
||
pthread_t owner_tid;
|
||
uint16_t hint_word;
|
||
uint8_t summary_words;
|
||
uint8_t _pad_sum[1];
|
||
uint64_t* summary;
|
||
PageMiniMag mini_mag;
|
||
} TinySlab;
|
||
|
||
// Registry entry
|
||
typedef struct {
|
||
uintptr_t slab_base;
|
||
void* owner;
|
||
} SlabRegistryEntry;
|
||
|
||
// TLS Magazine
|
||
typedef struct {
|
||
void* ptr;
|
||
} TinyMagItem;
|
||
|
||
typedef struct {
|
||
TinyMagItem items[TINY_TLS_MAG_CAP];
|
||
int top;
|
||
int cap;
|
||
} TinyTLSMag;
|
||
|
||
// SuperSlab structures
|
||
typedef struct TinySlabMeta {
|
||
void* freelist;
|
||
uint16_t used;
|
||
uint16_t capacity;
|
||
uint32_t owner_tid;
|
||
} TinySlabMeta;
|
||
|
||
#define SLABS_PER_SUPERSLAB 32
|
||
typedef struct SuperSlab {
|
||
uint64_t magic;
|
||
uint8_t size_class;
|
||
uint8_t active_slabs;
|
||
uint16_t _pad0;
|
||
uint32_t slab_bitmap;
|
||
TinySlabMeta slabs[SLABS_PER_SUPERSLAB];
|
||
} __attribute__((aligned(64))) SuperSlab;
|
||
|
||
// Bitmap words per class
|
||
static const uint8_t g_tiny_bitmap_words[TINY_NUM_CLASSES] = {
|
||
128, 64, 32, 16, 8, 4, 2, 1
|
||
};
|
||
|
||
static const uint16_t g_tiny_blocks_per_slab[TINY_NUM_CLASSES] = {
|
||
8192, 4096, 2048, 1024, 512, 256, 128, 64
|
||
};
|
||
|
||
int main() {
|
||
printf("=== HAKMEM Memory Overhead Breakdown ===\n\n");
|
||
|
||
// Structure sizes
|
||
printf("Structure Sizes:\n");
|
||
printf(" TinySlab: %lu bytes\n", sizeof(TinySlab));
|
||
printf(" TinyTLSMag: %lu bytes\n", sizeof(TinyTLSMag));
|
||
printf(" SlabRegistryEntry: %lu bytes\n", sizeof(SlabRegistryEntry));
|
||
printf(" SuperSlab: %lu bytes\n", sizeof(SuperSlab));
|
||
printf(" TinySlabMeta: %lu bytes\n", sizeof(TinySlabMeta));
|
||
printf("\n");
|
||
|
||
// Test scenario: 1M × 16B allocations (class 1)
|
||
int class_idx = 1; // 16B
|
||
int num_allocs = 1000000;
|
||
|
||
printf("Test Scenario: %d × 16B allocations\n\n", num_allocs);
|
||
|
||
// Calculate theoretical data size
|
||
size_t data_size = num_allocs * 16;
|
||
printf("Theoretical Data: %.2f MB\n", data_size / (1024.0 * 1024.0));
|
||
|
||
// Calculate slabs needed
|
||
int blocks_per_slab = g_tiny_blocks_per_slab[class_idx]; // 4096 for 16B
|
||
int slabs_needed = (num_allocs + blocks_per_slab - 1) / blocks_per_slab;
|
||
printf("Slabs needed: %d (4096 blocks per slab)\n\n", slabs_needed);
|
||
|
||
// Component 1: Global Registry
|
||
size_t registry_size = SLAB_REGISTRY_SIZE * sizeof(SlabRegistryEntry);
|
||
printf("Component 1: Global Slab Registry\n");
|
||
printf(" Entries: %d\n", SLAB_REGISTRY_SIZE);
|
||
printf(" Size: %.2f KB (fixed)\n\n", registry_size / 1024.0);
|
||
|
||
// Component 2: TLS Magazine (per thread, assume 1 thread)
|
||
size_t tls_mag_size = TINY_NUM_CLASSES * sizeof(TinyTLSMag);
|
||
printf("Component 2: TLS Magazine (per thread)\n");
|
||
printf(" Classes: %d\n", TINY_NUM_CLASSES);
|
||
printf(" Capacity per class: %d items\n", TINY_TLS_MAG_CAP);
|
||
printf(" Size: %.2f KB per thread\n\n", tls_mag_size / 1024.0);
|
||
|
||
// Component 3: Per-slab metadata
|
||
size_t slab_metadata_size = slabs_needed * sizeof(TinySlab);
|
||
printf("Component 3: Slab Metadata\n");
|
||
printf(" Slabs: %d\n", slabs_needed);
|
||
printf(" Size per slab: %lu bytes\n", sizeof(TinySlab));
|
||
printf(" Total: %.2f KB\n\n", slab_metadata_size / 1024.0);
|
||
|
||
// Component 4: Bitmaps (primary + summary)
|
||
int bitmap_words = g_tiny_bitmap_words[class_idx]; // 64 for class 1
|
||
int summary_words = (bitmap_words + 63) / 64; // 1 for class 1
|
||
size_t bitmap_size = slabs_needed * bitmap_words * sizeof(uint64_t);
|
||
size_t summary_size = slabs_needed * summary_words * sizeof(uint64_t);
|
||
printf("Component 4: Bitmaps\n");
|
||
printf(" Primary bitmap: %d words × %d slabs = %.2f KB\n",
|
||
bitmap_words, slabs_needed, bitmap_size / 1024.0);
|
||
printf(" Summary bitmap: %d words × %d slabs = %.2f KB\n",
|
||
summary_words, slabs_needed, summary_size / 1024.0);
|
||
printf(" Total: %.2f KB\n\n", (bitmap_size + summary_size) / 1024.0);
|
||
|
||
// Component 5: Slab data regions
|
||
size_t slab_data = slabs_needed * TINY_SLAB_SIZE;
|
||
printf("Component 5: Slab Data Regions\n");
|
||
printf(" Slabs: %d × 64 KB = %.2f MB\n\n", slabs_needed, slab_data / (1024.0 * 1024.0));
|
||
|
||
// Total overhead calculation
|
||
size_t total_metadata = registry_size + tls_mag_size + slab_metadata_size +
|
||
bitmap_size + summary_size;
|
||
size_t total_memory = total_metadata + slab_data;
|
||
|
||
printf("=== TOTAL BREAKDOWN ===\n");
|
||
printf("Data used: %.2f MB (actual allocations)\n", data_size / (1024.0 * 1024.0));
|
||
printf("Slab wasted space: %.2f MB (unused blocks in slabs)\n",
|
||
(slab_data - data_size) / (1024.0 * 1024.0));
|
||
printf("Metadata overhead: %.2f MB\n", total_metadata / (1024.0 * 1024.0));
|
||
printf(" - Registry: %.2f MB\n", registry_size / (1024.0 * 1024.0));
|
||
printf(" - TLS Magazine: %.2f MB\n", tls_mag_size / (1024.0 * 1024.0));
|
||
printf(" - Slab metadata: %.2f MB\n", slab_metadata_size / (1024.0 * 1024.0));
|
||
printf(" - Bitmaps: %.2f MB\n", (bitmap_size + summary_size) / (1024.0 * 1024.0));
|
||
printf("Total memory: %.2f MB\n", total_memory / (1024.0 * 1024.0));
|
||
printf("Overhead %%: %.1f%%\n",
|
||
((total_memory - data_size) / (double)data_size) * 100.0);
|
||
|
||
return 0;
|
||
}
|