Debug Counters Implementation - Clean History
Major Features: - Debug counter infrastructure for Refill Stage tracking - Free Pipeline counters (ss_local, ss_remote, tls_sll) - Diagnostic counters for early return analysis - Unified larson.sh benchmark runner with profiles - Phase 6-3 regression analysis documentation Bug Fixes: - Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB) - Fix profile variable naming consistency - Add .gitignore patterns for large files Performance: - Phase 6-3: 4.79 M ops/s (has OOM risk) - With SuperSlab: 3.13 M ops/s (+19% improvement) This is a clean repository without large log files. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
198
core/hakmem_tiny_simple.c
Normal file
198
core/hakmem_tiny_simple.c
Normal file
@ -0,0 +1,198 @@
|
||||
// hakmem_tiny_simple.c
|
||||
// Phase 6-1: Ultra-Simple Tiny Allocator Implementation
|
||||
//
|
||||
// Design: "Simple Front + Smart Back"
|
||||
// - Front: 3-4 instruction fast path (this file, Phase 1)
|
||||
// - Back: Learning layer (Phase 2, to be added)
|
||||
//
|
||||
// Backend: Simple mmap-based chunk allocation (Phase 1)
|
||||
// Will integrate with SuperSlab in Phase 2
|
||||
|
||||
#include "hakmem_tiny_simple.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
|
||||
// ============================================================================
|
||||
// Phase 1: Ultra-Simple Fast Path Data Structures
|
||||
// ============================================================================
|
||||
|
||||
// TLS Free List - THE ONLY fast path data structure!
|
||||
// Per-thread array of free-list heads, one per size class.
// Each list is intrusive: the first pointer-sized word of a free block holds
// the address of the next free block, and NULL marks the end of the list
// (see the pop in hak_tiny_simple_alloc and the push in hak_tiny_simple_free).
__thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL};
|
||||
|
||||
// TLS Stats (per class)
|
||||
// Per-thread counters, one TinySimpleStats per size class.
// The code in this file updates alloc_count, hit_count, miss_count and
// free_count; each thread only ever sees its own numbers.
__thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}};
|
||||
|
||||
// Size class metadata
|
||||
// Block size in bytes for each size class, indexed by class number.
// Sizes double from one class to the next: 8 B for class 0 up to 1024 B for class 7.
static const size_t g_class_sizes[TINY_NUM_CLASSES] = {
    [0] = 8,
    [1] = 16,
    [2] = 32,
    [3] = 64,
    [4] = 128,
    [5] = 256,
    [6] = 512,
    [7] = 1024,
};
|
||||
|
||||
// Refill count (Phase 1: fixed, Phase 2: adaptive)
|
||||
// Maximum number of blocks the slow path carves from a chunk and pushes onto
// the free list in a single refill (loop bound in hak_tiny_simple_alloc_slow).
#define REFILL_COUNT 64
|
||||
|
||||
// ============================================================================
|
||||
// Backend: Simple Chunk Allocator (Phase 1)
|
||||
// ============================================================================
|
||||
|
||||
// Chunk size: 1MB (will be tuned in Phase 2)
|
||||
// Number of bytes requested from mmap() for every chunk (1 MiB); also used to
// compute the end-of-chunk pointer when a new chunk is installed.
#define CHUNK_SIZE (1024 * 1024)
|
||||
|
||||
// Per-class chunk state (TLS)
|
||||
// Bump-allocation bookkeeping for the chunk currently being carved for one
// size class. Pointers are char* so byte-granular arithmetic is well defined.
typedef struct {
    char *current_chunk;        // base address of the chunk being carved
    char *chunk_cursor;         // address of the next block to hand out
    char *chunk_end;            // one past the last byte of the chunk
    uint64_t chunks_allocated;  // running count of chunks installed so far
} TinyChunkState;
|
||||
|
||||
// Per-thread chunk bookkeeping, one TinyChunkState per size class.
// Has static storage duration, so every field starts out zero/NULL; the slow
// path installs a chunk the first time a class needs one.
static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES];
|
||||
|
||||
// Allocate a new chunk from OS
|
||||
static void* allocate_chunk(void) {
|
||||
void* chunk = mmap(NULL, CHUNK_SIZE,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||
if (chunk == MAP_FAILED) {
|
||||
return NULL;
|
||||
}
|
||||
return chunk;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Initialization
|
||||
// ============================================================================
|
||||
|
||||
void hak_tiny_simple_init(void) {
|
||||
// Clear TLS cache and stats
|
||||
memset(g_tls_tiny_cache, 0, sizeof(g_tls_tiny_cache));
|
||||
memset(g_tls_tiny_stats, 0, sizeof(g_tls_tiny_stats));
|
||||
memset(g_chunk_state, 0, sizeof(g_chunk_state));
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Ultra-Fast Allocation (3-4 instructions!)
|
||||
// ============================================================================
|
||||
|
||||
void* hak_tiny_simple_alloc(size_t size) {
|
||||
// Convert size to class (inlined)
|
||||
int cls = hak_tiny_simple_size_to_class(size);
|
||||
if (cls < 0) return NULL; // >1KB, not handled by Tiny
|
||||
|
||||
// Ultra-fast path: Pop from free list
|
||||
void** head = &g_tls_tiny_cache[cls];
|
||||
void* ptr = *head;
|
||||
if (ptr) {
|
||||
*head = *(void**)ptr; // Single instruction pop!
|
||||
g_tls_tiny_stats[cls].alloc_count++;
|
||||
g_tls_tiny_stats[cls].hit_count++;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Miss: go to slow path
|
||||
g_tls_tiny_stats[cls].miss_count++;
|
||||
return hak_tiny_simple_alloc_slow(size, cls);
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Fast Free
|
||||
// ============================================================================
|
||||
|
||||
void hak_tiny_simple_free(void* ptr, size_t size) {
|
||||
if (ptr == NULL) return;
|
||||
|
||||
// Convert size to class (inlined)
|
||||
int cls = hak_tiny_simple_size_to_class(size);
|
||||
if (cls < 0) return; // Invalid size
|
||||
|
||||
// Fast path: Push to free list
|
||||
void** head = &g_tls_tiny_cache[cls];
|
||||
*(void**)ptr = *head; // ptr->next = head
|
||||
*head = ptr; // head = ptr
|
||||
|
||||
g_tls_tiny_stats[cls].free_count++;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Slow Path: Batch Refill
|
||||
// ============================================================================
|
||||
|
||||
void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) {
|
||||
// Phase 1: Simple batch refill from chunk
|
||||
// Phase 2: Will add adaptive refill count based on miss rate
|
||||
|
||||
size_t class_size = g_class_sizes[class_idx];
|
||||
void** head = &g_tls_tiny_cache[class_idx];
|
||||
TinyChunkState* cs = &g_chunk_state[class_idx];
|
||||
|
||||
// Refill batch (Phase 1: fixed at 64 blocks)
|
||||
int refilled = 0;
|
||||
for (int i = 0; i < REFILL_COUNT; i++) {
|
||||
// Check if current chunk is exhausted
|
||||
if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) {
|
||||
// Allocate new chunk
|
||||
void* new_chunk = allocate_chunk();
|
||||
if (new_chunk == NULL) {
|
||||
break; // Out of memory
|
||||
}
|
||||
|
||||
cs->current_chunk = (char*)new_chunk;
|
||||
cs->chunk_cursor = (char*)new_chunk;
|
||||
cs->chunk_end = (char*)new_chunk + CHUNK_SIZE;
|
||||
cs->chunks_allocated++;
|
||||
}
|
||||
|
||||
// Carve out a block from chunk
|
||||
void* block = (void*)cs->chunk_cursor;
|
||||
cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size;
|
||||
|
||||
// Add to free list
|
||||
*(void**)block = *head;
|
||||
*head = block;
|
||||
refilled++;
|
||||
}
|
||||
|
||||
// Pop one block for the caller
|
||||
void* ptr = *head;
|
||||
if (ptr) {
|
||||
*head = *(void**)ptr;
|
||||
g_tls_tiny_stats[class_idx].alloc_count++;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// Complete failure (out of memory)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Stats (for debugging and Phase 2 learning layer)
|
||||
// ============================================================================
|
||||
|
||||
// Copy the calling thread's counters for `class_idx` into `*stats`.
// Does nothing when `class_idx` is outside [0, TINY_NUM_CLASSES) or when
// `stats` is NULL; in that case `*stats` is left untouched.
void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) {
    const int in_range = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    if (in_range && stats != NULL) {
        *stats = g_tls_tiny_stats[class_idx];
    }
}
|
||||
|
||||
void hak_tiny_simple_print_stats(void) {
|
||||
printf("\n=== Tiny Simple Allocator Stats ===\n");
|
||||
printf("Class | Size | Allocs | Frees | Hits | Misses | Hit Rate\n");
|
||||
printf("------|-------|-----------|-----------|-----------|-----------|----------\n");
|
||||
|
||||
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
||||
TinySimpleStats* s = &g_tls_tiny_stats[i];
|
||||
double hit_rate = (s->alloc_count > 0)
|
||||
? (100.0 * s->hit_count / s->alloc_count)
|
||||
: 0.0;
|
||||
|
||||
printf(" %d | %4zuB | %9lu | %9lu | %9lu | %9lu | %6.2f%%\n",
|
||||
i, g_class_sizes[i],
|
||||
s->alloc_count, s->free_count,
|
||||
s->hit_count, s->miss_count,
|
||||
hit_rate);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
Reference in New Issue
Block a user