Files
hakmem/core/hakmem_tiny_simple.c
Moe Charm (CI) 52386401b3 Debug Counters Implementation - Clean History
Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

199 lines
6.5 KiB
C

// hakmem_tiny_simple.c
// Phase 6-1: Ultra-Simple Tiny Allocator Implementation
//
// Design: "Simple Front + Smart Back"
// - Front: 3-4 instruction fast path (this file, Phase 1)
// - Back: Learning layer (Phase 2, to be added)
//
// Backend: Simple mmap-based chunk allocation (Phase 1)
// Will integrate with SuperSlab in Phase 2
#include "hakmem_tiny_simple.h"
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
// ============================================================================
// Phase 1: Ultra-Simple Fast Path Data Structures
// ============================================================================
// Per-thread free lists: one head pointer per size class.
// Each free block stores the pointer to the next free block in its own first
// bytes (see the push/pop code in the alloc/free functions below); a NULL head
// means the list for that class is empty.
__thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL};
// Per-thread counters, one TinySimpleStats per size class. Updated by the
// alloc/free paths and read by the stats functions at the end of this file.
__thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}};
// Block size in bytes for each size class, indexed by class number.
// The slow path uses it as the carving stride; print_stats displays it.
// NOTE(review): the initializer lists 8 values, so TINY_NUM_CLASSES (defined
// outside this file) is presumably 8 — confirm against the header.
static const size_t g_class_sizes[TINY_NUM_CLASSES] = {
8, 16, 32, 64, 128, 256, 512, 1024
};
// Maximum number of blocks the slow path carves and pushes onto a free list
// per refill (the refill loop stops early if no new chunk can be obtained).
// Fixed in Phase 1; planned to become adaptive in Phase 2.
#define REFILL_COUNT 64
// ============================================================================
// Backend: Simple Chunk Allocator (Phase 1)
// ============================================================================
// Size in bytes of each region requested from mmap: 1 MiB.
// Planned to be tuned in Phase 2.
#define CHUNK_SIZE (1024 * 1024)
// Bump-pointer state for the chunk currently being carved for one size class.
// There is one instance per size class per thread (see g_chunk_state below).
typedef struct {
char* current_chunk; // Base address of the chunk being carved (char* so byte arithmetic is possible)
char* chunk_cursor; // Address of the next not-yet-carved block inside the chunk
char* chunk_end; // One past the last byte of the chunk (base + CHUNK_SIZE)
uint64_t chunks_allocated; // Incremented each time the slow path obtains a new chunk for this class
} TinyChunkState;
// Thread-local chunk state, indexed by size class. It has no initializer, so
// it is zero-initialized: every pointer starts out NULL and every count at 0.
static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES];
// Map one anonymous, private, readable and writable region of CHUNK_SIZE bytes.
// Returns the base address of the mapping, or NULL when mmap reports MAP_FAILED.
static void* allocate_chunk(void) {
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_PRIVATE | MAP_ANONYMOUS;

    void* base = mmap(NULL, CHUNK_SIZE, prot, flags, -1, 0);

    // Translate mmap's failure sentinel into the NULL convention used by callers.
    return (base == MAP_FAILED) ? NULL : base;
}
// ============================================================================
// Initialization
// ============================================================================
// Zero the calling thread's allocator state: chunk bookkeeping, per-class
// counters and free-list heads (all three arrays are thread-local).
// This only clears the bookkeeping; it does not unmap any chunk that was
// mapped before the call.
void hak_tiny_simple_init(void) {
    memset(g_chunk_state, 0, sizeof g_chunk_state);
    memset(g_tls_tiny_stats, 0, sizeof g_tls_tiny_stats);
    memset(g_tls_tiny_cache, 0, sizeof g_tls_tiny_cache);
}
// ============================================================================
// Ultra-Fast Allocation (3-4 instructions!)
// ============================================================================
// Allocate one block for a request of `size` bytes.
//
// Maps `size` to a size class, then pops the head of the calling thread's
// free list for that class. When the list is empty, records a miss and
// delegates to hak_tiny_simple_alloc_slow().
//
// Returns the block, NULL when hak_tiny_simple_size_to_class() yields a
// negative class, or whatever the slow path returns on a miss.
void* hak_tiny_simple_alloc(size_t size) {
    const int cls = hak_tiny_simple_size_to_class(size);
    if (cls < 0) {
        return NULL;  // no tiny size class for this request
    }

    void* block = g_tls_tiny_cache[cls];
    if (block == NULL) {
        // Empty free list: count the miss and let the slow path refill.
        g_tls_tiny_stats[cls].miss_count++;
        return hak_tiny_simple_alloc_slow(size, cls);
    }

    // Hit: the first word of a free block holds the next free block.
    g_tls_tiny_cache[cls] = *(void**)block;
    g_tls_tiny_stats[cls].alloc_count++;
    g_tls_tiny_stats[cls].hit_count++;
    return block;
}
// ============================================================================
// Fast Free
// ============================================================================
// Return a block to the calling thread's free list.
//
// `size` is mapped to a size class with hak_tiny_simple_size_to_class() and
// the block is pushed onto the front of that class's list. A NULL `ptr` or a
// size with no matching class (negative class) is ignored.
void hak_tiny_simple_free(void* ptr, size_t size) {
    if (!ptr) {
        return;
    }

    const int cls = hak_tiny_simple_size_to_class(size);
    if (cls < 0) {
        return;  // size does not map to a tiny class
    }

    // Push: store the old head in the block's first word, then make the
    // block the new head.
    *(void**)ptr = g_tls_tiny_cache[cls];
    g_tls_tiny_cache[cls] = ptr;
    g_tls_tiny_stats[cls].free_count++;
}
// ============================================================================
// Slow Path: Batch Refill
// ============================================================================
void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) {
// Phase 1: Simple batch refill from chunk
// Phase 2: Will add adaptive refill count based on miss rate
size_t class_size = g_class_sizes[class_idx];
void** head = &g_tls_tiny_cache[class_idx];
TinyChunkState* cs = &g_chunk_state[class_idx];
// Refill batch (Phase 1: fixed at 64 blocks)
int refilled = 0;
for (int i = 0; i < REFILL_COUNT; i++) {
// Check if current chunk is exhausted
if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) {
// Allocate new chunk
void* new_chunk = allocate_chunk();
if (new_chunk == NULL) {
break; // Out of memory
}
cs->current_chunk = (char*)new_chunk;
cs->chunk_cursor = (char*)new_chunk;
cs->chunk_end = (char*)new_chunk + CHUNK_SIZE;
cs->chunks_allocated++;
}
// Carve out a block from chunk
void* block = (void*)cs->chunk_cursor;
cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size;
// Add to free list
*(void**)block = *head;
*head = block;
refilled++;
}
// Pop one block for the caller
void* ptr = *head;
if (ptr) {
*head = *(void**)ptr;
g_tls_tiny_stats[class_idx].alloc_count++;
return ptr;
}
// Complete failure (out of memory)
return NULL;
}
// ============================================================================
// Stats (for debugging and Phase 2 learning layer)
// ============================================================================
// Copy the calling thread's counters for size class `class_idx` into `*stats`.
// Does nothing when `stats` is NULL or `class_idx` is outside
// [0, TINY_NUM_CLASSES).
void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) {
    const int index_ok = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);

    if (index_ok && stats != NULL) {
        *stats = g_tls_tiny_stats[class_idx];
    }
}
// Print a table of the calling thread's per-class counters to stdout: class
// index, block size, allocation/free/hit/miss counts and the hit rate
// (hit_count as a percentage of alloc_count, 0.00 when nothing was allocated).
void hak_tiny_simple_print_stats(void) {
    printf("\n=== Tiny Simple Allocator Stats ===\n");
    printf("Class | Size | Allocs | Frees | Hits | Misses | Hit Rate\n");
    printf("------|-------|-----------|-----------|-----------|-----------|----------\n");
    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        const TinySimpleStats* s = &g_tls_tiny_stats[i];
        double hit_rate = (s->alloc_count > 0)
            ? (100.0 * s->hit_count / s->alloc_count)
            : 0.0;
        // The counter type is declared in the header; cast to unsigned long long
        // so the conversion specifier matches the argument on every ABI
        // (e.g. where a 64-bit counter is not `unsigned long`).
        printf(" %d | %4zuB | %9llu | %9llu | %9llu | %9llu | %6.2f%%\n",
               i, g_class_sizes[i],
               (unsigned long long)s->alloc_count,
               (unsigned long long)s->free_count,
               (unsigned long long)s->hit_count,
               (unsigned long long)s->miss_count,
               hit_rate);
    }
    printf("\n");
}