// hakmem_tiny_simple.c // Phase 6-1: Ultra-Simple Tiny Allocator Implementation // // Design: "Simple Front + Smart Back" // - Front: 3-4 instruction fast path (this file, Phase 1) // - Back: Learning layer (Phase 2, to be added) // // Backend: Simple mmap-based chunk allocation (Phase 1) // Will integrate with SuperSlab in Phase 2 #include "hakmem_tiny_simple.h" #include #include #include #include #include // ============================================================================ // Phase 1: Ultra-Simple Fast Path Data Structures // ============================================================================ // TLS Free List - THE ONLY fast path data structure! __thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL}; // TLS Stats (per class) __thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}}; // Size class metadata static const size_t g_class_sizes[TINY_NUM_CLASSES] = { 8, 16, 32, 64, 128, 256, 512, 1024 }; // Refill count (Phase 1: fixed, Phase 2: adaptive) #define REFILL_COUNT 64 // ============================================================================ // Backend: Simple Chunk Allocator (Phase 1) // ============================================================================ // Chunk size: 1MB (will be tuned in Phase 2) #define CHUNK_SIZE (1024 * 1024) // Per-class chunk state (TLS) typedef struct { char* current_chunk; // Current chunk base (byte pointer for arithmetic) char* chunk_cursor; // Next free block in chunk char* chunk_end; // End of current chunk uint64_t chunks_allocated; // Total chunks allocated } TinyChunkState; static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES]; // Allocate a new chunk from OS static void* allocate_chunk(void) { void* chunk = mmap(NULL, CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (chunk == MAP_FAILED) { return NULL; } return chunk; } // ============================================================================ // Initialization // ============================================================================ void hak_tiny_simple_init(void) { // Clear TLS cache and stats memset(g_tls_tiny_cache, 0, sizeof(g_tls_tiny_cache)); memset(g_tls_tiny_stats, 0, sizeof(g_tls_tiny_stats)); memset(g_chunk_state, 0, sizeof(g_chunk_state)); } // ============================================================================ // Ultra-Fast Allocation (3-4 instructions!) // ============================================================================ void* hak_tiny_simple_alloc(size_t size) { // Convert size to class (inlined) int cls = hak_tiny_simple_size_to_class(size); if (cls < 0) return NULL; // >1KB, not handled by Tiny // Ultra-fast path: Pop from free list void** head = &g_tls_tiny_cache[cls]; void* ptr = *head; if (ptr) { *head = *(void**)ptr; // Single instruction pop! g_tls_tiny_stats[cls].alloc_count++; g_tls_tiny_stats[cls].hit_count++; return ptr; } // Miss: go to slow path g_tls_tiny_stats[cls].miss_count++; return hak_tiny_simple_alloc_slow(size, cls); } // ============================================================================ // Fast Free // ============================================================================ void hak_tiny_simple_free(void* ptr, size_t size) { if (ptr == NULL) return; // Convert size to class (inlined) int cls = hak_tiny_simple_size_to_class(size); if (cls < 0) return; // Invalid size // Fast path: Push to free list void** head = &g_tls_tiny_cache[cls]; *(void**)ptr = *head; // ptr->next = head *head = ptr; // head = ptr g_tls_tiny_stats[cls].free_count++; } // ============================================================================ // Slow Path: Batch Refill // ============================================================================ void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) { // Phase 1: Simple batch refill from chunk // Phase 2: Will add adaptive refill count based on miss rate size_t class_size = g_class_sizes[class_idx]; void** head = &g_tls_tiny_cache[class_idx]; TinyChunkState* cs = &g_chunk_state[class_idx]; // Refill batch (Phase 1: fixed at 64 blocks) int refilled = 0; for (int i = 0; i < REFILL_COUNT; i++) { // Check if current chunk is exhausted if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) { // Allocate new chunk void* new_chunk = allocate_chunk(); if (new_chunk == NULL) { break; // Out of memory } cs->current_chunk = (char*)new_chunk; cs->chunk_cursor = (char*)new_chunk; cs->chunk_end = (char*)new_chunk + CHUNK_SIZE; cs->chunks_allocated++; } // Carve out a block from chunk void* block = (void*)cs->chunk_cursor; cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size; // Add to free list *(void**)block = *head; *head = block; refilled++; } // Pop one block for the caller void* ptr = *head; if (ptr) { *head = *(void**)ptr; g_tls_tiny_stats[class_idx].alloc_count++; return ptr; } // Complete failure (out of memory) return NULL; } // ============================================================================ // Stats (for debugging and Phase 2 learning layer) // ============================================================================ void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) { if (class_idx < 0 || class_idx >= TINY_NUM_CLASSES || stats == NULL) { return; } *stats = g_tls_tiny_stats[class_idx]; } void hak_tiny_simple_print_stats(void) { printf("\n=== Tiny Simple Allocator Stats ===\n"); printf("Class | Size | Allocs | Frees | Hits | Misses | Hit Rate\n"); printf("------|-------|-----------|-----------|-----------|-----------|----------\n"); for (int i = 0; i < TINY_NUM_CLASSES; i++) { TinySimpleStats* s = &g_tls_tiny_stats[i]; double hit_rate = (s->alloc_count > 0) ? (100.0 * s->hit_count / s->alloc_count) : 0.0; printf(" %d | %4zuB | %9lu | %9lu | %9lu | %9lu | %6.2f%%\n", i, g_class_sizes[i], s->alloc_count, s->free_count, s->hit_count, s->miss_count, hit_rate); } printf("\n"); }