199 lines
6.5 KiB
C
199 lines
6.5 KiB
C
|
|
// hakmem_tiny_simple.c
// Phase 6-1: Ultra-Simple Tiny Allocator Implementation
//
// Design: "Simple Front + Smart Back"
//   - Front: 3-4 instruction fast path (this file, Phase 1)
//   - Back:  Learning layer (Phase 2, to be added)
//
// Backend: Simple mmap-based chunk allocation (Phase 1)
//          Will integrate with SuperSlab in Phase 2
|
||
|
|
|
||
|
|
#include "hakmem_tiny_simple.h"
|
||
|
|
#include <stdio.h>
|
||
|
|
#include <string.h>
|
||
|
|
#include <pthread.h>
|
||
|
|
#include <sys/mman.h>
|
||
|
|
#include <unistd.h>
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Phase 1: Ultra-Simple Fast Path Data Structures
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// TLS Free List - THE ONLY fast path data structure!
|
||
|
|
__thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL};
|
||
|
|
|
||
|
|
// TLS Stats (per class)
|
||
|
|
__thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}};
|
||
|
|
|
||
|
|
// Size class metadata
|
||
|
|
static const size_t g_class_sizes[TINY_NUM_CLASSES] = {
|
||
|
|
8, 16, 32, 64, 128, 256, 512, 1024
|
||
|
|
};
|
||
|
|
|
||
|
|
// Refill count (Phase 1: fixed, Phase 2: adaptive)
|
||
|
|
// Upper bound on the number of blocks carved from the chunk and pushed onto
// the TLS free list by one pass of the slow-path refill loop.
#define REFILL_COUNT 64
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Backend: Simple Chunk Allocator (Phase 1)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// Chunk size: 1MB (will be tuned in Phase 2)
|
||
|
|
// Size in bytes of each anonymous mapping requested by allocate_chunk()
// (1024 * 1024 bytes).
#define CHUNK_SIZE (1024 * 1024)
|
||
|
|
|
||
|
|
// Per-class chunk state (TLS).
// Describes the chunk that is currently being carved into blocks for one
// size class, plus a running count of chunks obtained for that class.
typedef struct {
    /* Base address of the current chunk (char* so byte arithmetic is direct). */
    char *current_chunk;
    /* Address of the next block that has not been carved yet. */
    char *chunk_cursor;
    /* One past the last byte of the current chunk. */
    char *chunk_end;
    /* Total number of chunks allocated for this class. */
    uint64_t chunks_allocated;
} TinyChunkState;
|
||
|
|
|
||
|
|
// One chunk cursor per size class, private to each thread (__thread).
// hak_tiny_simple_init() zeroes it; the slow path maps a chunk when the
// cursor cannot supply another block.
static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES];
|
||
|
|
|
||
|
|
// Allocate a new chunk from OS
|
||
|
|
static void* allocate_chunk(void) {
|
||
|
|
void* chunk = mmap(NULL, CHUNK_SIZE,
|
||
|
|
PROT_READ | PROT_WRITE,
|
||
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||
|
|
if (chunk == MAP_FAILED) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
return chunk;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Initialization
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
void hak_tiny_simple_init(void) {
|
||
|
|
// Clear TLS cache and stats
|
||
|
|
memset(g_tls_tiny_cache, 0, sizeof(g_tls_tiny_cache));
|
||
|
|
memset(g_tls_tiny_stats, 0, sizeof(g_tls_tiny_stats));
|
||
|
|
memset(g_chunk_state, 0, sizeof(g_chunk_state));
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Ultra-Fast Allocation (3-4 instructions!)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
void* hak_tiny_simple_alloc(size_t size) {
|
||
|
|
// Convert size to class (inlined)
|
||
|
|
int cls = hak_tiny_simple_size_to_class(size);
|
||
|
|
if (cls < 0) return NULL; // >1KB, not handled by Tiny
|
||
|
|
|
||
|
|
// Ultra-fast path: Pop from free list
|
||
|
|
void** head = &g_tls_tiny_cache[cls];
|
||
|
|
void* ptr = *head;
|
||
|
|
if (ptr) {
|
||
|
|
*head = *(void**)ptr; // Single instruction pop!
|
||
|
|
g_tls_tiny_stats[cls].alloc_count++;
|
||
|
|
g_tls_tiny_stats[cls].hit_count++;
|
||
|
|
return ptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Miss: go to slow path
|
||
|
|
g_tls_tiny_stats[cls].miss_count++;
|
||
|
|
return hak_tiny_simple_alloc_slow(size, cls);
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Fast Free
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
void hak_tiny_simple_free(void* ptr, size_t size) {
|
||
|
|
if (ptr == NULL) return;
|
||
|
|
|
||
|
|
// Convert size to class (inlined)
|
||
|
|
int cls = hak_tiny_simple_size_to_class(size);
|
||
|
|
if (cls < 0) return; // Invalid size
|
||
|
|
|
||
|
|
// Fast path: Push to free list
|
||
|
|
void** head = &g_tls_tiny_cache[cls];
|
||
|
|
*(void**)ptr = *head; // ptr->next = head
|
||
|
|
*head = ptr; // head = ptr
|
||
|
|
|
||
|
|
g_tls_tiny_stats[cls].free_count++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Slow Path: Batch Refill
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) {
|
||
|
|
// Phase 1: Simple batch refill from chunk
|
||
|
|
// Phase 2: Will add adaptive refill count based on miss rate
|
||
|
|
|
||
|
|
size_t class_size = g_class_sizes[class_idx];
|
||
|
|
void** head = &g_tls_tiny_cache[class_idx];
|
||
|
|
TinyChunkState* cs = &g_chunk_state[class_idx];
|
||
|
|
|
||
|
|
// Refill batch (Phase 1: fixed at 64 blocks)
|
||
|
|
int refilled = 0;
|
||
|
|
for (int i = 0; i < REFILL_COUNT; i++) {
|
||
|
|
// Check if current chunk is exhausted
|
||
|
|
if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) {
|
||
|
|
// Allocate new chunk
|
||
|
|
void* new_chunk = allocate_chunk();
|
||
|
|
if (new_chunk == NULL) {
|
||
|
|
break; // Out of memory
|
||
|
|
}
|
||
|
|
|
||
|
|
cs->current_chunk = (char*)new_chunk;
|
||
|
|
cs->chunk_cursor = (char*)new_chunk;
|
||
|
|
cs->chunk_end = (char*)new_chunk + CHUNK_SIZE;
|
||
|
|
cs->chunks_allocated++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Carve out a block from chunk
|
||
|
|
void* block = (void*)cs->chunk_cursor;
|
||
|
|
cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size;
|
||
|
|
|
||
|
|
// Add to free list
|
||
|
|
*(void**)block = *head;
|
||
|
|
*head = block;
|
||
|
|
refilled++;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Pop one block for the caller
|
||
|
|
void* ptr = *head;
|
||
|
|
if (ptr) {
|
||
|
|
*head = *(void**)ptr;
|
||
|
|
g_tls_tiny_stats[class_idx].alloc_count++;
|
||
|
|
return ptr;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Complete failure (out of memory)
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Stats (for debugging and Phase 2 learning layer)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
// Copy the calling thread's counters for `class_idx` into `*stats`.
// Does nothing when `class_idx` is outside [0, TINY_NUM_CLASSES) or `stats`
// is NULL.
void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) {
    const int in_range = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);

    if (in_range && stats != NULL) {
        *stats = g_tls_tiny_stats[class_idx];
    }
}
|
||
|
|
|
||
|
|
void hak_tiny_simple_print_stats(void) {
|
||
|
|
printf("\n=== Tiny Simple Allocator Stats ===\n");
|
||
|
|
printf("Class | Size | Allocs | Frees | Hits | Misses | Hit Rate\n");
|
||
|
|
printf("------|-------|-----------|-----------|-----------|-----------|----------\n");
|
||
|
|
|
||
|
|
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
|
||
|
|
TinySimpleStats* s = &g_tls_tiny_stats[i];
|
||
|
|
double hit_rate = (s->alloc_count > 0)
|
||
|
|
? (100.0 * s->hit_count / s->alloc_count)
|
||
|
|
: 0.0;
|
||
|
|
|
||
|
|
printf(" %d | %4zuB | %9lu | %9lu | %9lu | %9lu | %6.2f%%\n",
|
||
|
|
i, g_class_sizes[i],
|
||
|
|
s->alloc_count, s->free_count,
|
||
|
|
s->hit_count, s->miss_count,
|
||
|
|
hit_rate);
|
||
|
|
}
|
||
|
|
printf("\n");
|
||
|
|
}
|