hakmem/core/hakmem_tiny_simple.c

// hakmem_tiny_simple.c
// Phase 6-1: Ultra-Simple Tiny Allocator Implementation
//
// Design: "Simple Front + Smart Back"
// - Front: 3-4 instruction fast path (this file, Phase 1)
// - Back: Learning layer (Phase 2, to be added)
//
// Backend: Simple mmap-based chunk allocation (Phase 1)
//          Will integrate with SuperSlab in Phase 2

#include "hakmem_tiny_simple.h"
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>

// ============================================================================
// Phase 1: Ultra-Simple Fast Path Data Structures
// ============================================================================

// Thread-local free-list heads, one per size class.
// Each entry points at the first free block of that class (NULL when the
// list is empty). A free block stores the pointer to the next free block in
// its own first sizeof(void*) bytes, so the list needs no extra memory.
// This is the only structure touched by the alloc/free fast paths.
__thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL};

// Thread-local counters, one TinySimpleStats record per size class.
// Updated by the alloc/free paths in this file and read by the stats
// functions; being __thread, each thread only ever sees its own counts.
__thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}};

// Block size in bytes for each size-class index.
// The slow path carves blocks of exactly this size out of a chunk, and every
// free block holds a next-pointer in its first bytes, so the smallest entry
// (8) only works where sizeof(void*) <= 8.
static const size_t g_class_sizes[TINY_NUM_CLASSES] = {
    8, 16, 32, 64, 128, 256, 512, 1024
};

// Maximum number of blocks carved from the chunk and pushed onto the free
// list by one slow-path call (Phase 1: fixed, Phase 2: adaptive).
#define REFILL_COUNT 64

// ============================================================================
// Backend: Simple Chunk Allocator (Phase 1)
// ============================================================================

// Number of bytes requested from the OS per mmap() call: 1MB
// (will be tuned in Phase 2).
#define CHUNK_SIZE (1024 * 1024)

// Bump-allocation state for one size class (one instance per class, per thread).
// Blocks are handed out by advancing chunk_cursor through the current chunk.
typedef struct {
    char* current_chunk;     // Base of the most recently mapped chunk (byte pointer for arithmetic)
    char* chunk_cursor;      // Address of the next block to carve from the chunk
    char* chunk_end;         // One past the last byte of the current chunk
    uint64_t chunks_allocated;  // Number of chunks mapped so far for this class
} TinyChunkState;

// Thread-local chunk state, indexed by size class. Static storage is
// zero-initialized, so every class starts with no chunk (all pointers NULL)
// and the first slow-path call for a class maps one.
static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES];

// Map one fresh CHUNK_SIZE-byte region from the OS.
//
// The region is a private, anonymous, readable and writable mapping.
// Returns the base address of the mapping, or NULL when mmap() fails
// (MAP_FAILED is translated so callers can use an ordinary NULL check).
static void* allocate_chunk(void) {
    const int prot = PROT_READ | PROT_WRITE;
    const int flags = MAP_PRIVATE | MAP_ANONYMOUS;

    void* mapping = mmap(NULL, CHUNK_SIZE, prot, flags, -1, 0);
    return (mapping != MAP_FAILED) ? mapping : NULL;
}

// ============================================================================
// Initialization
// ============================================================================

void hak_tiny_simple_init(void) {
    // Reset the calling thread's allocator state to all-zero: chunk
    // bookkeeping, per-class counters and free-list heads. The three arrays
    // are thread-local, so other threads are unaffected.
    // Chunks mapped earlier are not unmapped here; only the pointers to
    // them are forgotten.
    memset(g_chunk_state, 0, sizeof g_chunk_state);
    memset(g_tls_tiny_stats, 0, sizeof g_tls_tiny_stats);
    memset(g_tls_tiny_cache, 0, sizeof g_tls_tiny_cache);
}

// ============================================================================
// Ultra-Fast Allocation (3-4 instructions!)
// ============================================================================

void* hak_tiny_simple_alloc(size_t size) {
    // Translate the request into a size-class index. A negative index means
    // the size is not served by this allocator, so the caller gets NULL.
    const int class_idx = hak_tiny_simple_size_to_class(size);
    if (class_idx < 0) {
        return NULL;
    }

    void* block = g_tls_tiny_cache[class_idx];
    if (block == NULL) {
        // Free list empty: record the miss and let the slow path refill the
        // list and hand back a block (or NULL if it cannot).
        g_tls_tiny_stats[class_idx].miss_count++;
        return hak_tiny_simple_alloc_slow(size, class_idx);
    }

    // Hit: unlink the first block. Its first word holds the next free block,
    // which becomes the new list head.
    g_tls_tiny_cache[class_idx] = *(void**)block;
    g_tls_tiny_stats[class_idx].alloc_count++;
    g_tls_tiny_stats[class_idx].hit_count++;
    return block;
}

// ============================================================================
// Fast Free
// ============================================================================

void hak_tiny_simple_free(void* ptr, size_t size) {
    // Freeing NULL is a no-op.
    if (!ptr) {
        return;
    }

    // The caller supplies the size; it selects which per-class list receives
    // the block. A size that maps to no class is silently ignored.
    const int class_idx = hak_tiny_simple_size_to_class(size);
    if (class_idx >= 0) {
        // Push onto the front of the calling thread's free list: the block's
        // first word is overwritten with the old head, then the block
        // becomes the head.
        void** link = (void**)ptr;
        *link = g_tls_tiny_cache[class_idx];
        g_tls_tiny_cache[class_idx] = ptr;

        g_tls_tiny_stats[class_idx].free_count++;
    }
}

// ============================================================================
// Slow Path: Batch Refill
// ============================================================================

void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) {
    // Phase 1: Simple batch refill from chunk
    // Phase 2: Will add adaptive refill count based on miss rate

    size_t class_size = g_class_sizes[class_idx];
    void** head = &g_tls_tiny_cache[class_idx];
    TinyChunkState* cs = &g_chunk_state[class_idx];

    // Refill batch (Phase 1: fixed at 64 blocks)
    int refilled = 0;
    for (int i = 0; i < REFILL_COUNT; i++) {
        // Check if current chunk is exhausted
        if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) {
            // Allocate new chunk
            void* new_chunk = allocate_chunk();
            if (new_chunk == NULL) {
                break;  // Out of memory
            }

            cs->current_chunk = (char*)new_chunk;
            cs->chunk_cursor = (char*)new_chunk;
            cs->chunk_end = (char*)new_chunk + CHUNK_SIZE;
            cs->chunks_allocated++;
        }

        // Carve out a block from chunk
        void* block = (void*)cs->chunk_cursor;
        cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size;

        // Add to free list
        *(void**)block = *head;
        *head = block;
        refilled++;
    }

    // Pop one block for the caller
    void* ptr = *head;
    if (ptr) {
        *head = *(void**)ptr;
        g_tls_tiny_stats[class_idx].alloc_count++;
        return ptr;
    }

    // Complete failure (out of memory)
    return NULL;
}

// ============================================================================
// Stats (for debugging and Phase 2 learning layer)
// ============================================================================

void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) {
    // Copy the calling thread's counters for one size class into *stats.
    // Nothing is written when class_idx is outside [0, TINY_NUM_CLASSES)
    // or when stats is NULL.
    const int index_ok = (class_idx >= 0) && (class_idx < TINY_NUM_CLASSES);
    if (index_ok && stats != NULL) {
        *stats = g_tls_tiny_stats[class_idx];
    }
}

// Print a table of the calling thread's per-class counters to stdout:
// one row per size class with its block size, allocation, free, hit and
// miss counts, and the hit rate (hits as a percentage of allocations,
// 0.00% when the class has no allocations).
void hak_tiny_simple_print_stats(void) {
    printf("\n=== Tiny Simple Allocator Stats ===\n");
    printf("Class | Size  | Allocs    | Frees     | Hits      | Misses    | Hit Rate\n");
    printf("------|-------|-----------|-----------|-----------|-----------|----------\n");

    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        const TinySimpleStats* s = &g_tls_tiny_stats[i];
        double hit_rate = (s->alloc_count > 0)
            ? (100.0 * s->hit_count / s->alloc_count)
            : 0.0;

        // The counter fields are cast to unsigned long long and printed with
        // %llu so the format always matches the argument type, whatever
        // width the fields are declared with on the target platform.
        printf("  %d   | %4zuB | %9llu | %9llu | %9llu | %9llu | %6.2f%%\n",
            i, g_class_sizes[i],
            (unsigned long long)s->alloc_count,
            (unsigned long long)s->free_count,
            (unsigned long long)s->hit_count,
            (unsigned long long)s->miss_count,
            hit_rate);
    }
    printf("\n");
}
Debug Counters Implementation - Clean History

Major Features:
- Debug counter infrastructure for Refill Stage tracking
- Free Pipeline counters (ss_local, ss_remote, tls_sll)
- Diagnostic counters for early return analysis
- Unified larson.sh benchmark runner with profiles
- Phase 6-3 regression analysis documentation

Bug Fixes:
- Fix SuperSlab disabled by default (HAKMEM_TINY_USE_SUPERSLAB)
- Fix profile variable naming consistency
- Add .gitignore patterns for large files

Performance:
- Phase 6-3: 4.79 M ops/s (has OOM risk)
- With SuperSlab: 3.13 M ops/s (+19% improvement)

This is a clean repository without large log files.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-05 12:31:14 +09:00

			`// hakmem_tiny_simple.c`
			`// Phase 6-1: Ultra-Simple Tiny Allocator Implementation`
			`//`
			`// Design: "Simple Front + Smart Back"`
			`// - Front: 3-4 instruction fast path (this file, Phase 1)`
			`// - Back: Learning layer (Phase 2, to be added)`
			`//`
			`// Backend: Simple mmap-based chunk allocation (Phase 1)`
			`// Will integrate with SuperSlab in Phase 2`

			`#include "hakmem_tiny_simple.h"`
			`#include <stdio.h>`
			`#include <string.h>`
			`#include <pthread.h>`
			`#include <sys/mman.h>`
			`#include <unistd.h>`

			`// ============================================================================`
			`// Phase 1: Ultra-Simple Fast Path Data Structures`
			`// ============================================================================`

			`// TLS Free List - THE ONLY fast path data structure!`
			`__thread void* g_tls_tiny_cache[TINY_NUM_CLASSES] = {NULL};`

			`// TLS Stats (per class)`
			`__thread TinySimpleStats g_tls_tiny_stats[TINY_NUM_CLASSES] = {{0}};`

			`// Size class metadata`
			`static const size_t g_class_sizes[TINY_NUM_CLASSES] = {`
			`8, 16, 32, 64, 128, 256, 512, 1024`
			`};`

			`// Refill count (Phase 1: fixed, Phase 2: adaptive)`
			`#define REFILL_COUNT 64`

			`// ============================================================================`
			`// Backend: Simple Chunk Allocator (Phase 1)`
			`// ============================================================================`

			`// Chunk size: 1MB (will be tuned in Phase 2)`
			`#define CHUNK_SIZE (1024 * 1024)`

			`// Per-class chunk state (TLS)`
			`typedef struct {`
			`char* current_chunk; // Current chunk base (byte pointer for arithmetic)`
			`char* chunk_cursor; // Next free block in chunk`
			`char* chunk_end; // End of current chunk`
			`uint64_t chunks_allocated; // Total chunks allocated`
			`} TinyChunkState;`

			`static __thread TinyChunkState g_chunk_state[TINY_NUM_CLASSES];`

			`// Allocate a new chunk from OS`
			`static void* allocate_chunk(void) {`
			`void* chunk = mmap(NULL, CHUNK_SIZE,`
			`PROT_READ \| PROT_WRITE,`
			`MAP_PRIVATE \| MAP_ANONYMOUS, -1, 0);`
			`if (chunk == MAP_FAILED) {`
			`return NULL;`
			`}`
			`return chunk;`
			`}`

			`// ============================================================================`
			`// Initialization`
			`// ============================================================================`

			`void hak_tiny_simple_init(void) {`
			`// Clear TLS cache and stats`
			`memset(g_tls_tiny_cache, 0, sizeof(g_tls_tiny_cache));`
			`memset(g_tls_tiny_stats, 0, sizeof(g_tls_tiny_stats));`
			`memset(g_chunk_state, 0, sizeof(g_chunk_state));`
			`}`

			`// ============================================================================`
			`// Ultra-Fast Allocation (3-4 instructions!)`
			`// ============================================================================`

			`void* hak_tiny_simple_alloc(size_t size) {`
			`// Convert size to class (inlined)`
			`int cls = hak_tiny_simple_size_to_class(size);`
			`if (cls < 0) return NULL; // >1KB, not handled by Tiny`

			`// Ultra-fast path: Pop from free list`
			`void** head = &g_tls_tiny_cache[cls];`
			`void* ptr = *head;`
			`if (ptr) {`
			`*head = *(void**)ptr; // Single instruction pop!`
			`g_tls_tiny_stats[cls].alloc_count++;`
			`g_tls_tiny_stats[cls].hit_count++;`
			`return ptr;`
			`}`

			`// Miss: go to slow path`
			`g_tls_tiny_stats[cls].miss_count++;`
			`return hak_tiny_simple_alloc_slow(size, cls);`
			`}`

			`// ============================================================================`
			`// Fast Free`
			`// ============================================================================`

			`void hak_tiny_simple_free(void* ptr, size_t size) {`
			`if (ptr == NULL) return;`

			`// Convert size to class (inlined)`
			`int cls = hak_tiny_simple_size_to_class(size);`
			`if (cls < 0) return; // Invalid size`

			`// Fast path: Push to free list`
			`void** head = &g_tls_tiny_cache[cls];`
			`*(void**)ptr = *head; // ptr->next = head`
			`*head = ptr; // head = ptr`

			`g_tls_tiny_stats[cls].free_count++;`
			`}`

			`// ============================================================================`
			`// Slow Path: Batch Refill`
			`// ============================================================================`

			`void* hak_tiny_simple_alloc_slow(size_t size, int class_idx) {`
			`// Phase 1: Simple batch refill from chunk`
			`// Phase 2: Will add adaptive refill count based on miss rate`

			`size_t class_size = g_class_sizes[class_idx];`
			`void** head = &g_tls_tiny_cache[class_idx];`
			`TinyChunkState* cs = &g_chunk_state[class_idx];`

			`// Refill batch (Phase 1: fixed at 64 blocks)`
			`int refilled = 0;`
			`for (int i = 0; i < REFILL_COUNT; i++) {`
			`// Check if current chunk is exhausted`
			`if (cs->chunk_cursor + (ptrdiff_t)class_size > cs->chunk_end) {`
			`// Allocate new chunk`
			`void* new_chunk = allocate_chunk();`
			`if (new_chunk == NULL) {`
			`break; // Out of memory`
			`}`

			`cs->current_chunk = (char*)new_chunk;`
			`cs->chunk_cursor = (char*)new_chunk;`
			`cs->chunk_end = (char*)new_chunk + CHUNK_SIZE;`
			`cs->chunks_allocated++;`
			`}`

			`// Carve out a block from chunk`
			`void* block = (void*)cs->chunk_cursor;`
			`cs->chunk_cursor = cs->chunk_cursor + (ptrdiff_t)class_size;`

			`// Add to free list`
			`*(void**)block = *head;`
			`*head = block;`
			`refilled++;`
			`}`

			`// Pop one block for the caller`
			`void* ptr = *head;`
			`if (ptr) {`
			`*head = *(void**)ptr;`
			`g_tls_tiny_stats[class_idx].alloc_count++;`
			`return ptr;`
			`}`

			`// Complete failure (out of memory)`
			`return NULL;`
			`}`

			`// ============================================================================`
			`// Stats (for debugging and Phase 2 learning layer)`
			`// ============================================================================`

			`void hak_tiny_simple_get_stats(int class_idx, TinySimpleStats* stats) {`
			`if (class_idx < 0 \|\| class_idx >= TINY_NUM_CLASSES \|\| stats == NULL) {`
			`return;`
			`}`
			`*stats = g_tls_tiny_stats[class_idx];`
			`}`

			`void hak_tiny_simple_print_stats(void) {`
			`printf("\n=== Tiny Simple Allocator Stats ===\n");`
			`printf("Class \| Size \| Allocs \| Frees \| Hits \| Misses \| Hit Rate\n");`
			`printf("------\|-------\|-----------\|-----------\|-----------\|-----------\|----------\n");`

			`for (int i = 0; i < TINY_NUM_CLASSES; i++) {`
			`TinySimpleStats* s = &g_tls_tiny_stats[i];`
			`double hit_rate = (s->alloc_count > 0)`
			`? (100.0 * s->hit_count / s->alloc_count)`
			`: 0.0;`

			`printf(" %d \| %4zuB \| %9lu \| %9lu \| %9lu \| %9lu \| %6.2f%%\n",`
			`i, g_class_sizes[i],`
			`s->alloc_count, s->free_count,`
			`s->hit_count, s->miss_count,`
			`hit_rate);`
			`}`
			`printf("\n");`
			`}`