/**
 * hakmem_smallmid_superslab.c - Small-Mid SuperSlab Backend Implementation
 *
 * Phase 17-2: Dedicated SuperSlab pool for the Small-Mid allocator
 * Goal: 2-3x performance improvement via batch refills and a dedicated backend
 *
 * Created: 2025-11-16
 */
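/*
 * Memory layout sketch (assembled from the code below; the exact constants
 * live in hakmem_smallmid_superslab.h and are assumptions here):
 *
 *   SuperSlab: 1MB mmap region, SMALLMID_SS_ALIGNMENT-aligned
 *     slab[i] data region: ss_base + i * SMALLMID_SLAB_SIZE (64KB per slab)
 *     block stride:        block_size + 1 (one in-band header byte)
 *     block header byte:   0xb0 | class_idx, readable back as user_ptr[-1]
 *
 *   Note: the SmallMidSuperSlab header itself sits at ss_base, which is also
 *   where slab 0's data region is computed to start below; the header
 *   constants presumably account for that overlap (not visible in this file).
 */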
#include "hakmem_smallmid_superslab.h"
#include "hakmem_smallmid.h"
#include <sys/mman.h>
#include <string.h>
#include <stdio.h>
#include <time.h>
#include <errno.h>
// ============================================================================
// Global State
// ============================================================================
SmallMidSSHead g_smallmid_ss_pools[SMALLMID_NUM_CLASSES];
static pthread_once_t g_smallmid_ss_init_once = PTHREAD_ONCE_INIT;
static int g_smallmid_ss_initialized = 0;
#ifdef HAKMEM_SMALLMID_SS_STATS
SmallMidSSStats g_smallmid_ss_stats = {0};
#endif
// ============================================================================
// Initialization
// ============================================================================
static void smallmid_superslab_init_once(void) {
    for (int i = 0; i < SMALLMID_NUM_CLASSES; i++) {
        SmallMidSSHead* pool = &g_smallmid_ss_pools[i];
        pool->class_idx = i;
        pool->total_ss = 0;
        pool->first_ss = NULL;
        pool->current_ss = NULL;
        pool->lru_head = NULL;
        pool->lru_tail = NULL;
        pthread_mutex_init(&pool->lock, NULL);
        pool->alloc_count = 0;
        pool->refill_count = 0;
        pool->ss_alloc_count = 0;
        pool->ss_free_count = 0;
    }
    g_smallmid_ss_initialized = 1;
#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SuperSlab] Initialized (%d classes)\n", SMALLMID_NUM_CLASSES);
#endif
}
void smallmid_superslab_init(void) {
    pthread_once(&g_smallmid_ss_init_once, smallmid_superslab_init_once);
}
// ============================================================================
// SuperSlab Allocation/Deallocation
// ============================================================================
/**
 * smallmid_superslab_alloc - Allocate a new 1MB SuperSlab
 *
 * Strategy:
 *   - mmap a 1MB region (PROT_READ|WRITE, MAP_PRIVATE|ANONYMOUS)
 *   - Verify SuperSlab alignment (mmap only guarantees page alignment)
 *   - Initialize header, slab metadata, counters
 *   - Update per-class pool counters (chain linking is done by the caller)
 *   - Return the SuperSlab pointer
 */
SmallMidSuperSlab* smallmid_superslab_alloc(int class_idx) {
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES) {
        return NULL;
    }

    // Allocate a 1MB region
    void* mem = mmap(NULL, SMALLMID_SUPERSLAB_SIZE,
                     PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS,
                     -1, 0);
    if (mem == MAP_FAILED) {
        fprintf(stderr, "[SmallMid SS] mmap failed: %s\n", strerror(errno));
        return NULL;
    }

    // mmap guarantees only page alignment; bail out if the region misses the
    // SuperSlab boundary rather than handing out a misaligned pool
    uintptr_t addr = (uintptr_t)mem;
    if ((addr & (SMALLMID_SS_ALIGNMENT - 1)) != 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: mmap returned unaligned address %p\n", mem);
        munmap(mem, SMALLMID_SUPERSLAB_SIZE);
        return NULL;
    }

    SmallMidSuperSlab* ss = (SmallMidSuperSlab*)mem;

    // Initialize header
    ss->magic = SMALLMID_SS_MAGIC;
    ss->num_slabs = SMALLMID_SLABS_PER_SS;
    ss->active_slabs = 0;
    ss->refcount = 1;
    ss->total_active = 0;
    ss->slab_bitmap = 0;
    ss->nonempty_mask = 0;
    ss->last_used_ns = 0;
    ss->generation = 0;
    ss->next = NULL;
    ss->lru_next = NULL;
    ss->lru_prev = NULL;

    // Initialize slab metadata (all slabs inactive initially)
    for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
        SmallMidSlabMeta* meta = &ss->slabs[i];
        meta->freelist = NULL;
        meta->used = 0;
        meta->capacity = 0;
        meta->carved = 0;
        meta->class_idx = class_idx;
        meta->flags = SMALLMID_SLAB_INACTIVE;
    }

    // Update pool stats
    SmallMidSSHead* pool = &g_smallmid_ss_pools[class_idx];
    atomic_fetch_add(&pool->total_ss, 1);
    atomic_fetch_add(&pool->ss_alloc_count, 1);

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_ss_alloc, 1);
#endif

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Allocated SuperSlab %p (class=%d, size=1MB)\n",
            (void*)ss, class_idx);
#endif
    return ss;
}
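/*
 * Illustrative sketch, not part of the original backend: since mmap() only
 * guarantees page alignment, the check above can legitimately fail when
 * SMALLMID_SS_ALIGNMENT exceeds the page size. A common remedy is to over-map
 * by the alignment and trim the excess. The helper name and the
 * SMALLMID_SS_EXAMPLES guard are hypothetical.
 */
#ifdef SMALLMID_SS_EXAMPLES
static void* smallmid_mmap_aligned_sketch(size_t size, size_t alignment) {
    size_t span = size + alignment;  // over-map so an aligned block must fit
    uint8_t* raw = mmap(NULL, span, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if ((void*)raw == MAP_FAILED) return NULL;
    uintptr_t base = ((uintptr_t)raw + alignment - 1) & ~(uintptr_t)(alignment - 1);
    size_t lead = (size_t)(base - (uintptr_t)raw);
    if (lead) munmap(raw, lead);                        // trim unaligned prefix
    size_t tail = span - lead - size;
    if (tail) munmap((uint8_t*)base + size, tail);      // trim the remainder
    return (void*)base;
}
#endif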
/**
 * smallmid_superslab_free - Free a SuperSlab
 *
 * Strategy:
 *   - Validate refcount == 0 (all blocks freed)
 *   - munmap the 1MB region
 *   - Update pool stats
 */
void smallmid_superslab_free(SmallMidSuperSlab* ss) {
    if (!ss || ss->magic != SMALLMID_SS_MAGIC) {
        fprintf(stderr, "[SmallMid SS] ERROR: Invalid SuperSlab %p\n", (void*)ss);
        return;
    }

    uint32_t refcount = atomic_load(&ss->refcount);
    if (refcount > 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: Freeing SuperSlab with refcount=%u\n", refcount);
    }
    uint32_t active = atomic_load(&ss->total_active);
    if (active > 0) {
        fprintf(stderr, "[SmallMid SS] WARNING: Freeing SuperSlab with active blocks=%u\n", active);
    }

    // Invalidate the magic so stale pointers fail validation
    ss->magic = 0xDEADBEEF;

    if (munmap(ss, SMALLMID_SUPERSLAB_SIZE) != 0) {
        fprintf(stderr, "[SmallMid SS] munmap failed: %s\n", strerror(errno));
    }

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_ss_free, 1);
#endif

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Freed SuperSlab %p\n", (void*)ss);
#endif
}
// ============================================================================
// Slab Initialization
// ============================================================================
/**
 * smallmid_slab_init - Initialize a slab within a SuperSlab
 *
 * Strategy:
 *   - Reset the slab's metadata (freelist, used, carved)
 *   - Set capacity from the size class (roughly 256/128/64 blocks per 64KB
 *     slab; the 1-byte in-band block headers shave the exact counts)
 *   - Mark the slab as active
 *   - Update the SuperSlab bitmaps
 */
void smallmid_slab_init(SmallMidSuperSlab* ss, int slab_idx, int class_idx) {
    if (!ss || slab_idx < 0 || slab_idx >= SMALLMID_SLABS_PER_SS ||
        class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES) {
        return;  // class_idx is validated too, since it indexes capacities[]
    }

    SmallMidSlabMeta* meta = &ss->slabs[slab_idx];

    // Set capacity based on class
    const uint16_t capacities[SMALLMID_NUM_CLASSES] = {
        SMALLMID_BLOCKS_256B,
        SMALLMID_BLOCKS_512B,
        SMALLMID_BLOCKS_1KB
    };
    meta->freelist = NULL;
    meta->used = 0;
    meta->capacity = capacities[class_idx];
    meta->carved = 0;
    meta->class_idx = class_idx;
    meta->flags = SMALLMID_SLAB_ACTIVE;

    // Update SuperSlab bitmaps
    ss->slab_bitmap |= (1u << slab_idx);
    ss->nonempty_mask |= (1u << slab_idx);
    ss->active_slabs++;

#if SMALLMID_DEBUG
    fprintf(stderr, "[SmallMid SS] Initialized slab %d in SS %p (class=%d, capacity=%u)\n",
            slab_idx, (void*)ss, class_idx, meta->capacity);
#endif
}
// ============================================================================
// Batch Refill (Performance-Critical Path)
// ============================================================================
/**
 * smallmid_refill_batch - Batch refill TLS freelist from SuperSlab
 *
 * Performance target: 5-8 instructions per call (amortized)
 *
 * Strategy:
 *   1. Try the current slab's freelist (fast path: pop up to batch_max blocks)
 *   2. Fall back to bump allocation if the freelist is empty
 *   3. Activate a new slab if the current one is full
 *   4. Allocate a new SuperSlab if no slabs are available
 *
 * Returns: Number of blocks refilled (0 on failure)
 */
int smallmid_refill_batch(int class_idx, void** batch_out, int batch_max) {
    if (class_idx < 0 || class_idx >= SMALLMID_NUM_CLASSES || !batch_out || batch_max <= 0) {
        return 0;
    }

    SmallMidSSHead* pool = &g_smallmid_ss_pools[class_idx];

    // Ensure the SuperSlab pool is initialized (pthread_once keeps this idempotent)
    if (!g_smallmid_ss_initialized) {
        smallmid_superslab_init();
    }

    // Allocate the first SuperSlab if needed
    pthread_mutex_lock(&pool->lock);
    if (!pool->current_ss) {
        pool->current_ss = smallmid_superslab_alloc(class_idx);
        if (!pool->current_ss) {
            pthread_mutex_unlock(&pool->lock);
            return 0;
        }
        // Add to chain
        if (!pool->first_ss) {
            pool->first_ss = pool->current_ss;
        }
        // Initialize the first slab
        smallmid_slab_init(pool->current_ss, 0, class_idx);
    }
    SmallMidSuperSlab* ss = pool->current_ss;
    pthread_mutex_unlock(&pool->lock);

    // Find an active slab with available blocks.
    // Invariant: used counts live blocks, so used < capacity means the slab
    // still has freelist entries or uncarved space.
    int slab_idx = -1;
    SmallMidSlabMeta* meta = NULL;
    for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
        if (!(ss->slab_bitmap & (1u << i))) {
            continue;  // Slab not active
        }
        meta = &ss->slabs[i];
        if (meta->used < meta->capacity) {
            slab_idx = i;
            break;  // Found a slab with space
        }
    }

    // No slab with space - try to activate a new slab
    if (slab_idx == -1) {
        pthread_mutex_lock(&pool->lock);
        // Find the first inactive slab
        for (int i = 0; i < SMALLMID_SLABS_PER_SS; i++) {
            if (!(ss->slab_bitmap & (1u << i))) {
                smallmid_slab_init(ss, i, class_idx);
                slab_idx = i;
                meta = &ss->slabs[i];
                break;
            }
        }
        pthread_mutex_unlock(&pool->lock);

        // All slabs exhausted - need a new SuperSlab
        if (slab_idx == -1) {
            pthread_mutex_lock(&pool->lock);
            SmallMidSuperSlab* new_ss = smallmid_superslab_alloc(class_idx);
            if (!new_ss) {
                pthread_mutex_unlock(&pool->lock);
                return 0;
            }
            // Link into the chain
            new_ss->next = pool->first_ss;
            pool->first_ss = new_ss;
            pool->current_ss = new_ss;
            // Initialize the first slab
            smallmid_slab_init(new_ss, 0, class_idx);
            pthread_mutex_unlock(&pool->lock);
            ss = new_ss;
            slab_idx = 0;
            meta = &ss->slabs[0];
        }
    }

    // Now we have a slab with available capacity.
    // Strategy: try the freelist first, then bump allocation.
    // NOTE: the slab mutation below happens outside pool->lock, so concurrent
    // refills of the same class must be serialized by the caller.
    const size_t block_sizes[SMALLMID_NUM_CLASSES] = {256, 512, 1024};
    size_t block_size = block_sizes[class_idx];
    int refilled = 0;

    // Calculate the slab data base address
    uintptr_t ss_base = (uintptr_t)ss;
    uintptr_t slab_base = ss_base + (slab_idx * SMALLMID_SLAB_SIZE);

    // Fast path: pop from the freelist (if available)
    void* freelist_head = meta->freelist;
    while (freelist_head && refilled < batch_max) {
        // Skip the 1-byte block header (Phase 7 technology)
        void* user_ptr = (uint8_t*)freelist_head + 1;
        batch_out[refilled++] = user_ptr;
        // Next link is stored at offset 0 of the user data
        freelist_head = *(void**)user_ptr;
        // Keep accounting symmetric with the bump path below; the free path
        // is expected to decrement these when it pushes blocks back
        meta->used++;
        atomic_fetch_add(&ss->total_active, 1);
    }
    meta->freelist = freelist_head;

    // Slow path: bump allocation. The stride is block_size + 1 bytes, so a
    // 64KB slab holds slightly fewer blocks than the bare power-of-two count;
    // the capacity constants must respect that.
    while (refilled < batch_max && meta->carved < meta->capacity) {
        // Calculate the block base address (with 1-byte header)
        uintptr_t block_base = slab_base + (meta->carved * (block_size + 1));
        void* base_ptr = (void*)block_base;
        void* user_ptr = (uint8_t*)base_ptr + 1;
        // Write the header (0xb0 | class_idx)
        *(uint8_t*)base_ptr = 0xb0 | class_idx;
        batch_out[refilled++] = user_ptr;
        meta->carved++;
        meta->used++;
        // Update the SuperSlab active counter
        atomic_fetch_add(&ss->total_active, 1);
    }

    // Update stats
    atomic_fetch_add(&pool->alloc_count, refilled);
    atomic_fetch_add(&pool->refill_count, 1);

#ifdef HAKMEM_SMALLMID_SS_STATS
    atomic_fetch_add(&g_smallmid_ss_stats.total_refills, 1);
    atomic_fetch_add(&g_smallmid_ss_stats.total_blocks_carved, refilled);
#endif

#if SMALLMID_DEBUG
    if (refilled > 0) {
        fprintf(stderr, "[SmallMid SS] Refilled %d blocks (class=%d, slab=%d, carved=%u/%u)\n",
                refilled, class_idx, slab_idx, meta->carved, meta->capacity);
    }
#endif
    return refilled;
}
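/*
 * Illustrative usage sketch, not part of the original code: how a TLS-cache
 * front-end might drain smallmid_refill_batch(), and how the free path can
 * recover the size class from the in-band header byte. The names tls_push
 * and refill_example, and the SMALLMID_SS_EXAMPLES guard, are hypothetical.
 */
#ifdef SMALLMID_SS_EXAMPLES
extern void tls_push(int class_idx, void* block);  // hypothetical TLS hook

static void refill_example(int class_idx) {
    void* batch[32];
    int n = smallmid_refill_batch(class_idx, batch, 32);
    for (int i = 0; i < n; i++) {
        // Each entry is a user pointer; the class tag sits one byte below it
        uint8_t hdr = ((uint8_t*)batch[i])[-1];
        (void)hdr;  // (hdr & 0x0f) == class_idx, (hdr & 0xf0) == 0xb0
        tls_push(class_idx, batch[i]);
    }
}
#endif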
// ============================================================================
// Statistics
// ============================================================================
#ifdef HAKMEM_SMALLMID_SS_STATS
void smallmid_ss_print_stats(void) {
    fprintf(stderr, "\n=== Small-Mid SuperSlab Statistics ===\n");
    fprintf(stderr, "Total SuperSlab allocs: %lu\n", g_smallmid_ss_stats.total_ss_alloc);
    fprintf(stderr, "Total SuperSlab frees: %lu\n", g_smallmid_ss_stats.total_ss_free);
    fprintf(stderr, "Total refills: %lu\n", g_smallmid_ss_stats.total_refills);
    fprintf(stderr, "Total blocks carved: %lu\n", g_smallmid_ss_stats.total_blocks_carved);
    fprintf(stderr, "Total blocks freed: %lu\n", g_smallmid_ss_stats.total_blocks_freed);
    fprintf(stderr, "\nPer-class statistics:\n");
    for (int i = 0; i < SMALLMID_NUM_CLASSES; i++) {
        SmallMidSSHead* pool = &g_smallmid_ss_pools[i];
        fprintf(stderr, "  Class %d (%zuB):\n", i, g_smallmid_class_sizes[i]);
        fprintf(stderr, "    Total SS: %zu\n", pool->total_ss);
        fprintf(stderr, "    Allocs: %lu\n", pool->alloc_count);
        fprintf(stderr, "    Refills: %lu\n", pool->refill_count);
    }
    fprintf(stderr, "=======================================\n\n");
}
#endif
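/*
 * Usage note: the statistics above are compiled in only when this translation
 * unit is built with -DHAKMEM_SMALLMID_SS_STATS; call smallmid_ss_print_stats()
 * (e.g., from an atexit() handler) to dump them to stderr.
 */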