Tiny: fix header/stride mismatch and harden refill paths
Root cause
- Header-based class indexing (HEADER_CLASSIDX=1) wrote a 1-byte header during
  allocation, but linear carve/refill and the initial slab capacity still used
  bare class block sizes. The mismatch could overrun a slab's usable space and
  corrupt freelists, causing a reproducible SEGV at ~100k iterations.

Changes
- Superslab: compute capacity with the effective stride (block_size + header
  for classes 0..6; class 7 stays headerless) in superslab_init_slab(). Add a
  debug-only bound check in superslab_alloc_from_slab() to fail fast if a
  carve would exceed usable bytes.
- Refill (non-P0 and P0): use the header-aware stride for all linear carving
  and TLS window bump operations. Alignment/validation in tiny_refill_opt.h
  also uses the stride, not the raw class size.
- Drain: keep the existing defense-in-depth for the remote sentinel and
  sanitize nodes before splicing them into the freelist (already present).

Notes
- This unifies the memory layout across alloc/linear-carve/refill behind a
  single stride definition (see the sketch below) and keeps class 7 (1024B)
  headerless as designed.
- Debug builds add fail-fast checks; release builds stay lean.

Next
- Re-run the Tiny benches (256/1024B) in debug to confirm stability, then in
  release. If a crash persists, bisect with HAKMEM_TINY_P0_BATCH_REFILL=0 to
  isolate the P0 batch carve, and continue reducing branch misses as planned.
core/pool_tls_arena.c (new file, +172 lines)

@@ -0,0 +1,172 @@
#include "pool_tls_arena.h"
|
||||
#include "pool_tls.h" // For POOL_HEADER_SIZE, POOL_USE_HEADERS
|
||||
#include <sys/mman.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <unistd.h>
|
||||
#include <pthread.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// TLS storage (automatically zero-initialized)
|
||||
__thread PoolChunk g_tls_arena[POOL_SIZE_CLASSES];
|
||||
|
||||

int g_arena_max_growth_level = 3;                    // 0:1MB, 1:2MB, 2:4MB, 3:8MB
size_t g_arena_initial_chunk_size = (size_t)1 << 20; // 1MB

static pthread_once_t g_arena_cfg_once = PTHREAD_ONCE_INIT;

static void arena_read_env(void) {
    const char* s_init = getenv("HAKMEM_POOL_TLS_ARENA_MB_INIT");
    const char* s_max  = getenv("HAKMEM_POOL_TLS_ARENA_MB_MAX");
    const char* s_gl   = getenv("HAKMEM_POOL_TLS_ARENA_GROWTH_LEVELS");
    if (s_init) {
        long v = atol(s_init);
        if (v >= 1 && v <= 64) g_arena_initial_chunk_size = (size_t)v << 20;
    }
    if (s_max) {
        long v = atol(s_max);
        if (v >= 1 && v <= 1024) {
            // Derive the growth level whose chunk size first reaches max_bytes.
            size_t max_bytes = (size_t)v << 20;
            size_t sz = g_arena_initial_chunk_size;
            int lvl = 0;
            while (sz < max_bytes && lvl < 30) { sz <<= 1; lvl++; }
            g_arena_max_growth_level = lvl;
        }
    }
    if (s_gl) {
        long v = atol(s_gl);
        if (v >= 0 && v <= 30) g_arena_max_growth_level = (int)v;
    }
}
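
// Configuration sketch (illustrative values): the env knobs map onto the
// globals above, e.g.
//   HAKMEM_POOL_TLS_ARENA_MB_INIT=2  -> g_arena_initial_chunk_size = 2MB
//   HAKMEM_POOL_TLS_ARENA_MB_MAX=16  -> g_arena_max_growth_level = 3
//                                       (2MB doubles 3 times to reach 16MB)
//   HAKMEM_POOL_TLS_ARENA_GROWTH_LEVELS=2 overrides the derived cap directly.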

// External imports (from pool config)
extern const size_t POOL_CLASS_SIZES[POOL_SIZE_CLASSES];

// Debug stats
#ifdef POOL_TLS_ARENA_DEBUG
static __thread struct {
    uint64_t mmap_calls;
    uint64_t total_carved;
    uint64_t chunk_exhaustions;
} g_arena_stats;
#endif

// Ensure the chunk has space for at least 'needed' bytes.
// Returns 0 on success, -1 on mmap failure.
static int chunk_ensure(PoolChunk* chunk, size_t needed) {
    // Fast path: current chunk has space.
    if (chunk->chunk_base && (chunk->offset + needed <= chunk->chunk_size)) {
        return 0;
    }

    // Need a new chunk: size it with exponential growth, capped at the max level.
    pthread_once(&g_arena_cfg_once, arena_read_env);
    size_t new_size;
    if (chunk->growth_level >= g_arena_max_growth_level) {
        new_size = g_arena_initial_chunk_size << g_arena_max_growth_level;
    } else {
        new_size = g_arena_initial_chunk_size << chunk->growth_level;
        chunk->growth_level++;
    }

    // CRITICAL: do NOT munmap the old chunk. Live allocations may still point
    // into it, so exhausted chunks are intentionally leaked for the lifetime
    // of the process (earlier code munmap'd here and SEGV'd on live pointers).
    // Standard arena behavior: grow, never shrink. Note the arena keeps no
    // reference to retired chunks, so arena_cleanup_thread() below only
    // unmaps the most recent chunk of each class.

#ifdef POOL_TLS_ARENA_DEBUG
    if (chunk->chunk_base) {
        g_arena_stats.chunk_exhaustions++;
    }
#endif

    // Allocate the new chunk.
    void* new_base = mmap(NULL, new_size, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (new_base == MAP_FAILED) {
        return -1; // OOM
    }

#ifdef POOL_TLS_ARENA_DEBUG
    g_arena_stats.mmap_calls++;
#endif

    // Register the range for owner resolution.
    pid_t tid = (pid_t)syscall(SYS_gettid);
    pool_reg_register(new_base, new_size, tid, -1); // class-less at arena level

    chunk->chunk_base = new_base;
    chunk->chunk_size = new_size;
    chunk->offset = 0;

    return 0;
}
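
// Growth example under the defaults (1MB initial, max level 3): the first
// four chunks a class requests are 1MB, 2MB, 4MB, 8MB; every later chunk
// stays at 8MB, i.e. new_size = initial << min(growth_level, max_level).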

// Carve 'count' blocks from the TLS arena; returns the number carved.
int arena_batch_carve(int class_idx, void** out_blocks, int count) {
    if (class_idx < 0 || class_idx >= POOL_SIZE_CLASSES) {
        return 0; // invalid class
    }

    PoolChunk* chunk = &g_tls_arena[class_idx];
    size_t block_size = POOL_CLASS_SIZES[class_idx];

    // Per-block stride includes header space when headers are enabled.
#if POOL_USE_HEADERS
    size_t alloc_size = block_size + POOL_HEADER_SIZE;
#else
    size_t alloc_size = block_size;
#endif

    // Ensure the chunk has space for the whole batch.
    size_t needed = alloc_size * (size_t)count;
    if (chunk_ensure(chunk, needed) != 0) {
        return 0; // OOM
    }

    // Carve blocks from the chunk.
    int carved = 0;
    for (int i = 0; i < count; i++) {
        if (chunk->offset + alloc_size > chunk->chunk_size) {
            break; // chunk exhausted (shouldn't happen after ensure)
        }

        // Return the pointer AFTER the header space.
        char* p = (char*)chunk->chunk_base + chunk->offset;
#if POOL_USE_HEADERS
        p += POOL_HEADER_SIZE;
#endif
        out_blocks[i] = p;
        chunk->offset += alloc_size;
        carved++;

#ifdef POOL_TLS_ARENA_DEBUG
        g_arena_stats.total_carved++;
#endif
    }

    return carved;
}
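
// Usage sketch (hypothetical caller; tls_freelist_push is illustrative and
// not part of this file):
//   void* blocks[32];
//   int n = arena_batch_carve(class_idx, blocks, 32);
//   for (int i = 0; i < n; i++) tls_freelist_push(class_idx, blocks[i]);
//   if (n == 0) { /* arena mmap failed: fall back to the slow path */ }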

// Process-exit cleanup. Note: a destructor runs once, on the thread that
// executes process teardown; worker threads should invoke
// arena_cleanup_thread() themselves at thread exit.
static void __attribute__((destructor)) arena_cleanup(void) {
    arena_cleanup_thread();
}

void arena_cleanup_thread(void) {
    for (int i = 0; i < POOL_SIZE_CLASSES; i++) {
        PoolChunk* chunk = &g_tls_arena[i];
        if (chunk->chunk_base) {
            pid_t tid = (pid_t)syscall(SYS_gettid);
            pool_reg_unregister(chunk->chunk_base, chunk->chunk_size, tid);
            munmap(chunk->chunk_base, chunk->chunk_size);
            chunk->chunk_base = NULL;
        }
    }
}
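
// One way to get per-thread cleanup automatically (a sketch, not part of
// this commit): register arena_cleanup_thread() as a pthread TSD destructor.
//   static pthread_key_t g_arena_key;
//   static pthread_once_t g_arena_key_once = PTHREAD_ONCE_INIT;
//   static void arena_key_dtor(void* p) { (void)p; arena_cleanup_thread(); }
//   static void arena_key_make(void) {
//       pthread_key_create(&g_arena_key, arena_key_dtor);
//   }
//   // On a thread's first carve:
//   pthread_once(&g_arena_key_once, arena_key_make);
//   pthread_setspecific(g_arena_key, (void*)1); // non-NULL so the dtor runs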

#ifdef POOL_TLS_ARENA_DEBUG
#include <stdio.h>
#include <inttypes.h> // PRIu64 for the uint64_t counters

void arena_print_stats(void) {
    printf("[Pool TLS Arena Stats]\n");
    printf("  mmap calls:        %" PRIu64 "\n", g_arena_stats.mmap_calls);
    printf("  blocks carved:     %" PRIu64 "\n", g_arena_stats.total_carved);
    printf("  chunk exhaustions: %" PRIu64 "\n", g_arena_stats.chunk_exhaustions);
}
#endif