// hakmem/core/pool_tls_arena.c
#include "pool_tls_arena.h"
#include "pool_tls.h" // For POOL_HEADER_SIZE, POOL_USE_HEADERS
#include "page_arena.h" // Phase 24: PageArena integration
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <pthread.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <stdatomic.h>
// TLS storage (automatically zero-initialized)
__thread PoolChunk g_tls_arena[POOL_SIZE_CLASSES];
int g_arena_max_growth_level = 3; // 0:1MB,1:2MB,2:4MB,3:8MB
size_t g_arena_initial_chunk_size = (size_t)1 << 20; // 1MB
static pthread_once_t g_arena_cfg_once = PTHREAD_ONCE_INIT;
static void arena_read_env(void) {
    const char* s_init = getenv("HAKMEM_POOL_TLS_ARENA_MB_INIT");
    const char* s_max  = getenv("HAKMEM_POOL_TLS_ARENA_MB_MAX");
    const char* s_gl   = getenv("HAKMEM_POOL_TLS_ARENA_GROWTH_LEVELS");
    if (s_init) { long v = atol(s_init); if (v >= 1 && v <= 64) g_arena_initial_chunk_size = (size_t)v << 20; }
    if (s_max) {
        long v = atol(s_max);
        if (v >= 1 && v <= 1024) {
            // Derive the growth level needed to reach the requested maximum chunk size.
            size_t max_bytes = (size_t)v << 20;
            size_t sz = g_arena_initial_chunk_size;
            int lvl = 0;
            while (sz < max_bytes && lvl < 30) { sz <<= 1; lvl++; }
            g_arena_max_growth_level = lvl;
            if (g_arena_max_growth_level < 0) g_arena_max_growth_level = 0;
        }
    }
    if (s_gl) { long v = atol(s_gl); if (v >= 0 && v <= 30) g_arena_max_growth_level = (int)v; }
}
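// Example (illustrative, not a recommendation): with
//   HAKMEM_POOL_TLS_ARENA_MB_INIT=2 HAKMEM_POOL_TLS_ARENA_MB_MAX=16
// the initial chunk is 2MB and the growth level is derived as 3
// (2MB -> 4MB -> 8MB -> 16MB). HAKMEM_POOL_TLS_ARENA_GROWTH_LEVELS,
// if set, overrides the derived level.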
// External imports (from pool config)
extern const size_t POOL_CLASS_SIZES[POOL_SIZE_CLASSES];
// Debug stats
#ifdef POOL_TLS_ARENA_DEBUG
static __thread struct {
    uint64_t mmap_calls;
    uint64_t total_carved;
    uint64_t chunk_exhaustions;
} g_arena_stats;
#endif
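// (Sketch) To inspect these counters, build with POOL_TLS_ARENA_DEBUG defined
// (e.g. -DPOOL_TLS_ARENA_DEBUG) and call arena_print_stats() from the thread
// of interest; the struct is __thread, so each thread sees only its own counts.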
// Ensure chunk has space for at least 'needed' bytes
// Returns 0 on success, -1 on mmap failure
static int chunk_ensure(PoolChunk* chunk, size_t needed) {
    // Check if current chunk has space
    if (chunk->chunk_base && (chunk->offset + needed <= chunk->chunk_size)) {
        return 0; // Space available
    }
    // Phase 24: Ensure PageArena is initialized before first use
    if (page_arena_enabled() && g_page_arena.hot.pages == NULL) {
        page_arena_init(&g_page_arena);
    }
    // Need a new chunk - calculate size with exponential growth
    pthread_once(&g_arena_cfg_once, arena_read_env);
    size_t new_size;
    if (chunk->growth_level >= g_arena_max_growth_level) {
        new_size = g_arena_initial_chunk_size << g_arena_max_growth_level;
    } else {
        new_size = g_arena_initial_chunk_size << chunk->growth_level;
        chunk->growth_level++;
    }
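    // With the defaults (1MB initial, max level 3) a class's chunks grow
    // 1MB -> 2MB -> 4MB -> 8MB and then stay at 8MB for every later refill.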
    // CRITICAL FIX: DO NOT munmap the old chunk!
    // Reason: live allocations may still point into it. An exhausted chunk is
    // intentionally left mapped (leaked); only the current chunk per size class
    // is released in arena_cleanup_thread(). This is standard arena behavior -
    // grow but never shrink.
    //
    // REMOVED BUGGY CODE:
    //   if (chunk->chunk_base) {
    //       munmap(chunk->chunk_base, chunk->chunk_size); // ← SEGV! Live ptrs exist!
    //   }
#ifdef POOL_TLS_ARENA_DEBUG
    if (chunk->chunk_base) {
        g_arena_stats.chunk_exhaustions++;
    }
#endif
    // Phase 24: Try PageArena first, fall back to mmap
    void* new_base = page_arena_alloc_pages(&g_page_arena, new_size);
    if (!new_base) {
        // PageArena cache miss → fall back to mmap
        new_base = mmap(NULL, new_size, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    }
    if (new_base == MAP_FAILED || new_base == NULL) {
        // DEBUG: Log allocation failure details
        static _Atomic int alloc_fail_count = 0;
        int fail_num = atomic_fetch_add(&alloc_fail_count, 1);
        if (fail_num < 10) {
fprintf(stderr, "[POOL_ARENA] alloc FAILED: new_size=%zu MB, growth_level=%d, errno=%d\n",
new_size / (1024*1024), chunk->growth_level, errno);
}
return -1; // OOM
}
#ifdef POOL_TLS_ARENA_DEBUG
g_arena_stats.mmap_calls++;
#endif
// Register range for owner resolution
pid_t tid = (pid_t)syscall(SYS_gettid);
pool_reg_register(new_base, new_size, tid, -1); // class-less at arena level
chunk->chunk_base = new_base;
chunk->chunk_size = new_size;
chunk->offset = 0;
return 0;
}
// Carve blocks from TLS Arena
int arena_batch_carve(int class_idx, void** out_blocks, int count) {
    if (class_idx < 0 || class_idx >= POOL_SIZE_CLASSES) {
        return 0; // Invalid class
    }
    PoolChunk* chunk = &g_tls_arena[class_idx];
    size_t block_size = POOL_CLASS_SIZES[class_idx];
    // Calculate allocation size with header space
#if POOL_USE_HEADERS
    size_t alloc_size = block_size + POOL_HEADER_SIZE;
#else
    size_t alloc_size = block_size;
#endif
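    // Layout per carved block when POOL_USE_HEADERS is enabled (illustrative):
    //   [ POOL_HEADER_SIZE bytes header | block_size bytes payload ]
    // The pointer handed back in out_blocks[] is the payload address, i.e. the
    // byte just past the header.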
    // Ensure chunk has space for all blocks
    size_t needed = alloc_size * count;
    if (chunk_ensure(chunk, needed) != 0) {
        // DEBUG: Log chunk_ensure failure
        static _Atomic int ensure_fail_count = 0;
        int fail_num = atomic_fetch_add(&ensure_fail_count, 1);
        if (fail_num < 10) {
            fprintf(stderr, "[POOL_ARENA] chunk_ensure FAILED: class=%d, block_size=%zu, count=%d, needed=%zu\n",
                    class_idx, block_size, count, needed);
        }
        return 0; // OOM
    }
    // Carve blocks from chunk
    int carved = 0;
    for (int i = 0; i < count; i++) {
        if (chunk->offset + alloc_size > chunk->chunk_size) {
            break; // Chunk exhausted (shouldn't happen after ensure)
        }
        // Return pointer AFTER header space
        out_blocks[i] = (char*)chunk->chunk_base + chunk->offset
#if POOL_USE_HEADERS
            + POOL_HEADER_SIZE
#endif
            ;
        chunk->offset += alloc_size;
        carved++;
#ifdef POOL_TLS_ARENA_DEBUG
        g_arena_stats.total_carved++;
#endif
    }
    return carved;
}
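// Usage sketch (hypothetical caller; class index 3 chosen only for illustration):
//   void* blocks[16];
//   int n = arena_batch_carve(3, blocks, 16);
//   // n may be less than 16 on OOM; blocks[0..n-1] are payload pointers
//   // (any POOL_HEADER_SIZE prefix is already skipped).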
// Thread cleanup. Note: a destructor attribute runs once at process exit, not
// per thread; threads that need their chunks returned earlier must call
// arena_cleanup_thread() explicitly on their own exit path.
static void __attribute__((destructor)) arena_cleanup(void) {
    arena_cleanup_thread();
}
void arena_cleanup_thread(void) {
    for (int i = 0; i < POOL_SIZE_CLASSES; i++) {
        PoolChunk* chunk = &g_tls_arena[i];
        if (chunk->chunk_base) {
            pid_t tid = (pid_t)syscall(SYS_gettid);
            pool_reg_unregister(chunk->chunk_base, chunk->chunk_size, tid);
            // Phase 24: Return to PageArena if enabled
            if (page_arena_enabled()) {
                page_arena_free_pages(&g_page_arena, chunk->chunk_base, chunk->chunk_size);
            } else {
                munmap(chunk->chunk_base, chunk->chunk_size);
            }
            chunk->chunk_base = NULL;
        }
    }
}
#ifdef POOL_TLS_ARENA_DEBUG
#include <inttypes.h>
void arena_print_stats(void) {
    printf("[Pool TLS Arena Stats]\n");
    printf("  mmap calls:        %" PRIu64 "\n", g_arena_stats.mmap_calls);
    printf("  blocks carved:     %" PRIu64 "\n", g_arena_stats.total_carved);
    printf("  chunk exhaustions: %" PRIu64 "\n", g_arena_stats.chunk_exhaustions);
}
#endif