// smallsegment_v5.c - SmallSegment v5 Implementation (Phase v5-2)
//
// Purpose: 2MiB segment-based page allocation with O(1) page_meta lookup
// Design: Each segment contains 32 pages (64KiB each) with embedded metadata

#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include "box/smallsegment_v5_box.h"

#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif

// ============================================================================
// Segment Allocation (Phase v5-2)
// ============================================================================

// Thread-local segment list (static allocation to avoid malloc recursion)
#define MAX_SEGMENTS_PER_THREAD 4

typedef struct {
    SmallSegmentV5 seg;   // Embedded segment header + per-page metadata
    int in_use;           // Non-zero while this slot owns a live segment
    uint32_t used_pages;  // Bitmap: bit i set => page i is currently in use
} TLSSegmentSlot;

static __thread TLSSegmentSlot g_segment_slots_v5[MAX_SEGMENTS_PER_THREAD];
static __thread int g_last_alloc_slot_v5 = -1; // Last slot we allocated from
SmallSegmentV5* small_segment_v5_acquire(void) {
|
||
|
|
// Find free slot in TLS (avoid malloc to prevent recursion)
|
||
|
|
TLSSegmentSlot* slot = NULL;
|
||
|
|
for (int i = 0; i < MAX_SEGMENTS_PER_THREAD; i++) {
|
||
|
|
if (!g_segment_slots_v5[i].in_use) {
|
||
|
|
slot = &g_segment_slots_v5[i];
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (!slot) {
|
||
|
|
return NULL; // Out of TLS segment slots
|
||
|
|
}
|
||
|
|
|
||
|
|
// Allocate 2MiB aligned segment via mmap
|
||
|
|
// Use MAP_ANONYMOUS which typically gives us aligned addresses for large allocations
|
||
|
|
void* mem = mmap(NULL, SMALL_SEGMENT_V5_SIZE,
|
||
|
|
PROT_READ | PROT_WRITE,
|
||
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||
|
|
|
||
|
|
if (mem == MAP_FAILED || mem == NULL) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
uintptr_t addr = (uintptr_t)mem;
|
||
|
|
|
||
|
|
// Check if we got 2MiB alignment (common for large mmap allocations)
|
||
|
|
// If not, remap with extra space to force alignment
|
||
|
|
if ((addr & (SMALL_SEGMENT_V5_SIZE - 1)) != 0) {
|
||
|
|
// Not aligned, need to reallocate with overallocation
|
||
|
|
munmap(mem, SMALL_SEGMENT_V5_SIZE);
|
||
|
|
|
||
|
|
// Allocate 4MiB to ensure we can find a 2MiB aligned region
|
||
|
|
size_t alloc_size = SMALL_SEGMENT_V5_SIZE * 2;
|
||
|
|
mem = mmap(NULL, alloc_size,
|
||
|
|
PROT_READ | PROT_WRITE,
|
||
|
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||
|
|
|
||
|
|
if (mem == MAP_FAILED || mem == NULL) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Find the aligned address within this region
|
||
|
|
uintptr_t raw_addr = (uintptr_t)mem;
|
||
|
|
addr = (raw_addr + SMALL_SEGMENT_V5_SIZE - 1) & ~(SMALL_SEGMENT_V5_SIZE - 1);
|
||
|
|
|
||
|
|
// Verify the aligned address is within our mapping
|
||
|
|
if (addr < raw_addr || addr + SMALL_SEGMENT_V5_SIZE > raw_addr + alloc_size) {
|
||
|
|
munmap(mem, alloc_size);
|
||
|
|
return NULL; // Alignment calculation error
|
||
|
|
}
|
||
|
|
|
||
|
|
// We keep the whole 4MiB mapping to avoid complex munmap logic
|
||
|
|
// This wastes some memory but ensures correctness
|
||
|
|
}
|
||
|
|
|
||
|
|
// Debug: Verify address is aligned
|
||
|
|
if ((addr & (SMALL_SEGMENT_V5_SIZE - 1)) != 0) {
|
||
|
|
fprintf(stderr, "[V5_SEG] ERROR: Address 0x%lx not aligned to 0x%lx\n",
|
||
|
|
(unsigned long)addr, (unsigned long)SMALL_SEGMENT_V5_SIZE);
|
||
|
|
if (addr != (uintptr_t)mem) {
|
||
|
|
munmap(mem, SMALL_SEGMENT_V5_SIZE * 2);
|
||
|
|
} else {
|
||
|
|
munmap(mem, SMALL_SEGMENT_V5_SIZE);
|
||
|
|
}
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Use TLS slot for metadata (no malloc needed)
|
||
|
|
SmallSegmentV5* seg = &slot->seg;
|
||
|
|
slot->in_use = 1;
|
||
|
|
slot->used_pages = 0; // Initially no pages are allocated
|
||
|
|
|
||
|
|
// Initialize segment metadata
|
||
|
|
seg->base = addr;
|
||
|
|
seg->num_pages = SMALL_SEGMENT_V5_NUM_PAGES;
|
||
|
|
seg->owner_tid = 0; // Will be set by caller if needed
|
||
|
|
seg->magic = SMALL_SEGMENT_V5_MAGIC;
|
||
|
|
|
||
|
|
// Initialize all page metadata
|
||
|
|
for (uint32_t i = 0; i < seg->num_pages; i++) {
|
||
|
|
SmallPageMetaV5* m = &seg->page_meta[i];
|
||
|
|
m->free_list = NULL;
|
||
|
|
m->used = 0;
|
||
|
|
m->capacity = 0;
|
||
|
|
m->class_idx = 0;
|
||
|
|
m->flags = 0;
|
||
|
|
m->page_idx = (uint16_t)i;
|
||
|
|
m->segment = seg;
|
||
|
|
}
|
||
|
|
|
||
|
|
return seg;
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
 * Release a segment previously returned by small_segment_v5_acquire():
 * unmaps the 2MiB backing memory and recycles the owning TLS slot.
 * Silently ignores NULL and segments whose magic does not validate.
 */
void small_segment_v5_release(SmallSegmentV5* seg) {
    if (seg == NULL) {
        return;
    }

    // Fail-fast: refuse to touch anything that is not a live v5 segment.
    if (seg->magic != SMALL_SEGMENT_V5_MAGIC) {
        return;
    }

    // Invalidate the magic first so any stale pointer fails validation.
    seg->magic = 0;

    // Return the 2MiB backing region to the OS.
    munmap((void*)seg->base, SMALL_SEGMENT_V5_SIZE);

    // Recycle the owning slot; the TLS storage itself is never freed.
    for (int slot_idx = 0; slot_idx < MAX_SEGMENTS_PER_THREAD; slot_idx++) {
        TLSSegmentSlot* slot = &g_segment_slots_v5[slot_idx];
        if (&slot->seg != seg) {
            continue;
        }
        slot->in_use = 0;
        slot->used_pages = 0;
        if (g_last_alloc_slot_v5 == slot_idx) {
            g_last_alloc_slot_v5 = -1; // Drop the stale fast-path hint
        }
        break;
    }
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// Page Allocation from Segment (Phase v5-2 fix)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
SmallPageMetaV5* small_segment_v5_alloc_page(void) {
|
||
|
|
// Try to reuse existing segment with free pages
|
||
|
|
if (g_last_alloc_slot_v5 >= 0 && g_last_alloc_slot_v5 < MAX_SEGMENTS_PER_THREAD) {
|
||
|
|
TLSSegmentSlot* slot = &g_segment_slots_v5[g_last_alloc_slot_v5];
|
||
|
|
// Check if not all pages are used (used_pages != 0xFFFFFFFF for 32 pages)
|
||
|
|
if (slot->in_use && slot->used_pages != 0xFFFFFFFF) {
|
||
|
|
// This segment has free pages
|
||
|
|
SmallSegmentV5* seg = &slot->seg;
|
||
|
|
for (uint32_t i = 0; i < seg->num_pages; i++) {
|
||
|
|
if ((slot->used_pages & (1U << i)) == 0) {
|
||
|
|
// Found free page
|
||
|
|
slot->used_pages |= (1U << i);
|
||
|
|
return &seg->page_meta[i];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// Search all slots for a segment with free pages
|
||
|
|
for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
|
||
|
|
TLSSegmentSlot* slot = &g_segment_slots_v5[s];
|
||
|
|
if (slot->in_use && slot->used_pages != 0xFFFFFFFF) {
|
||
|
|
SmallSegmentV5* seg = &slot->seg;
|
||
|
|
for (uint32_t i = 0; i < seg->num_pages; i++) {
|
||
|
|
if ((slot->used_pages & (1U << i)) == 0) {
|
||
|
|
// Found free page
|
||
|
|
slot->used_pages |= (1U << i);
|
||
|
|
g_last_alloc_slot_v5 = s;
|
||
|
|
return &seg->page_meta[i];
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
// No free pages in existing segments, allocate new segment
|
||
|
|
SmallSegmentV5* seg = small_segment_v5_acquire();
|
||
|
|
if (!seg) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Mark first page as used
|
||
|
|
for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
|
||
|
|
if (&g_segment_slots_v5[s].seg == seg) {
|
||
|
|
g_segment_slots_v5[s].used_pages |= 1U; // Mark page 0 as used
|
||
|
|
g_last_alloc_slot_v5 = s;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
return &seg->page_meta[0];
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
 * Return a page to its segment by clearing its bit in the owning TLS slot's
 * used_pages bitmap. NULL page or orphaned page (no segment) is a no-op.
 */
void small_segment_v5_free_page(SmallPageMetaV5* page) {
    if (page == NULL || page->segment == NULL) {
        return;
    }

    SmallSegmentV5* seg = (SmallSegmentV5*)page->segment;
    uint32_t page_bit = 1U << page->page_idx;

    // Locate the owning slot and drop this page's bit.
    for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
        TLSSegmentSlot* slot = &g_segment_slots_v5[s];
        if (&slot->seg != seg) {
            continue;
        }
        slot->used_pages &= ~page_bit;
        // A now-empty segment could be released here; it is deliberately
        // kept mapped for cheap reuse.
        break;
    }
}
|
||
|
|
|
||
|
|
// ============================================================================
|
||
|
|
// O(1) Page Metadata Lookup (Phase v5-2)
|
||
|
|
// ============================================================================
|
||
|
|
|
||
|
|
SmallPageMetaV5* small_segment_v5_page_meta_of(void* ptr) {
|
||
|
|
if (unlikely(!ptr)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
uintptr_t addr = (uintptr_t)ptr;
|
||
|
|
uintptr_t seg_base = addr & ~(SMALL_SEGMENT_V5_SIZE - 1);
|
||
|
|
|
||
|
|
// Search for segment in TLS slots
|
||
|
|
SmallSegmentV5* seg = NULL;
|
||
|
|
for (int i = 0; i < MAX_SEGMENTS_PER_THREAD; i++) {
|
||
|
|
if (g_segment_slots_v5[i].in_use) {
|
||
|
|
SmallSegmentV5* candidate = &g_segment_slots_v5[i].seg;
|
||
|
|
if (candidate->base == seg_base) {
|
||
|
|
seg = candidate;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
if (unlikely(!seg)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Verify magic number (Fail-Fast validation)
|
||
|
|
if (unlikely(seg->magic != SMALL_SEGMENT_V5_MAGIC)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Compute page index via shift
|
||
|
|
size_t page_idx = (addr - seg_base) >> SMALL_SEGMENT_V5_PAGE_SHIFT;
|
||
|
|
|
||
|
|
// Bounds check
|
||
|
|
if (unlikely(page_idx >= seg->num_pages)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
SmallPageMetaV5* page = &seg->page_meta[page_idx];
|
||
|
|
|
||
|
|
// Validate that this page is actually in use (has been allocated)
|
||
|
|
// Unallocated pages have capacity == 0
|
||
|
|
if (unlikely(page->capacity == 0)) {
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
// Return page metadata
|
||
|
|
return page;
|
||
|
|
}
|