Phase v5-3: O(1) path optimization for C6-only v5
- Single TLS segment (eliminates slot search loop)
- O(1) page_meta_of() (direct segment range check, no iteration)
- __builtin_ctz for O(1) free page finding in bitmap (see the sketch below)
- Simplified free path using page_meta_of() only (no find_page)
- Partial limit 1 (minimal list traversal)

Performance:
- Before (v5-2): 14.7M ops/s
- After (v5-3): 38.5M ops/s (+162%)
- vs baseline: 44.9M ops/s (-14%)
- SEGV: None, stable at ws=800

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
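The __builtin_ctz point above is the usual inverted-bitmap, count-trailing-zeros idiom. A minimal self-contained sketch of that technique, using toy values rather than the allocator's real TLSSegmentSlot state, and relying on the GCC/Clang __builtin_ctz builtin:

    #include <stdint.h>
    #include <stdio.h>

    // Bit i set in used_pages => page i is in use (same convention as the diff below).
    // Returns the lowest free page index, or -1 if all 32 pages are taken.
    static int first_free_page(uint32_t used_pages) {
        uint32_t free_mask = ~used_pages;   // bit i set => page i is free
        if (free_mask == 0) {
            return -1;                      // bitmap full; __builtin_ctz(0) is undefined
        }
        return __builtin_ctz(free_mask);    // index of lowest set bit, O(1)
    }

    int main(void) {
        printf("%d\n", first_free_page(0x0000000Fu));  // pages 0-3 used -> prints 4
        printf("%d\n", first_free_page(0xFFFFFFFFu));  // segment full   -> prints -1
        return 0;
    }

Compared with the removed loop over seg->num_pages, this replaces up to 32 bit tests with a single count-trailing-zeros operation.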
@@ -19,29 +19,22 @@
 // Segment Allocation (Phase v5-2)
 // ============================================================================

-// Thread-local segment list (static allocation to avoid malloc recursion)
-#define MAX_SEGMENTS_PER_THREAD 4
+// Thread-local segment (Phase v5-3: single segment per thread for O(1) lookup)
+// C6-only v5 uses at most 1 segment per thread - this eliminates slot search
 typedef struct {
     SmallSegmentV5 seg;
     int in_use;
     uint32_t used_pages;  // Bitmap: which pages are currently in use
 } TLSSegmentSlot;

-static __thread TLSSegmentSlot g_segment_slots_v5[MAX_SEGMENTS_PER_THREAD];
-static __thread int g_last_alloc_slot_v5 = -1;  // Last slot we allocated from
+static __thread TLSSegmentSlot g_tls_segment_v5;  // Single TLS segment

 SmallSegmentV5* small_segment_v5_acquire(void) {
-    // Find free slot in TLS (avoid malloc to prevent recursion)
-    TLSSegmentSlot* slot = NULL;
-    for (int i = 0; i < MAX_SEGMENTS_PER_THREAD; i++) {
-        if (!g_segment_slots_v5[i].in_use) {
-            slot = &g_segment_slots_v5[i];
-            break;
-        }
-    }
+    // Phase v5-3: Single segment per thread - no slot search needed
+    TLSSegmentSlot* slot = &g_tls_segment_v5;

-    if (!slot) {
-        return NULL;  // Out of TLS segment slots
+    if (slot->in_use) {
+        return NULL;  // Already have a segment (reuse pages instead)
     }

     // Allocate 2MiB aligned segment via mmap
@@ -138,17 +131,9 @@ void small_segment_v5_release(SmallSegmentV5* seg) {
     // Release the 2MiB backing memory
     munmap((void*)seg->base, SMALL_SEGMENT_V5_SIZE);

-    // Mark slot as free (TLS memory is never freed, just reused)
-    for (int i = 0; i < MAX_SEGMENTS_PER_THREAD; i++) {
-        if (&g_segment_slots_v5[i].seg == seg) {
-            g_segment_slots_v5[i].in_use = 0;
-            g_segment_slots_v5[i].used_pages = 0;
-            if (g_last_alloc_slot_v5 == i) {
-                g_last_alloc_slot_v5 = -1;
-            }
-            break;
-        }
-    }
+    // Phase v5-3: Single segment - direct reset
+    g_tls_segment_v5.in_use = 0;
+    g_tls_segment_v5.used_pages = 0;
 }

 // ============================================================================
@@ -156,55 +141,31 @@ void small_segment_v5_release(SmallSegmentV5* seg) {
 // ============================================================================

 SmallPageMetaV5* small_segment_v5_alloc_page(void) {
-    // Try to reuse existing segment with free pages
-    if (g_last_alloc_slot_v5 >= 0 && g_last_alloc_slot_v5 < MAX_SEGMENTS_PER_THREAD) {
-        TLSSegmentSlot* slot = &g_segment_slots_v5[g_last_alloc_slot_v5];
-        // Check if not all pages are used (used_pages != 0xFFFFFFFF for 32 pages)
-        if (slot->in_use && slot->used_pages != 0xFFFFFFFF) {
-            // This segment has free pages
-            SmallSegmentV5* seg = &slot->seg;
-            for (uint32_t i = 0; i < seg->num_pages; i++) {
-                if ((slot->used_pages & (1U << i)) == 0) {
-                    // Found free page
-                    slot->used_pages |= (1U << i);
-                    return &seg->page_meta[i];
-                }
-            }
+    TLSSegmentSlot* slot = &g_tls_segment_v5;
+
+    // Phase v5-3: Single segment - direct access, no slot search
+    if (slot->in_use && slot->used_pages != 0xFFFFFFFF) {
+        // Segment has free pages - use __builtin_ctz for O(1) free page find
+        uint32_t free_mask = ~slot->used_pages;
+        if (free_mask) {
+            uint32_t page_idx = (uint32_t)__builtin_ctz(free_mask);
+            slot->used_pages |= (1U << page_idx);
+            return &slot->seg.page_meta[page_idx];
         }
     }

-    // Search all slots for a segment with free pages
-    for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
-        TLSSegmentSlot* slot = &g_segment_slots_v5[s];
-        if (slot->in_use && slot->used_pages != 0xFFFFFFFF) {
-            SmallSegmentV5* seg = &slot->seg;
-            for (uint32_t i = 0; i < seg->num_pages; i++) {
-                if ((slot->used_pages & (1U << i)) == 0) {
-                    // Found free page
-                    slot->used_pages |= (1U << i);
-                    g_last_alloc_slot_v5 = s;
-                    return &seg->page_meta[i];
-                }
-            }
+    // Need new segment
+    if (!slot->in_use) {
+        SmallSegmentV5* seg = small_segment_v5_acquire();
+        if (!seg) {
+            return NULL;
         }
+        // First page
+        slot->used_pages |= 1U;
+        return &seg->page_meta[0];
     }

-    // No free pages in existing segments, allocate new segment
-    SmallSegmentV5* seg = small_segment_v5_acquire();
-    if (!seg) {
-        return NULL;
-    }
-
-    // Mark first page as used
-    for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
-        if (&g_segment_slots_v5[s].seg == seg) {
-            g_segment_slots_v5[s].used_pages |= 1U;  // Mark page 0 as used
-            g_last_alloc_slot_v5 = s;
-            break;
-        }
-    }
-
-    return &seg->page_meta[0];
+    return NULL;  // Segment full (shouldn't happen with page recycling)
 }

 void small_segment_v5_free_page(SmallPageMetaV5* page) {
@@ -212,22 +173,16 @@ void small_segment_v5_free_page(SmallPageMetaV5* page) {
         return;
     }

-    SmallSegmentV5* seg = (SmallSegmentV5*)page->segment;
-
-    // Find the slot and clear the used bit
-    for (int s = 0; s < MAX_SEGMENTS_PER_THREAD; s++) {
-        if (&g_segment_slots_v5[s].seg == seg) {
-            g_segment_slots_v5[s].used_pages &= ~(1U << page->page_idx);
-
-            // If segment is now empty, we could release it
-            // For now, keep it for reuse
-            break;
-        }
+    // Phase v5-3: Single segment - direct bitmap clear
+    TLSSegmentSlot* slot = &g_tls_segment_v5;
+    if (slot->in_use && page->segment == &slot->seg) {
+        slot->used_pages &= ~(1U << page->page_idx);
+        // Keep segment for reuse even if empty
     }
 }

 // ============================================================================
-// O(1) Page Metadata Lookup (Phase v5-2)
+// O(1) Page Metadata Lookup (Phase v5-3: Single segment, no search)
 // ============================================================================

 SmallPageMetaV5* small_segment_v5_page_meta_of(void* ptr) {
@@ -235,46 +190,35 @@ SmallPageMetaV5* small_segment_v5_page_meta_of(void* ptr) {
         return NULL;
     }

+    // Phase v5-3: Single segment - direct TLS access, no loop
+    TLSSegmentSlot* slot = &g_tls_segment_v5;
+    if (unlikely(!slot->in_use)) {
+        return NULL;
+    }
+
+    SmallSegmentV5* seg = &slot->seg;
     uintptr_t addr = (uintptr_t)ptr;
-    uintptr_t seg_base = addr & ~(SMALL_SEGMENT_V5_SIZE - 1);
+    uintptr_t seg_base = seg->base;

-    // Search for segment in TLS slots
-    SmallSegmentV5* seg = NULL;
-    for (int i = 0; i < MAX_SEGMENTS_PER_THREAD; i++) {
-        if (g_segment_slots_v5[i].in_use) {
-            SmallSegmentV5* candidate = &g_segment_slots_v5[i].seg;
-            if (candidate->base == seg_base) {
-                seg = candidate;
-                break;
-            }
-        }
-    }
-
-    if (unlikely(!seg)) {
+    // Check if ptr is within this segment's range
+    if (unlikely(addr < seg_base || addr >= seg_base + SMALL_SEGMENT_V5_SIZE)) {
         return NULL;
     }

-    // Verify magic number (Fail-Fast validation)
-    if (unlikely(seg->magic != SMALL_SEGMENT_V5_MAGIC)) {
-        return NULL;
-    }
-
-    // Compute page index via shift
+    // Compute page index via shift (O(1))
     size_t page_idx = (addr - seg_base) >> SMALL_SEGMENT_V5_PAGE_SHIFT;

-    // Bounds check
+    // Bounds check (should always pass if within segment)
     if (unlikely(page_idx >= seg->num_pages)) {
         return NULL;
     }

     SmallPageMetaV5* page = &seg->page_meta[page_idx];

-    // Validate that this page is actually in use (has been allocated)
-    // Unallocated pages have capacity == 0
+    // Validate that this page is actually in use
     if (unlikely(page->capacity == 0)) {
         return NULL;
     }

-    // Return page metadata
     return page;
 }
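For reference, the O(1) lookup above reduces to one range check, one subtraction, and one shift. A standalone sketch of that arithmetic with assumed constants (a 2MiB segment split into 64KiB pages, so a page shift of 16; the real SMALL_SEGMENT_V5_PAGE_SHIFT and page count live in the project's headers and may differ):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    // Assumed illustrative constants, not the project's definitions.
    #define SEG_SIZE_SKETCH   (2u * 1024 * 1024)   // 2MiB segment
    #define PAGE_SHIFT_SKETCH 16                   // 64KiB pages -> 32 pages per segment

    // Map ptr to its page index within a segment starting at seg_base.
    // Returns (size_t)-1 when ptr lies outside [seg_base, seg_base + SEG_SIZE_SKETCH).
    static size_t page_index_of(uintptr_t seg_base, const void* ptr) {
        uintptr_t addr = (uintptr_t)ptr;
        if (addr < seg_base || addr >= seg_base + SEG_SIZE_SKETCH) {
            return (size_t)-1;                          // not this thread's segment
        }
        return (addr - seg_base) >> PAGE_SHIFT_SKETCH;  // subtract + shift: O(1)
    }

    int main(void) {
        uintptr_t base = 0x40000000u;  // pretend segment base address
        assert(page_index_of(base, (void*)(base + 0x100))   == 0);  // inside page 0
        assert(page_index_of(base, (void*)(base + 0x10000)) == 1);  // first byte of page 1
        assert(page_index_of(base, (void*)(base + SEG_SIZE_SKETCH)) == (size_t)-1);  // out of range
        return 0;
    }

Note the trade-off the new code accepts: with a single TLS segment, page_meta_of() only recognizes pointers that belong to the current thread's one segment; anything else fails the range check and returns NULL.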