Add 1-slot TLS cache to C6 v5 to reduce page_meta access overhead. Implementation: - Add HAKMEM_SMALL_HEAP_V5_TLS_CACHE_ENABLED ENV (default: 0) - SmallHeapCtxV5: add c6_cached_block field for TLS cache - alloc: cache hit bypasses page_meta lookup, returns immediately - free: empty cache stores block, full cache evicts old block first Results (1M iter, ws=400, HEADER_MODE=full): - C6-heavy (257-768B): 35.53M → 37.02M ops/s (+4.2%) - Mixed 16-1024B: 38.04M → 37.93M ops/s (-0.3%, noise) Known issue: header_mode=light has infinite loop bug (freelist pointer/header collision). Full mode only for now. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
124 lines
4.2 KiB
C
124 lines
4.2 KiB
C
// smallobject_cold_iface_v5.c - SmallObject Cold Interface v5 (Phase v5-2)
|
|
//
|
|
// Purpose: Page refill/retire operations for SmallObject v5
|
|
// Design: C6-only implementation with segment-based allocation
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include "box/smallobject_cold_iface_v5.h"
|
|
#include "box/smallsegment_v5_box.h"
|
|
#include "box/smallobject_hotbox_v5_box.h"
|
|
#include "box/smallobject_v5_env_box.h"
|
|
#include "tiny_region_id.h" // For HEADER_MAGIC and HEADER_CLASS_MASK
|
|
|
|
#ifndef likely
|
|
#define likely(x) __builtin_expect(!!(x), 1)
|
|
#define unlikely(x) __builtin_expect(!!(x), 0)
|
|
#endif
|
|
|
|
// ============================================================================
|
|
// Cold Refill: Allocate a new page for the given class (Phase v5-2)
|
|
// ============================================================================
|
|
|
|
// Allocate and initialize a fresh C6 page for `class_idx`.
//
// Returns a fully carved page (intrusive freelist built, metadata set) or
// NULL on failure: unsupported class, pool/OOM failure, or a page whose
// segment back-pointer was unexpectedly unset.
//
// Ownership: on success the caller owns the returned page; on every failure
// path no page remains allocated (the defensive NULL-segment path releases
// the page instead of leaking it).
SmallPageMetaV5* small_cold_v5_refill_page(SmallHeapCtxV5* ctx, uint32_t class_idx) {
    (void)ctx; // Not used in v5-2 C6-only implementation

    // Phase v5-2: C6-only implementation
    if (unlikely(class_idx != SMALL_HEAP_V5_C6_CLASS_IDX)) {
        return NULL; // Only C6 supported in v5-2
    }

    // Step 1: Allocate a page from segment pool (reuses existing segments)
    SmallPageMetaV5* page = small_segment_v5_alloc_page();
    if (unlikely(!page)) {
        return NULL; // OOM or TLS slot exhaustion
    }

    // Step 2: Get segment pointer (already set by alloc_page)
    SmallSegmentV5* seg = (SmallSegmentV5*)page->segment;
    if (unlikely(!seg)) {
        // Defensive: alloc_page is expected to have set page->segment.
        // Return the page to the pool instead of leaking it (this path
        // previously dropped the page on the floor).
        small_segment_v5_free_page(page);
        return NULL;
    }

    // Step 3: Initialize page metadata for C6
    page->class_idx = (uint8_t)class_idx;
    page->capacity  = SMALL_SEGMENT_V5_PAGE_SIZE / SMALL_HEAP_V5_C6_BLOCK_SIZE;
    page->used      = 0;
    page->flags     = 0;

    // Step 4: Build freelist for the page
    // Page starts at: seg->base + (page_idx * SMALL_SEGMENT_V5_PAGE_SIZE)
    uintptr_t page_base = seg->base + ((uintptr_t)page->page_idx * SMALL_SEGMENT_V5_PAGE_SIZE);
    uint8_t* base = (uint8_t*)page_base;

    // Phase v5-4: header mode was meant to drive a carve-time optimization,
    // but the value is currently unused here (headers are written at alloc
    // time because the freelist link occupies block[0-7]). Keep the call in
    // case it performs lazy ENV initialization; discard the result.
    // NOTE(review): if small_heap_v5_header_mode() has no side effects,
    // this call can be removed entirely — confirm against its definition.
    (void)small_heap_v5_header_mode();

    // Build intrusive freelist (last to first for cache locality).
    // The next-pointer is stored at block[0-7], overwriting any header
    // that might be there; memcpy avoids strict-aliasing UB.
    void* freelist = NULL;
    for (int i = (int)page->capacity - 1; i >= 0; i--) {
        uint8_t* block = base + ((size_t)i * SMALL_HEAP_V5_C6_BLOCK_SIZE);
        void* next = freelist;
        memcpy(block, &next, sizeof(void*));
        freelist = block;
    }
    // NOTE: Headers are written during alloc (not during carve) since freelist uses block[0-7]

    page->free_list = freelist;

    return page;
}
|
|
|
|
// ============================================================================
|
|
// Cold Retire: Return an empty page to the segment (Phase v5-2)
|
|
// ============================================================================
|
|
|
|
// Return an empty C6 page to the segment pool.
//
// Silently ignores NULL pages, non-C6 pages (v5-2 supports C6 only), and
// pages that still hold live blocks (used != 0). On success the page's
// metadata is scrubbed back to the "unused" state before it is handed to
// small_segment_v5_free_page() for reuse.
void small_cold_v5_retire_page(SmallHeapCtxV5* ctx, SmallPageMetaV5* page) {
    (void)ctx; // Unused in the v5-2 implementation

    // Guard clauses: only a valid, empty C6 page may be retired.
    if (unlikely(!page)) {
        return;
    }
    if (unlikely(page->class_idx != SMALL_HEAP_V5_C6_CLASS_IDX)) {
        return; // C6 only in v5-2
    }
    if (page->used != 0) {
        return; // never retire a page with live allocations
    }

    // Scrub metadata back to the pristine/unused state.
    page->free_list = NULL;
    page->used      = 0;
    page->capacity  = 0;
    page->class_idx = 0;
    page->flags     = 0;

    // Hand the page back to the segment pool so it can be reused.
    small_segment_v5_free_page(page);
}
|
|
|
|
// ============================================================================
|
|
// Remote Operations (Stub for Phase v5-2)
|
|
// ============================================================================
|
|
|
|
// Stub: cross-thread (remote) free is not implemented in Phase v5-2.
// Always reports failure so callers fall back to their local path.
// All parameters are intentionally ignored.
bool small_cold_v5_remote_push(SmallPageMetaV5* page, void* ptr, uint32_t tid) {
    (void)page;
    (void)ptr;
    (void)tid;
    return false; // "not handled" — remote push unavailable in v5-2
}
|
|
|
|
// Stub: draining remote frees is a no-op until remote operations land
// in a later phase (not implemented in v5-2).
void small_cold_v5_remote_drain(SmallHeapCtxV5* ctx) {
    (void)ctx; // nothing to drain yet
}
|