// smallobject_cold_iface_v7.c - SmallObject ColdIface v7 implementation (Phase v7-5b)
//
// Purpose:
// - Page refill: acquire page from segment, carve freelist
// - Page retire: release empty page back to segment, publish stats
//
// v7-5a optimizations:
// - Header byte is written at alloc time, not at carve time (the freelist
//   next pointer overlaps block[0], so a header written during carving
//   would be clobbered)
// - Stats are collected at retire time, keeping them off the hot path
//
// v7-5b additions:
// - C5 support (256B blocks) alongside C6 (512B blocks)
// - The same segment is shared between C5 and C6; page_meta.class_idx
//   distinguishes the two classes
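//
// Typical call flow (illustrative only; the hot-path entry points live in
// the v7 box headers, and the names below mirror this file's API):
//   SmallHeapCtx_v7* ctx = small_heap_ctx_v7();
//   SmallPageMeta_v7* page = small_cold_v7_refill_page(ctx, class_idx);
//   ... hot path pops blocks from page->free_list and writes headers ...
//   small_cold_v7_retire_page(ctx, page);  // once page->used returns to 0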
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include "box/smallobject_cold_iface_v7_box.h"
#include "box/smallsegment_v7_box.h"
#include "box/region_id_v6_box.h"
#include "box/smallobject_policy_v7_box.h" // v7-7: Learner integration
#include "tiny_region_id.h" // v7-5a: For HEADER_MAGIC, HEADER_CLASS_MASK
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// ============================================================================
// V7 Stats (for debugging/OBSERVE)
// ============================================================================
static uint64_t g_v7_alloc_count = 0;
static uint64_t g_v7_free_count = 0;
static uint64_t g_v7_refill_count = 0;
static uint64_t g_v7_retire_count = 0;
// Destructor to print stats at exit
__attribute__((destructor))
static void small_v7_stats_report(void) {
    if (g_v7_alloc_count > 0 || g_v7_free_count > 0) {
        fprintf(stderr, "[SMALL_V7] alloc=%lu free=%lu refill=%lu retire=%lu\n",
                (unsigned long)g_v7_alloc_count, (unsigned long)g_v7_free_count,
                (unsigned long)g_v7_refill_count, (unsigned long)g_v7_retire_count);
    }
}
void small_v7_stat_alloc(void) { __sync_fetch_and_add(&g_v7_alloc_count, 1); }
void small_v7_stat_free(void) { __sync_fetch_and_add(&g_v7_free_count, 1); }
void small_v7_stat_refill(void) { __sync_fetch_and_add(&g_v7_refill_count, 1); }
void small_v7_stat_retire(void) { __sync_fetch_and_add(&g_v7_retire_count, 1); }
// ============================================================================
// TLS Heap Context
// ============================================================================
static __thread SmallHeapCtx_v7 g_small_heap_ctx_v7;
static __thread int g_small_heap_ctx_v7_init = 0;
SmallHeapCtx_v7* small_heap_ctx_v7(void) {
    if (unlikely(!g_small_heap_ctx_v7_init)) {
        // Initialize context
        memset(&g_small_heap_ctx_v7, 0, sizeof(g_small_heap_ctx_v7));
        for (int i = 0; i < HAK_SMALL_NUM_CLASSES_V7; i++) {
            g_small_heap_ctx_v7.cls[i].class_idx = (uint16_t)i;
            g_small_heap_ctx_v7.cls[i].current = NULL;
            g_small_heap_ctx_v7.cls[i].partial_head = NULL;
            g_small_heap_ctx_v7.cls[i].full_head = NULL;
            g_small_heap_ctx_v7.cls[i].local_freelist = NULL;
            g_small_heap_ctx_v7.cls[i].local_freelist_count = 0;
            g_small_heap_ctx_v7.cls[i].local_freelist_cap = 0;
            g_small_heap_ctx_v7.cls[i].flags = 0;
        }
        g_small_heap_ctx_v7.segment = NULL;
        // Phase v7-3: Initialize TLS fast hints and cache
        g_small_heap_ctx_v7.tls_seg_base = 0;
        g_small_heap_ctx_v7.tls_seg_end = 0;
        g_small_heap_ctx_v7.last_page_base = 0;
        g_small_heap_ctx_v7.last_page_end = 0;
        g_small_heap_ctx_v7.last_page_meta = NULL;
        g_small_heap_ctx_v7_init = 1;
    }
    return &g_small_heap_ctx_v7;
}
// ============================================================================
// Segment Acquisition (lazy)
// ============================================================================
static SmallSegment_v7* cold_v7_ensure_segment(SmallHeapCtx_v7* ctx) {
    if (likely(ctx->segment != NULL)) {
        return ctx->segment;
    }
    // Acquire a segment for this thread.
    // NOTE: getpid() yields a process-wide id, not a per-thread id; it is
    // used here only as an owner tag passed to the segment allocator.
    uint32_t tid = (uint32_t)getpid();
    SmallSegment_v7* seg = small_segment_alloc_v7(tid);
    if (unlikely(!seg)) {
        return NULL;
    }
    ctx->segment = seg;
    // Phase v7-3: Set TLS segment fast hints
    ctx->tls_seg_base = seg->base;
    ctx->tls_seg_end = seg->base + SMALL_SEGMENT_V7_SIZE;
    // Register with RegionIdBox for ptr->region lookup.
    // This enables region_id_lookup_v6() to recognize v7 pointers.
    region_id_register_v6(
        (void*)seg->base,
        SMALL_SEGMENT_V7_SIZE,
        REGION_KIND_SMALL_V7,
        seg
    );
    return seg;
}
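// Illustrative consumer of the registration above (a sketch only; the exact
// return shape of region_id_lookup_v6() lives in box/region_id_v6_box.h):
//   free(ptr) path: region_id_lookup_v6(ptr) reports REGION_KIND_SMALL_V7
//                   => route the pointer to the v7 small-object free path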
// ============================================================================
// Page Refill
// ============================================================================
SmallPageMeta_v7* small_cold_v7_refill_page(SmallHeapCtx_v7* ctx, uint32_t class_idx) {
    if (unlikely(!ctx)) {
        return NULL;
    }
    // v7-5b: C5 (256B) and C6 (512B) are supported
    size_t block_size = small_v7_block_size(class_idx);
    if (unlikely(block_size == 0)) {
        return NULL; // Unsupported class
    }
    // Ensure we have a segment
    SmallSegment_v7* seg = cold_v7_ensure_segment(ctx);
    if (unlikely(!seg)) {
        return NULL;
    }
    // Take a page from the segment's free stack
    SmallPageMeta_v7* page = small_segment_take_page_v7(seg, class_idx);
    if (unlikely(!page)) {
        return NULL; // No free pages
    }
    // Calculate the page base address
    uintptr_t page_offset = (uintptr_t)page->page_idx * SMALL_PAGE_V7_SIZE;
    uintptr_t page_base = seg->base + page_offset;
    uint8_t* base = (uint8_t*)page_base;
    // Calculate capacity
    uint32_t capacity = (uint32_t)(SMALL_PAGE_V7_SIZE / block_size);
    page->capacity = capacity;
    page->used = 0;
    // Build the intrusive freelist back to front, so pops walk the page in
    // ascending address order (better cache locality on pop).
    // The freelist stores BASE pointers (block start addresses).
    //
    // Note: The header cannot be written at carve time because the freelist
    // next pointer is stored at block[0..7], which overlaps the header byte
    // at block[0]. The header must be written at alloc time, after the block
    // is popped from the freelist.
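    //
    // Block layout, illustrative (8-byte pointers assumed):
    //   while on the freelist:  [ next ptr (8B) | unused ............... ]
    //   after allocation:       [ hdr (1B) | user payload .............. ]
    //   -> the next pointer and the header byte both claim block[0]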
    void* freelist = NULL;
    for (int i = (int)capacity - 1; i >= 0; i--) {
        uint8_t* block = base + ((size_t)i * block_size);
        // Link: block->next = freelist
        void* next = freelist;
        memcpy(block, &next, sizeof(void*));
        freelist = block;
    }
    page->free_list = freelist;
    // Set as current page for this class
    SmallClassHeap_v7* heap = &ctx->cls[class_idx];
    heap->current = page;
    // v7-7: Feed refill to Learner for workload detection
    small_learner_v7_record_refill(class_idx, capacity);
    return page;
}
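// Illustrative alloc-time header write (the real hot path lives in the fast
// allocator, not in this file; HEADER_MAGIC and HEADER_CLASS_MASK come from
// tiny_region_id.h, and the OR-composition below is an assumption):
//   void* blk = page->free_list;
//   memcpy(&page->free_list, blk, sizeof(void*));   // pop intrusive next
//   ((uint8_t*)blk)[0] =
//       (uint8_t)(HEADER_MAGIC | ((uint8_t)class_idx & HEADER_CLASS_MASK));
//   page->used++;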
// ============================================================================
// Page Retire
// ============================================================================
void small_cold_v7_retire_page(SmallHeapCtx_v7* ctx, SmallPageMeta_v7* page) {
    if (unlikely(!ctx || !page)) {
        return;
    }
    // Only retire pages that are fully empty
    if (page->used != 0) {
        return;
    }
    SmallSegment_v7* seg = page->segment;
    if (unlikely(!seg)) {
        return;
    }
    // Publish stats before retiring
    SmallPageStatsV7 stats = {
        .class_idx = (uint8_t)page->class_idx,
        .reserved0 = 0,
        .page_idx = page->page_idx,
        .capacity = page->capacity,
        .alloc_count = page->alloc_count,
        .free_count = page->free_count,
        .remote_free_count = page->remote_free_count,
        .peak_live = page->peak_live,
        .remote_burst_max = page->remote_burst_max,
        .lifetime_ms = 0 // TODO: Calculate from epoch
    };
    small_cold_v7_publish_stats(&stats);
    // Clear current if this was the current page
    SmallClassHeap_v7* heap = &ctx->cls[page->class_idx];
    if (heap->current == page) {
        heap->current = NULL;
    }
    // Release page back to segment
    small_segment_release_page_v7(seg, page);
}
// ============================================================================
// Stats Publishing (v7-7: Learner integration)
// ============================================================================
void small_cold_v7_publish_stats(const SmallPageStatsV7* stats) {
    if (!stats) return;
    // v7-7: Feed stats to the Learner for dynamic route switching.
    // Note: v7-5a removed alloc_count from the hot path, so capacity is used
    // as a traffic proxy instead.
    small_learner_v7_record_retire(stats->class_idx, stats->capacity);
}