Files
hakmem/core/box/smallobject_hotbox_v7_box.h
Moe Charm (CI) d5aa3110c6 Phase v7-5b: C5+C6 multi-class expansion (+4.3% improvement)
- Add C5 (256B blocks) support alongside C6 (512B blocks)
- Same segment shared between C5/C6 (page_meta.class_idx distinguishes)
- SMALL_V7_CLASS_SUPPORTED() macro for class validation
- Extend small_v7_block_size() for C5 (switch statement)

A/B Result: C6-only v7 avg 7.64M ops/s → C5+C6 v7 avg 7.97M ops/s (+4.3%)
Criteria: C6 protected, C5 net positive, no TLS bloat

ENV: HAKMEM_SMALL_HEAP_V7_CLASSES=0x60 (bit5+bit6)
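
For reference, a minimal sketch of how such a class bitmask could be decoded (hypothetical helper; the real parser is not shown on this page):

    // Hex bitmask: bit i enables class Ci, so 0x60 = bit5|bit6 = C5+C6.
    static inline uint32_t small_v7_class_mask(void) {
        const char* e = getenv("HAKMEM_SMALL_HEAP_V7_CLASSES"); // <stdlib.h>
        return e ? (uint32_t)strtoul(e, NULL, 0) : 0;
    }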

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-12 05:11:02 +09:00

// smallobject_hotbox_v7_box.h - SmallObject HotBox v7 (Phase v7-5b: C5+C6 Multi-class)
//
// Role:
// - SmallObject v7 fast path for alloc/free
// - C5+C6 implementation (256B/512B blocks, 64KiB pages, 2MiB segments)
// - Uses SmallHeapCtx_v7 + SmallSegment_v7 + ColdIface_v7
//
// v7-5a optimizations:
// - Stats (alloc_count, free_count, live_current) removed from hot path
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
// - Header write kept (required due to intrusive freelist overlapping block[0])
//
// v7-5b additions:
// - C5 support (256B blocks) with minimal TLS overhead
// - Same segment shared between C5 and C6 (page_meta.class_idx distinguishes)
#pragma once
#include <stdint.h>
#include <stddef.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h> // for getenv()
#include "smallsegment_v7_box.h"
#include "smallobject_cold_iface_v7_box.h"
#include "region_id_v6_box.h"
#include "../tiny_region_id.h" // For HEADER_MAGIC, HEADER_CLASS_MASK
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
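// The class gate used throughout this file comes from smallsegment_v7_box.h.
// A minimal sketch of the expected shape (hypothetical, for illustration only;
// the real definitions live in that header):
//
//   #define SMALL_V7_CLASS_SUPPORTED(ci) ((ci) == 5 || (ci) == 6) // C5+C6
//
//   static inline size_t small_v7_block_size(uint8_t class_idx) {
//       switch (class_idx) {
//       case 5: return 256; // C5
//       case 6: return 512; // C6
//       default: return 0;  // unsupported -> front falls back
//       }
//   }
//
// Segment geometry (per the header comment above): 2MiB segments of 64KiB
// pages, i.e. 2MiB / 64KiB = 32 pages per segment.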
// ============================================================================
// Debug/Observe Support (v7-5a: ENV-gated for hot path)
// ============================================================================
// V7 stats functions (defined in smallobject_cold_iface_v7.c)
extern void small_v7_stat_alloc(void);
extern void small_v7_stat_free(void);
extern void small_v7_stat_refill(void);
extern void small_v7_stat_retire(void);
// v7-5a: ENV gate for hot path stats (default OFF for performance)
// Set HAKMEM_V7_HOT_STATS=1 to enable per-alloc/free atomic counters
static inline int small_v7_hot_stats_enabled(void) {
static int g_enabled = -1;
if (__builtin_expect(g_enabled < 0, 0)) {
const char* e = getenv("HAKMEM_V7_HOT_STATS");
g_enabled = (e && *e && *e != '0') ? 1 : 0;
}
return g_enabled;
}
// Conditional stat increment (only if ENV enabled)
#define SMALL_V7_HOT_STAT_ALLOC() \
do { if (__builtin_expect(small_v7_hot_stats_enabled(), 0)) small_v7_stat_alloc(); } while(0)
#define SMALL_V7_HOT_STAT_FREE() \
do { if (__builtin_expect(small_v7_hot_stats_enabled(), 0)) small_v7_stat_free(); } while(0)
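// Usage note: running with HAKMEM_V7_HOT_STATS=1 turns the two macros above
// into atomic counter increments; with the variable unset (the default) the
// hot path performs no atomic stat traffic at all.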
// Class mismatch logging (for hint validation)
static inline void small_v7_log_class_mismatch(void* ptr, uint8_t hint, uint8_t actual) {
// TODO: Make this ENV-controlled
// For now, silent (Fail-Fast mode would assert here)
(void)ptr;
(void)hint;
(void)actual;
}
// ============================================================================
// Alloc Fast Path
// ============================================================================
// small_heap_alloc_fast_v7() - v7 alloc (v7-5b: C5+C6 multi-class)
//
// Flow:
// 1. Get TLS context
// 2. Check current page freelist
// 3. If empty, check partial list
// 4. If no partial, call ColdIface refill
// 5. Pop from freelist and return USER ptr
//
// v7-5a optimizations:
// - Per-page stats (alloc_count, live_current) removed from hot path
// - Global atomic stats gated by ENV (HAKMEM_V7_HOT_STATS)
//
// v7-5b: C5+C6 support (same code path, different block sizes)
//
static inline void* small_heap_alloc_fast_v7(size_t size, uint8_t class_idx) {
(void)size; // class_idx already encodes the block size, so size is unused here
// v7-5b: C5 or C6 supported
if (unlikely(!SMALL_V7_CLASS_SUPPORTED(class_idx))) {
return NULL; // Unsupported class -> front falls back
}
SmallHeapCtx_v7* ctx = small_heap_ctx_v7();
SmallClassHeap_v7* h = &ctx->cls[class_idx];
SmallPageMeta_v7* p = h->current;
// Fast path: current page has free slots
if (likely(p && p->free_list)) {
void* base = p->free_list;
p->free_list = *(void**)base;
p->used++;
// Write header (HEADER_MAGIC | class_idx) for front compatibility
// Note: Cannot move to carve time due to intrusive freelist overlapping block[0]
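// Block layout while free: the intrusive next pointer occupies bytes
// [0..sizeof(void*)-1] of the block base, clobbering the header byte at
// base[0]; the USER pointer handed out below is base + 1.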
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
// v7-5a: Stats moved to cold path (ENV-gated only)
SMALL_V7_HOT_STAT_ALLOC();
// Return USER ptr (base + 1 for header compatibility with front)
return (uint8_t*)base + 1;
}
// Current exhausted -> try partial list
if (h->partial_head) {
p = h->partial_head;
h->partial_head = p->segment_next_partial;
p->segment_next_partial = NULL;
h->current = p;
if (likely(p->free_list)) {
void* base = p->free_list;
p->free_list = *(void**)base;
p->used++;
// Write header
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
SMALL_V7_HOT_STAT_ALLOC();
return (uint8_t*)base + 1;
}
}
// Completely exhausted -> ColdIface refill
small_v7_stat_refill();
p = small_cold_v7_refill_page(ctx, class_idx);
if (unlikely(!p || !p->free_list)) {
return NULL; // front falls back to legacy/pool
}
h->current = p;
// Pop from new page
void* base = p->free_list;
p->free_list = *(void**)base;
p->used++;
// Write header
((uint8_t*)base)[0] = (uint8_t)(HEADER_MAGIC | (class_idx & HEADER_CLASS_MASK));
SMALL_V7_HOT_STAT_ALLOC();
return (uint8_t*)base + 1;
}
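// Example caller (hypothetical front-end dispatch; legacy_small_alloc is a
// placeholder for whatever fallback path the front actually uses):
//   void* p = small_heap_alloc_fast_v7(size, class_idx);
//   if (!p) p = legacy_small_alloc(size); // v7 declined -> fall back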
// ============================================================================
// Free Fast Path
// ============================================================================
// small_heap_free_fast_v7() - v7 free (v7-5b: C5+C6 multi-class)
//
// Flow:
// 1. TLS segment hint hit (skip RegionIdBox)
// 2. RegionIdBox fallback (cold path)
//
// v7-5a optimizations:
// - Stats (free_count, live_current) removed from hot path
// - Global atomic stats gated by ENV
//
// v7-5b: C5+C6 support (page->class_idx determines actual class)
//
// @param ptr: USER pointer to free
// @param class_idx_hint: Class index hint from front/header (may be ignored)
// @return: true if handled by v7, false if not v7-managed (front should fallback)
//
static inline bool small_heap_free_fast_v7(void* ptr, uint8_t class_idx_hint) {
(void)class_idx_hint; // hint is currently unvalidated (see small_v7_log_class_mismatch)
if (unlikely(!ptr)) {
return false;
}
uintptr_t addr = (uintptr_t)ptr;
// ========================================================================
// Path 1: TLS segment hit (skip RegionIdBox binary search)
// ========================================================================
// Fetch the TLS context once; the cheap bounds check below handles the
// common case without touching RegionIdBox
SmallHeapCtx_v7* ctx = small_heap_ctx_v7();
// Try TLS segment bounds check first (most common case)
if (addr >= ctx->tls_seg_base && addr < ctx->tls_seg_end) {
SmallSegment_v7* seg = ctx->segment;
if (unlikely(!seg)) {
goto regionid_fallback;
}
// Calculate page index
size_t page_idx = (addr - ctx->tls_seg_base) >> SMALL_PAGE_V7_SHIFT;
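// (With the 64KiB pages described in the header comment, SMALL_PAGE_V7_SHIFT
// would be 16.)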
if (unlikely(page_idx >= seg->num_pages)) {
return false;
}
SmallPageMeta_v7* page = &seg->page_meta[page_idx];
// v7-5b: Validate page is in use and C5 or C6
if (unlikely(page->capacity == 0 || !SMALL_V7_CLASS_SUPPORTED(page->class_idx))) {
return false;
}
// Push BASE ptr to freelist (fast path - no branches)
void* base = (uint8_t*)ptr - 1;
*(void**)base = page->free_list;
page->free_list = base;
// v7-5a: Stats removed from hot path
// Retire if empty
if (unlikely(--page->used == 0)) {
small_v7_stat_retire();
small_cold_v7_retire_page(ctx, page);
}
SMALL_V7_HOT_STAT_FREE();
return true;
}
// ========================================================================
// Path 2: TLS miss -> RegionIdBox fallback (not in TLS segment)
// ========================================================================
regionid_fallback:
{
RegionLookupV6 lk = region_id_lookup_v6(ptr);
if (unlikely(lk.kind != REGION_KIND_SMALL_V7)) {
return false; // Not v7 -> front falls back to legacy/pool/ULTRA
}
// Get segment from registry metadata
SmallSegment_v7* seg = (SmallSegment_v7*)lk.page_meta;
if (unlikely(!seg || !small_segment_v7_valid(seg))) {
return false;
}
// Calculate page index from pointer
if (unlikely(!small_ptr_in_segment_v7(seg, ptr))) {
return false;
}
size_t page_idx = SMALL_V7_PAGE_IDX(seg, addr);
if (unlikely(page_idx >= seg->num_pages)) {
return false;
}
SmallPageMeta_v7* page = &seg->page_meta[page_idx];
// v7-5b: Validate page is in use and C5 or C6
if (unlikely(page->capacity == 0 || !SMALL_V7_CLASS_SUPPORTED(page->class_idx))) {
return false;
}
// Push BASE ptr to page freelist
void* base = (uint8_t*)ptr - 1;
*(void**)base = page->free_list;
page->free_list = base;
// v7-5a: Stats removed from hot path
// Decrement used count
if (unlikely(--page->used == 0)) {
small_v7_stat_retire();
small_cold_v7_retire_page(ctx, page);
}
SMALL_V7_HOT_STAT_FREE();
return true;
}
}
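// Example caller (hypothetical front-end dispatch, mirroring the alloc side;
// legacy_small_free is a placeholder name):
//   if (!small_heap_free_fast_v7(user_ptr, hint))
//       legacy_small_free(user_ptr); // not v7-managed -> fall back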
// ============================================================================
// Stub Functions (for compatibility, forwards to real impl)
// ============================================================================
// These maintain backward compatibility with v7-1 stub API
static inline void* small_heap_alloc_fast_v7_stub(size_t size, uint8_t class_idx) {
// v7-2: Use real implementation
return small_heap_alloc_fast_v7(size, class_idx);
}
static inline bool small_heap_free_fast_v7_stub(void* ptr, uint8_t class_idx) {
// v7-2: Use real implementation
return small_heap_free_fast_v7(ptr, class_idx);
}