hakmem/core/smallobject_core_v6.c
Moe Charm (CI) 7b7de53167 Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()

Key Changes:
============

1. NEW FILES:
   - core/box/free_front_v3_env_box.h: Route snapshot definition & API
   - core/box/free_front_v3_env_box.c: Snapshot initialization & caching

2. Infrastructure Details:
   - FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
   - Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
   - ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
   - Per-thread TLS caching to avoid repeated ENV reads (see the sketch below)
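
   A minimal sketch of what the snapshot box could look like, assuming the
   details above. The FreeRouteSnapshotV3 name, the four routes, the ENV gate,
   and the TLS caching are from this commit; the helper name
   free_route_snapshot_v3(), the field layout, and the tiny-route-to-free-route
   mapping are illustrative assumptions, not the actual contents of
   core/box/free_front_v3_env_box.h:

       #include <stdint.h>
       #include <stdlib.h>
       #include <string.h>
       #include "box/tiny_route_env_box.h"  /* tiny_route_for_class(), TINY_ROUTE_* */

       typedef enum {
           FREE_ROUTE_LEGACY = 0,
           FREE_ROUTE_TINY_V3,
           FREE_ROUTE_CORE_V6_C6,
           FREE_ROUTE_POOL_V1,
       } FreeRouteKindV3;

       typedef struct {
           uint8_t enabled;        /* HAKMEM_TINY_FREE_FRONT_V3_ENABLED gate, default OFF */
           uint8_t route_kind[8];  /* class_idx -> FreeRouteKindV3, one entry per class */
       } FreeRouteSnapshotV3;

       /* Per-thread cache: the ENV is read once per thread, never per free(). */
       static __thread FreeRouteSnapshotV3 g_free_route_snap_v3;
       static __thread int g_free_route_snap_v3_init = 0;

       static const FreeRouteSnapshotV3* free_route_snapshot_v3(void) {
           if (!g_free_route_snap_v3_init) {
               memset(&g_free_route_snap_v3, 0, sizeof(g_free_route_snap_v3));
               const char* e = getenv("HAKMEM_TINY_FREE_FRONT_V3_ENABLED");
               g_free_route_snap_v3.enabled = (e && e[0] == '1');
               for (int i = 0; i < 8; i++) {
                   /* Consolidate tiny_route_for_class() into the table once.
                    * Only the v6 mapping is shown; the LEGACY / TINY_V3 /
                    * POOL_V1 cases depend on the tiny route enum and are
                    * assumed here. */
                   int t = tiny_route_for_class((uint8_t)i);
                   g_free_route_snap_v3.route_kind[i] =
                       (t == TINY_ROUTE_SMALL_HEAP_V6) ? FREE_ROUTE_CORE_V6_C6
                                                       : FREE_ROUTE_LEGACY;
               }
               g_free_route_snap_v3_init = 1;
           }
           return &g_free_route_snap_v3;
       }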

3. Design Goals:
   - Consolidate tiny_route_for_class() results into snapshot table
   - Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
   - Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
   - Clear ownership boundary: front v3 handles routing, downstream handles free (see the dispatch sketch below)
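
   And how the hot path is intended to consume the table: a hypothetical
   sketch of the future v3-2 integration, not the shipped hak_free_at() code.
   tiny_free_v3() and hak_free_legacy() are stand-in names for the downstream
   sinks; small_free_fast_v6(), small_heap_ctx_v6(), tiny_policy_snapshot_v6()
   and hak_pool_free() exist in smallobject_core_v6.c below:

       /* A single table load replaces the per-free C7 ULTRA / v4 / v5 / v6 ENV
        * checks; costly lookups (ss_fast_lookup / slab_index_for) stay confined
        * to the routes that actually need them. */
       static inline void hak_free_front_v3(void* ptr, uint32_t class_idx) {
           const FreeRouteSnapshotV3* snap = free_route_snapshot_v3();
           switch ((FreeRouteKindV3)snap->route_kind[class_idx]) {
           case FREE_ROUTE_TINY_V3:
               tiny_free_v3(ptr, class_idx);            /* downstream owns the free */
               break;
           case FREE_ROUTE_CORE_V6_C6:
               small_free_fast_v6(ptr, class_idx, small_heap_ctx_v6(),
                                  tiny_policy_snapshot_v6());
               break;
           case FREE_ROUTE_POOL_V1:
               hak_pool_free(ptr, 0, 0);
               break;
           case FREE_ROUTE_LEGACY:
           default:
               hak_free_legacy(ptr);                    /* pre-v3 path, unchanged */
               break;
           }
       }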

4. Phase Plan:
   - v3-1 (COMPLETE): Infrastructure (snapshot table, ENV initialization, TLS cache)
   - v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
   - v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement

5. BUILD FIX:
   - Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
   - This symbol was referenced but not linked, causing undefined reference errors
   - Benchmark targets now build cleanly without LTO

Status:
=======
- Build:  PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged

Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% fewer branch mispredictions)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00

// smallobject_core_v6.c - SmallObject Core v6 implementation (Phase v6-3)
#include <stdlib.h>
#include <string.h>
#include "box/smallobject_core_v6_box.h"
#include "box/smallobject_cold_iface_v6.h"
#include "box/smallsegment_v6_box.h"
#include "box/tiny_route_env_box.h"
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// TLS context
static __thread struct SmallHeapCtxV6 g_small_heap_ctx_v6;
static __thread int g_small_heap_ctx_v6_init = 0;
// TLS policy snapshot
static __thread struct SmallPolicySnapshotV6 g_snap_v6;
static __thread int g_snap_v6_init = 0;
/// Get TLS heap context for v6 (lazy initialization)
/// @return: TLS context pointer (never NULL)
SmallHeapCtxV6* small_heap_ctx_v6(void) {
    if (!g_small_heap_ctx_v6_init) {
        memset(&g_small_heap_ctx_v6, 0, sizeof(g_small_heap_ctx_v6));
        // Initialize TLS segment ownership range
        SmallSegmentV6* seg = small_segment_v6_acquire_for_thread();
        if (seg && small_segment_v6_valid(seg)) {
            g_small_heap_ctx_v6.tls_seg_base = seg->base;
            g_small_heap_ctx_v6.tls_seg_end = seg->base + SMALL_SEGMENT_V6_SIZE;
        }
        g_small_heap_ctx_v6_init = 1;
    }
    return &g_small_heap_ctx_v6;
}
/// Get TLS policy snapshot for v6 (lazy initialization)
/// @return: Policy snapshot pointer (never NULL)
const SmallPolicySnapshotV6* tiny_policy_snapshot_v6(void) {
    if (!g_snap_v6_init) {
        memset(&g_snap_v6, 0, sizeof(g_snap_v6));
        // Initialize route_kind from tiny_route API (this ensures init is done)
        for (int i = 0; i < 8; i++) {
            g_snap_v6.route_kind[i] = (uint8_t)tiny_route_for_class((uint8_t)i);
        }
        g_snap_v6_init = 1;
    }
    return &g_snap_v6;
}
// Forward declarations for pool v1 fallback
extern void* hak_pool_try_alloc(size_t size, uintptr_t site_id);
extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);
// ============================================================================
// Allocation Implementation
// ============================================================================
/// Allocate block from v6 TLS freelist or refill
/// @param size: requested size (unused, class_idx determines size)
/// @param class_idx: size class index (must be C4, C5 or C6 for v6 route)
/// @param ctx: TLS context
/// @param snap: policy snapshot
/// @return: USER pointer (BASE+1) or NULL on fallback
void* small_alloc_fast_v6(size_t size,
                          uint32_t class_idx,
                          SmallHeapCtxV6* ctx,
                          const SmallPolicySnapshotV6* snap) {
    (void)size;
    // Bounds check
    if (unlikely(class_idx >= 8)) {
        return hak_pool_try_alloc(size, 0);
    }
    uint8_t route = snap->route_kind[class_idx];
    // v6-5/v6-6: supports C6, C5 and C4 classes
    if (route != TINY_ROUTE_SMALL_HEAP_V6) {
        return hak_pool_try_alloc(size, 0);
    }
    // C6 fast path
    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
        // Fast path: TLS freelist hit
        if (likely(ctx->tls_count_c6 > 0)) {
            void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
            // v6-3: Header already written during refill, just return USER pointer
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    // C5 fast path (Phase v6-5)
    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
        // Fast path: TLS freelist hit
        if (likely(ctx->tls_count_c5 > 0)) {
            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    // C4 fast path (Phase v6-6)
    else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
        // Fast path: TLS freelist hit
        if (likely(ctx->tls_count_c4 > 0)) {
            void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    else {
        // Unsupported class for v6
        return hak_pool_try_alloc(size, 0);
    }
    // Slow path: refill TLS with multiple blocks (batching)
    SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
    if (!page || !page->free_list) {
        return hak_pool_try_alloc(size, 0); // Safety fallback
    }
    // v6-5/v6-6: Batch refill - supports C6, C5 and C4
    uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
        // C6 refill path
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
        int filled = 0;
        // Fill TLS (leave room for 1 to return)
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
            filled++;
        }
        page->used += filled;
        // Pop one more to return to caller
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        // If we filled TLS but no more blocks, pop from TLS
        if (ctx->tls_count_c6 > 0) {
            void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
        // C5 refill path (Phase v6-5)
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
        int filled = 0;
        // Fill TLS (leave room for 1 to return)
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
            filled++;
        }
        page->used += filled;
        // Pop one more to return to caller
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        // If we filled TLS but no more blocks, pop from TLS
        if (ctx->tls_count_c5 > 0) {
            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
        // C4 refill path (Phase v6-6)
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4;
        int filled = 0;
        // Fill TLS (leave room for 1 to return)
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk;
            filled++;
        }
        page->used += filled;
        // Pop one more to return to caller
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        // If we filled TLS but no more blocks, pop from TLS
        if (ctx->tls_count_c4 > 0) {
            void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    // Should not reach here
    return hak_pool_try_alloc(size, 0);
}
// ============================================================================
// Free Implementation
// ============================================================================
/// Free block to v6 TLS freelist or page freelist
/// @param ptr: USER pointer to free
/// @param class_idx: size class index
/// @param ctx: TLS context
/// @param snap: policy snapshot
void small_free_fast_v6(void* ptr,
                        uint32_t class_idx,
                        SmallHeapCtxV6* ctx,
                        const SmallPolicySnapshotV6* snap) {
    // Bounds check
    if (unlikely(class_idx >= 8)) {
        hak_pool_free(ptr, 0, 0);
        return;
    }
    uint8_t route = snap->route_kind[class_idx];
    // v6-5: Check if this is CORE_V6 route
    if (route != TINY_ROUTE_SMALL_HEAP_V6) {
        hak_pool_free(ptr, 0, 0);
        return;
    }
    // Convert USER pointer to BASE pointer
    void* base = SMALL_V6_BASE_FROM_USER(ptr);
    // Fast path: TLS segment ownership + TLS push
    if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
        // C6 TLS push
        if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
            ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
            return;
        }
        // C5 TLS push (Phase v6-5)
        if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
            ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
            return;
        }
        // C4 TLS push (Phase v6-6)
        if (class_idx == SMALL_V6_C4_CLASS_IDX && ctx->tls_count_c4 < SMALL_V6_TLS_CAP) {
            ctx->tls_freelist_c4[ctx->tls_count_c4++] = base;
            return;
        }
    }
    // Slow path: page_meta lookup and push to page freelist
    SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
    if (!page) {
        hak_pool_free(ptr, 0, 0);
        return;
    }
    // Push to page freelist (using BASE pointer)
    *(void**)base = page->free_list;
    page->free_list = base;
    if (page->used > 0) page->used--;
    // Retire empty page
    if (page->used == 0) {
        small_cold_v6_retire_page(page);
    }
}
// ============================================================================
// Cold Path Implementation (Phase v6-6)
// ============================================================================
/// Cold path: alloc with refill - called when TLS is empty
/// @param class_idx: C4, C5 or C6
/// @param ctx: TLS context
/// @return: USER pointer or NULL
void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) {
    // Refill TLS from page
    SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
    if (!page || !page->free_list) {
        return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0);
    }
    uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
    if (class_idx == SMALL_V6_C6_CLASS_IDX) {
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
        int filled = 0;
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
            filled++;
        }
        page->used += filled;
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        if (ctx->tls_count_c6 > 0) {
            void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
        int filled = 0;
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
            filled++;
        }
        page->used += filled;
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        if (ctx->tls_count_c5 > 0) {
            void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
        int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4;
        int filled = 0;
        while (page->free_list && filled < max_fill - 1) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            ((uint8_t*)blk)[0] = header_byte;
            ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk;
            filled++;
        }
        page->used += filled;
        if (page->free_list) {
            void* blk = page->free_list;
            page->free_list = *(void**)blk;
            page->used++;
            ((uint8_t*)blk)[0] = header_byte;
            return SMALL_V6_USER_FROM_BASE(blk);
        }
        if (ctx->tls_count_c4 > 0) {
            void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
            return SMALL_V6_USER_FROM_BASE(blk);
        }
    }
    return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0);
}
/// Cold path: free to page freelist - called when TLS full or cross-thread
/// @param ptr: USER pointer
/// @param class_idx: C4, C5 or C6
void small_free_cold_v6(void* ptr, uint32_t class_idx) {
    (void)class_idx; // Not needed for page lookup
    void* base = SMALL_V6_BASE_FROM_USER(ptr);
    SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
    if (!page) {
        hak_pool_free(ptr, 0, 0);
        return;
    }
    *(void**)base = page->free_list;
    page->free_list = base;
    if (page->used > 0) page->used--;
    if (page->used == 0) {
        small_cold_v6_retire_page(page);
    }
}