Files
hakmem/core/smallobject_core_v6.c

342 lines
11 KiB
C
Raw Normal View History

Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
// smallobject_core_v6.c - SmallObject Core v6 実装Phase v6-3
#include <stdlib.h>
#include <string.h>
#include "box/smallobject_core_v6_box.h"
#include "box/smallobject_cold_iface_v6.h"
#include "box/smallsegment_v6_box.h"
#include "box/tiny_route_env_box.h"
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
// TLS context
static __thread struct SmallHeapCtxV6 g_small_heap_ctx_v6;
static __thread int g_small_heap_ctx_v6_init = 0;
// TLS policy snapshot
static __thread struct SmallPolicySnapshotV6 g_snap_v6;
static __thread int g_snap_v6_init = 0;
/// Get TLS heap context for v6 (lazy initialization)
/// @return: TLS context pointer (never NULL)
SmallHeapCtxV6* small_heap_ctx_v6(void) {
if (!g_small_heap_ctx_v6_init) {
memset(&g_small_heap_ctx_v6, 0, sizeof(g_small_heap_ctx_v6));
// Initialize TLS segment ownership range
SmallSegmentV6* seg = small_segment_v6_acquire_for_thread();
if (seg && small_segment_v6_valid(seg)) {
g_small_heap_ctx_v6.tls_seg_base = seg->base;
g_small_heap_ctx_v6.tls_seg_end = seg->base + SMALL_SEGMENT_V6_SIZE;
}
g_small_heap_ctx_v6_init = 1;
}
return &g_small_heap_ctx_v6;
}
/// Get TLS policy snapshot for v6 (lazy initialization)
/// @return: Policy snapshot pointer (never NULL)
const SmallPolicySnapshotV6* tiny_policy_snapshot_v6(void) {
if (!g_snap_v6_init) {
memset(&g_snap_v6, 0, sizeof(g_snap_v6));
// Initialize route_kind from tiny_route API (this ensures init is done)
for (int i = 0; i < 8; i++) {
g_snap_v6.route_kind[i] = (uint8_t)tiny_route_for_class((uint8_t)i);
}
g_snap_v6_init = 1;
}
return &g_snap_v6;
}
// Forward declarations for pool v1 fallback
extern void* hak_pool_try_alloc(size_t size, uintptr_t site_id);
extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);
// ============================================================================
// Allocation Implementation
// ============================================================================
/// Allocate block from C6 v6 TLS freelist or refill
/// @param size: requested size (unused, class_idx determines size)
/// @param class_idx: size class index (must be C6 for v6 route)
/// @param ctx: TLS context
/// @param snap: policy snapshot
/// @return: USER pointer (BASE+1) or NULL on fallback
void* small_alloc_fast_v6(size_t size,
uint32_t class_idx,
SmallHeapCtxV6* ctx,
const SmallPolicySnapshotV6* snap) {
(void)size;
// Bounds check
if (unlikely(class_idx >= 8)) {
return hak_pool_try_alloc(size, 0);
}
uint8_t route = snap->route_kind[class_idx];
// v6-5: Support C6 and C5 classes
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
return hak_pool_try_alloc(size, 0);
}
// C6 fast path
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
// Fast path: TLS freelist hit
if (likely(ctx->tls_count_c6 > 0)) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
// v6-3: Header already written during refill, just return USER pointer
return SMALL_V6_USER_FROM_BASE(blk);
}
}
// C5 fast path (Phase v6-5)
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
// Fast path: TLS freelist hit
if (likely(ctx->tls_count_c5 > 0)) {
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
else {
// Unsupported class for v6
return hak_pool_try_alloc(size, 0);
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
}
// Slow path: refill TLS with multiple blocks (batching)
SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
if (!page || !page->free_list) {
return hak_pool_try_alloc(size, 0); // Safety fallback
}
// v6-5: Batch refill - support C6 and C5
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
// C6 refill path
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
int filled = 0;
// Fill TLS (leave room for 1 to return)
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
filled++;
}
page->used += filled;
// Pop one more to return to caller
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
// If we filled TLS but no more blocks, pop from TLS
if (ctx->tls_count_c6 > 0) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
return SMALL_V6_USER_FROM_BASE(blk);
}
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
}
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
// C5 refill path (Phase v6-5)
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
int filled = 0;
// Fill TLS (leave room for 1 to return)
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
filled++;
}
page->used += filled;
// Pop one more to return to caller
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
// If we filled TLS but no more blocks, pop from TLS
if (ctx->tls_count_c5 > 0) {
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
return SMALL_V6_USER_FROM_BASE(blk);
}
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
}
// Should not reach here
return hak_pool_try_alloc(size, 0);
}
// ============================================================================
// Free Implementation
// ============================================================================
/// Free block to C6 v6 TLS freelist or page freelist
/// @param ptr: USER pointer to free
/// @param class_idx: size class index
/// @param ctx: TLS context
/// @param snap: policy snapshot
void small_free_fast_v6(void* ptr,
uint32_t class_idx,
SmallHeapCtxV6* ctx,
const SmallPolicySnapshotV6* snap) {
// Bounds check
if (unlikely(class_idx >= 8)) {
hak_pool_free(ptr, 0, 0);
return;
}
uint8_t route = snap->route_kind[class_idx];
// v6-5: Check if this is CORE_V6 route
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
hak_pool_free(ptr, 0, 0);
return;
}
// Convert USER pointer to BASE pointer
void* base = SMALL_V6_BASE_FROM_USER(ptr);
// Fast path: TLS segment ownership + TLS push
if (likely(small_tls_owns_ptr_v6(ctx, ptr))) {
// C6 TLS push
if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
return;
}
// C5 TLS push (Phase v6-5)
if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor Phase v6-1: C6-only route stub (v1/pool fallback) Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation - 2MiB segment / 64KiB page allocation - O(1) ptr→page_meta lookup with segment masking - C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s) Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching) - TLS ownership fast-path skip page_meta for 90%+ of frees - Batch header writes during refill (32 allocs = 1 header write) - TLS batch refill (1/32 refill frequency) - C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅ Phase v6-4: Mixed hang fix (segment metadata lookup correction) - Root cause: metadata lookup was reading mmap region instead of TLS slot - Fix: use TLS slot descriptor with in_use validation - Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅ Phase v6-refactor: Code quality improvements (macro unification + inline + docs) - Add SMALL_V6_* prefix macros (header, pointer conversion, page index) - Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6) - Doxygen-style comments for all public functions - Result: 0 compiler warnings, maintained +1.2% performance Files: - core/box/smallobject_core_v6_box.h (new, type & API definitions) - core/box/smallobject_cold_iface_v6.h (new, cold iface API) - core/box/smallsegment_v6_box.h (new, segment type definitions) - core/smallobject_core_v6.c (new, C6 alloc/free implementation) - core/smallobject_cold_iface_v6.c (new, refill/retire logic) - core/smallsegment_v6.c (new, segment allocator) - docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document) - core/box/tiny_route_env_box.h (modified, v6 route added) - core/front/malloc_tiny_fast.h (modified, v6 case in route switch) - Makefile (modified, v6 objects added) - CURRENT_TASK.md (modified, v6 status added) Status: - C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅ - Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅ - Build: 0 warnings, fully documented ✅ 🤖 Generated with Claude Code Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
return;
}
}
// Slow path: page_meta lookup and push to page freelist
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
if (!page) {
hak_pool_free(ptr, 0, 0);
return;
}
// Push to page freelist (using BASE pointer)
*(void**)base = page->free_list;
page->free_list = base;
if (page->used > 0) page->used--;
// Retire empty page
if (page->used == 0) {
small_cold_v6_retire_page(page);
}
}
// ============================================================================
// Cold Path Implementation (Phase v6-6)
// ============================================================================
/// Cold path: alloc with refill - called when TLS is empty
/// @param class_idx: C5 or C6
/// @param ctx: TLS context
/// @return: USER pointer or NULL
void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) {
// Refill TLS from page
SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
if (!page || !page->free_list) {
return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0);
}
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
int filled = 0;
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
filled++;
}
page->used += filled;
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
if (ctx->tls_count_c6 > 0) {
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
int filled = 0;
while (page->free_list && filled < max_fill - 1) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
((uint8_t*)blk)[0] = header_byte;
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
filled++;
}
page->used += filled;
if (page->free_list) {
void* blk = page->free_list;
page->free_list = *(void**)blk;
page->used++;
((uint8_t*)blk)[0] = header_byte;
return SMALL_V6_USER_FROM_BASE(blk);
}
if (ctx->tls_count_c5 > 0) {
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
return SMALL_V6_USER_FROM_BASE(blk);
}
}
return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : 256, 0);
}
/// Cold path: free to page freelist - called when TLS full or cross-thread
/// @param ptr: USER pointer
/// @param class_idx: C5 or C6
void small_free_cold_v6(void* ptr, uint32_t class_idx) {
(void)class_idx; // Not needed for page lookup
void* base = SMALL_V6_BASE_FROM_USER(ptr);
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
if (!page) {
hak_pool_free(ptr, 0, 0);
return;
}
*(void**)base = page->free_list;
page->free_list = base;
if (page->used > 0) page->used--;
if (page->used == 0) {
small_cold_v6_retire_page(page);
}
}