2025-12-11 23:07:26 +09:00
|
|
|
// smallobject_core_v6.c - SmallObject Core v6 implementation
|
|
|
|
|
//
|
|
|
|
|
// Phase V6-HDR-0: C6-only headerless core with OBSERVE mode
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
2025-12-11 23:07:26 +09:00
|
|
|
#include <stdio.h>
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
#include "box/smallobject_core_v6_box.h"
|
|
|
|
|
#include "box/smallobject_cold_iface_v6.h"
|
|
|
|
|
#include "box/smallsegment_v6_box.h"
|
|
|
|
|
#include "box/tiny_route_env_box.h"
|
2025-12-11 23:51:48 +09:00
|
|
|
#include "box/region_id_v6_box.h"
|
|
|
|
|
#include "box/smallobject_v6_env_box.h"
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
|
|
|
|
|
// Branch-prediction hints built on __builtin_expect.
// Each macro carries its own guard: if an earlier header already provides
// only one of the two, the other is still defined here and the existing one
// is not redefined.
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
#endif

#ifndef unlikely
#define unlikely(x) __builtin_expect(!!(x), 0)
#endif
|
|
|
|
|
|
2025-12-11 23:07:26 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
// OBSERVE Mode (V6-HDR-0)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// ENV: HAKMEM_SMALL_V6_OBSERVE=1 enables logging at free entry
|
|
|
|
|
|
|
|
|
|
// Tri-state values stored in g_v6_observe.
enum {
    V6_OBSERVE_UNINIT = -1,  // environment variable has not been consulted yet
    V6_OBSERVE_OFF = 0,      // OBSERVE logging disabled
    V6_OBSERVE_ON = 1        // OBSERVE logging enabled
};

// Cached OBSERVE state; starts out as "not yet resolved".
static int g_v6_observe = V6_OBSERVE_UNINIT;
|
|
|
|
|
|
|
|
|
|
static inline int small_v6_observe_enabled(void) {
|
|
|
|
|
if (unlikely(g_v6_observe == V6_OBSERVE_UNINIT)) {
|
|
|
|
|
const char* env = getenv("HAKMEM_SMALL_V6_OBSERVE");
|
|
|
|
|
g_v6_observe = (env && env[0] == '1') ? V6_OBSERVE_ON : V6_OBSERVE_OFF;
|
|
|
|
|
}
|
|
|
|
|
return g_v6_observe == V6_OBSERVE_ON;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Write one OBSERVE trace line for a free call to stderr
/// (meant to be called when OBSERVE=1).
/// @param ptr: pointer being freed, printed with %p
/// @param class_idx: size class index shown in the trace line
/// @param tls_owned: ownership flag shown in the trace line, as passed by the caller
static void small_v6_observe_free(void* ptr, uint32_t class_idx, int tls_owned) {
    FILE* out = stderr;

    fprintf(out,
            "[V6_OBSERVE] free ptr=%p class=%u tls_owned=%d\n",
            ptr,
            class_idx,
            tls_owned);
}
|
|
|
|
|
|
2025-12-11 23:51:48 +09:00
|
|
|
// ============================================================================
|
|
|
|
|
// REGION_OBSERVE Validation (V6-HDR-1)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Note: small_v6_region_observe_enabled() is now in smallobject_v6_env_box.h
|
|
|
|
|
|
|
|
|
|
/// Validate class_idx via RegionIdBox lookup (called when REGION_OBSERVE=1)
|
|
|
|
|
/// @param ptr: USER pointer
|
|
|
|
|
/// @param class_idx_hint: class_idx from front caller
|
|
|
|
|
static void small_v6_region_observe_validate(void* ptr, uint32_t class_idx_hint) {
|
|
|
|
|
RegionLookupV6 lk = region_id_lookup_v6(ptr);
|
|
|
|
|
|
|
|
|
|
// Log the lookup to REGION_ID_BOX observe (if enabled)
|
|
|
|
|
region_id_observe_lookup(ptr, &lk);
|
|
|
|
|
|
|
|
|
|
if (lk.kind == REGION_KIND_SMALL_V6 && lk.page_meta != NULL) {
|
|
|
|
|
SmallPageMetaV6* page = (SmallPageMetaV6*)lk.page_meta;
|
|
|
|
|
if (page->class_idx != class_idx_hint) {
|
|
|
|
|
fprintf(stderr, "[V6_REGION_OBSERVE] MISMATCH ptr=%p "
|
|
|
|
|
"hint=%u actual=%u page_meta=%p\n",
|
|
|
|
|
ptr, class_idx_hint, page->class_idx, (void*)page);
|
|
|
|
|
}
|
|
|
|
|
} else if (lk.kind != REGION_KIND_UNKNOWN) {
|
|
|
|
|
// ptr is in a different kind of region (not v6)
|
|
|
|
|
fprintf(stderr, "[V6_REGION_OBSERVE] KIND_MISMATCH ptr=%p "
|
|
|
|
|
"kind=%s (expected SMALL_V6)\n",
|
|
|
|
|
ptr, region_kind_to_string(lk.kind));
|
|
|
|
|
}
|
|
|
|
|
// REGION_KIND_UNKNOWN: ptr not in any v6 segment (OK for now)
|
|
|
|
|
}
|
|
|
|
|
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
// TLS context
|
|
|
|
|
static __thread struct SmallHeapCtxV6 g_small_heap_ctx_v6;
|
|
|
|
|
static __thread int g_small_heap_ctx_v6_init = 0;
|
|
|
|
|
|
|
|
|
|
// TLS policy snapshot
|
|
|
|
|
static __thread struct SmallPolicySnapshotV6 g_snap_v6;
|
|
|
|
|
static __thread int g_snap_v6_init = 0;
|
|
|
|
|
|
|
|
|
|
/// Get TLS heap context for v6 (lazy initialization)
|
|
|
|
|
/// @return: TLS context pointer (never NULL)
|
|
|
|
|
SmallHeapCtxV6* small_heap_ctx_v6(void) {
|
|
|
|
|
if (!g_small_heap_ctx_v6_init) {
|
|
|
|
|
memset(&g_small_heap_ctx_v6, 0, sizeof(g_small_heap_ctx_v6));
|
|
|
|
|
|
|
|
|
|
// Initialize TLS segment ownership range
|
|
|
|
|
SmallSegmentV6* seg = small_segment_v6_acquire_for_thread();
|
|
|
|
|
if (seg && small_segment_v6_valid(seg)) {
|
|
|
|
|
g_small_heap_ctx_v6.tls_seg_base = seg->base;
|
|
|
|
|
g_small_heap_ctx_v6.tls_seg_end = seg->base + SMALL_SEGMENT_V6_SIZE;
|
2025-12-11 23:51:48 +09:00
|
|
|
|
|
|
|
|
// Phase V6-HDR-3: Register segment with RegionIdBox (TLS scope)
|
|
|
|
|
region_id_register_v6_segment(seg);
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g_small_heap_ctx_v6_init = 1;
|
|
|
|
|
}
|
|
|
|
|
return &g_small_heap_ctx_v6;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Get TLS policy snapshot for v6 (lazy initialization)
|
|
|
|
|
/// @return: Policy snapshot pointer (never NULL)
|
|
|
|
|
const SmallPolicySnapshotV6* tiny_policy_snapshot_v6(void) {
|
|
|
|
|
if (!g_snap_v6_init) {
|
|
|
|
|
memset(&g_snap_v6, 0, sizeof(g_snap_v6));
|
|
|
|
|
|
|
|
|
|
// Initialize route_kind from tiny_route API (this ensures init is done)
|
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
|
|
|
g_snap_v6.route_kind[i] = (uint8_t)tiny_route_for_class((uint8_t)i);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
g_snap_v6_init = 1;
|
|
|
|
|
}
|
|
|
|
|
return &g_snap_v6;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Forward declarations for pool v1 fallback
|
|
|
|
|
extern void* hak_pool_try_alloc(size_t size, uintptr_t site_id);
|
|
|
|
|
extern void hak_pool_free(void* ptr, size_t size, uintptr_t site_id);
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Allocation Implementation
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Allocate block from C6 v6 TLS freelist or refill
|
|
|
|
|
/// @param size: requested size (unused, class_idx determines size)
|
|
|
|
|
/// @param class_idx: size class index (must be C6 for v6 route)
|
|
|
|
|
/// @param ctx: TLS context
|
|
|
|
|
/// @param snap: policy snapshot
|
|
|
|
|
/// @return: USER pointer (BASE+1) or NULL on fallback
|
|
|
|
|
void* small_alloc_fast_v6(size_t size,
|
|
|
|
|
uint32_t class_idx,
|
|
|
|
|
SmallHeapCtxV6* ctx,
|
|
|
|
|
const SmallPolicySnapshotV6* snap) {
|
|
|
|
|
(void)size;
|
|
|
|
|
|
|
|
|
|
// Bounds check
|
|
|
|
|
if (unlikely(class_idx >= 8)) {
|
|
|
|
|
return hak_pool_try_alloc(size, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t route = snap->route_kind[class_idx];
|
|
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// v6-5: Support C6 and C5 classes
|
|
|
|
|
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
return hak_pool_try_alloc(size, 0);
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// C6 fast path
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
|
|
|
|
|
// Fast path: TLS freelist hit
|
|
|
|
|
if (likely(ctx->tls_count_c6 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
|
|
|
|
|
// v6-3: Header already written during refill, just return USER pointer
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// C5 fast path (Phase v6-5)
|
|
|
|
|
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
|
|
|
|
|
// Fast path: TLS freelist hit
|
|
|
|
|
if (likely(ctx->tls_count_c5 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
// C4 fast path (Phase v6-6)
|
|
|
|
|
else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
|
|
|
|
|
// Fast path: TLS freelist hit
|
|
|
|
|
if (likely(ctx->tls_count_c4 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-12-11 15:50:14 +09:00
|
|
|
else {
|
|
|
|
|
// Unsupported class for v6
|
|
|
|
|
return hak_pool_try_alloc(size, 0);
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Slow path: refill TLS with multiple blocks (batching)
|
|
|
|
|
SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
|
|
|
|
|
if (!page || !page->free_list) {
|
|
|
|
|
return hak_pool_try_alloc(size, 0); // Safety fallback
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// v6-5: Batch refill - support C6 and C5
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
|
|
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
|
|
|
|
|
// C6 refill path
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
// Fill TLS (leave room for 1 to return)
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
// Pop one more to return to caller
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// If we filled TLS but no more blocks, pop from TLS
|
|
|
|
|
if (ctx->tls_count_c6 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
}
|
2025-12-11 15:50:14 +09:00
|
|
|
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
|
|
|
|
|
// C5 refill path (Phase v6-5)
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
// Fill TLS (leave room for 1 to return)
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
// Pop one more to return to caller
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// If we filled TLS but no more blocks, pop from TLS
|
|
|
|
|
if (ctx->tls_count_c5 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
}
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
|
|
|
|
|
// C4 refill path (Phase v6-6)
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
// Fill TLS (leave room for 1 to return)
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
// Pop one more to return to caller
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If we filled TLS but no more blocks, pop from TLS
|
|
|
|
|
if (ctx->tls_count_c4 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
|
|
|
|
|
// Should not reach here
|
|
|
|
|
return hak_pool_try_alloc(size, 0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Free Implementation
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Free block to C6 v6 TLS freelist or page freelist
|
|
|
|
|
/// @param ptr: USER pointer to free
|
|
|
|
|
/// @param class_idx: size class index
|
|
|
|
|
/// @param ctx: TLS context
|
|
|
|
|
/// @param snap: policy snapshot
|
|
|
|
|
void small_free_fast_v6(void* ptr,
|
|
|
|
|
uint32_t class_idx,
|
|
|
|
|
SmallHeapCtxV6* ctx,
|
|
|
|
|
const SmallPolicySnapshotV6* snap) {
|
|
|
|
|
// Bounds check
|
|
|
|
|
if (unlikely(class_idx >= 8)) {
|
|
|
|
|
hak_pool_free(ptr, 0, 0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t route = snap->route_kind[class_idx];
|
|
|
|
|
|
2025-12-11 15:50:14 +09:00
|
|
|
// v6-5: Check if this is CORE_V6 route
|
|
|
|
|
if (route != TINY_ROUTE_SMALL_HEAP_V6) {
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
hak_pool_free(ptr, 0, 0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Convert USER pointer to BASE pointer
|
|
|
|
|
void* base = SMALL_V6_BASE_FROM_USER(ptr);
|
|
|
|
|
|
2025-12-11 23:07:26 +09:00
|
|
|
// V6-HDR-0: OBSERVE mode logging (check TLS ownership first for log)
|
|
|
|
|
int tls_owned = small_tls_owns_ptr_v6(ctx, ptr);
|
|
|
|
|
if (unlikely(small_v6_observe_enabled())) {
|
|
|
|
|
small_v6_observe_free(ptr, class_idx, tls_owned);
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-11 23:51:48 +09:00
|
|
|
// V6-HDR-1: REGION_OBSERVE mode - validate class_idx via RegionIdBox
|
|
|
|
|
if (unlikely(small_v6_region_observe_enabled())) {
|
|
|
|
|
small_v6_region_observe_validate(ptr, class_idx);
|
|
|
|
|
}
|
|
|
|
|
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
// Fast path: TLS segment ownership + TLS push
|
2025-12-11 23:07:26 +09:00
|
|
|
if (likely(tls_owned)) {
|
2025-12-11 15:50:14 +09:00
|
|
|
// C6 TLS push
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
// C5 TLS push (Phase v6-5)
|
|
|
|
|
if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
return;
|
|
|
|
|
}
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
// C4 TLS push (Phase v6-6)
|
|
|
|
|
if (class_idx == SMALL_V6_C4_CLASS_IDX && ctx->tls_count_c4 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c4[ctx->tls_count_c4++] = base;
|
|
|
|
|
return;
|
|
|
|
|
}
|
Phase v6-1/2/3/4: SmallObject Core v6 - C6-only implementation + refactor
Phase v6-1: C6-only route stub (v1/pool fallback)
Phase v6-2: Segment v6 + ColdIface v6 + Core v6 HotPath implementation
- 2MiB segment / 64KiB page allocation
- O(1) ptr→page_meta lookup with segment masking
- C6-heavy A/B: SEGV-free but -44% performance (15.3M ops/s)
Phase v6-3: Thin-layer optimization (TLS ownership check + batch header + refill batching)
- TLS ownership fast-path skip page_meta for 90%+ of frees
- Batch header writes during refill (32 allocs = 1 header write)
- TLS batch refill (1/32 refill frequency)
- C6-heavy A/B: v6-2 15.3M → v6-3 27.1M ops/s (±0% vs baseline) ✅
Phase v6-4: Mixed hang fix (segment metadata lookup correction)
- Root cause: metadata lookup was reading mmap region instead of TLS slot
- Fix: use TLS slot descriptor with in_use validation
- Mixed health: 5M iterations SEGV-free, 35.8M ops/s ✅
Phase v6-refactor: Code quality improvements (macro unification + inline + docs)
- Add SMALL_V6_* prefix macros (header, pointer conversion, page index)
- Extract inline validation functions (small_page_v6_valid, small_ptr_in_segment_v6)
- Doxygen-style comments for all public functions
- Result: 0 compiler warnings, maintained +1.2% performance
Files:
- core/box/smallobject_core_v6_box.h (new, type & API definitions)
- core/box/smallobject_cold_iface_v6.h (new, cold iface API)
- core/box/smallsegment_v6_box.h (new, segment type definitions)
- core/smallobject_core_v6.c (new, C6 alloc/free implementation)
- core/smallobject_cold_iface_v6.c (new, refill/retire logic)
- core/smallsegment_v6.c (new, segment allocator)
- docs/analysis/SMALLOBJECT_CORE_V6_DESIGN.md (new, design document)
- core/box/tiny_route_env_box.h (modified, v6 route added)
- core/front/malloc_tiny_fast.h (modified, v6 case in route switch)
- Makefile (modified, v6 objects added)
- CURRENT_TASK.md (modified, v6 status added)
Status:
- C6-heavy: v6 OFF 27.1M → v6-3 ON 27.1M ops/s (±0%) ✅
- Mixed: v6 ON 35.8M ops/s (C6-only, other classes via v1) ✅
- Build: 0 warnings, fully documented ✅
🤖 Generated with Claude Code
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 15:29:59 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Slow path: page_meta lookup and push to page freelist
|
|
|
|
|
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
|
|
|
|
|
if (!page) {
|
|
|
|
|
hak_pool_free(ptr, 0, 0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Push to page freelist (using BASE pointer)
|
|
|
|
|
*(void**)base = page->free_list;
|
|
|
|
|
page->free_list = base;
|
|
|
|
|
if (page->used > 0) page->used--;
|
|
|
|
|
|
|
|
|
|
// Retire empty page
|
|
|
|
|
if (page->used == 0) {
|
|
|
|
|
small_cold_v6_retire_page(page);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-12-11 15:59:29 +09:00
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Cold Path Implementation (Phase v6-6)
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Cold path: alloc with refill - called when TLS is empty
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
/// @param class_idx: C4, C5 or C6
|
2025-12-11 15:59:29 +09:00
|
|
|
/// @param ctx: TLS context
|
|
|
|
|
/// @return: USER pointer or NULL
|
|
|
|
|
void* small_alloc_cold_v6(uint32_t class_idx, SmallHeapCtxV6* ctx) {
|
|
|
|
|
// Refill TLS from page
|
|
|
|
|
SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
|
|
|
|
|
if (!page || !page->free_list) {
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0);
|
2025-12-11 15:59:29 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
|
|
|
|
|
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c6;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c6 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c5;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c5 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP - ctx->tls_count_c4;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c4 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-12-11 15:59:29 +09:00
|
|
|
|
Phase FREE-FRONT-V3-1: Free route snapshot infrastructure + build fix
Summary:
========
Implemented Phase FREE-FRONT-V3 infrastructure to optimize free hotpath by:
1. Creating snapshot-based route decision table (consolidating route logic)
2. Removing redundant ENV checks from hot path
3. Preparing for future integration into hak_free_at()
Key Changes:
============
1. NEW FILES:
- core/box/free_front_v3_env_box.h: Route snapshot definition & API
- core/box/free_front_v3_env_box.c: Snapshot initialization & caching
2. Infrastructure Details:
- FreeRouteSnapshotV3: Maps class_idx → free_route_kind for all 8 classes
- Routes defined: LEGACY, TINY_V3, CORE_V6_C6, POOL_V1
- ENV-gated initialization (HAKMEM_TINY_FREE_FRONT_V3_ENABLED, default OFF)
- Per-thread TLS caching to avoid repeated ENV reads
3. Design Goals:
- Consolidate tiny_route_for_class() results into snapshot table
- Remove C7 ULTRA / v4 / v5 / v6 ENV checks from hot path
- Limit lookup (ss_fast_lookup/slab_index_for) to paths that truly need it
- Clear ownership boundary: front v3 handles routing, downstream handles free
4. Phase Plan:
- v3-1 ✅ COMPLETE: Infrastructure (snapshot table, ENV initialization, TLS cache)
- v3-2 (INFRASTRUCTURE ONLY): Placeholder integration in hak_free_api.inc.h
- v3-3 (FUTURE): Full integration + benchmark A/B to measure hotpath improvement
5. BUILD FIX:
- Added missing core/box/c7_meta_used_counter_box.o to OBJS_BASE in Makefile
- This symbol was referenced but not linked, causing undefined reference errors
- Benchmark targets now build cleanly without LTO
Status:
=======
- Build: ✅ PASS (bench_allocators_hakmem builds without errors)
- Integration: Currently DISABLED (default OFF, ready for v3-2 phase)
- No performance impact: Infrastructure-only, hotpath unchanged
Future Work:
============
- Phase v3-2: Integrate snapshot routing into hak_free_at() main path
- Phase v3-3: Measure free hotpath performance improvement (target: 1-2% less branch mispredict)
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2025-12-11 19:17:30 +09:00
|
|
|
return hak_pool_try_alloc(class_idx == SMALL_V6_C6_CLASS_IDX ? 512 : (class_idx == SMALL_V6_C5_CLASS_IDX ? 256 : 128), 0);
|
2025-12-11 15:59:29 +09:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Cold path: free to page freelist - called when TLS full or cross-thread
|
|
|
|
|
/// @param ptr: USER pointer
|
|
|
|
|
/// @param class_idx: C5 or C6
|
|
|
|
|
void small_free_cold_v6(void* ptr, uint32_t class_idx) {
|
|
|
|
|
(void)class_idx; // Not needed for page lookup
|
|
|
|
|
|
|
|
|
|
void* base = SMALL_V6_BASE_FROM_USER(ptr);
|
|
|
|
|
|
|
|
|
|
SmallPageMetaV6* page = small_page_meta_v6_of(ptr);
|
|
|
|
|
if (!page) {
|
|
|
|
|
hak_pool_free(ptr, 0, 0);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*(void**)base = page->free_list;
|
|
|
|
|
page->free_list = base;
|
|
|
|
|
if (page->used > 0) page->used--;
|
|
|
|
|
|
|
|
|
|
if (page->used == 0) {
|
|
|
|
|
small_cold_v6_retire_page(page);
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-12-11 23:51:48 +09:00
|
|
|
|
|
|
|
|
// ============================================================================
|
|
|
|
|
// Phase V6-HDR-2: Headerless Free/Alloc Implementation
|
|
|
|
|
// ============================================================================
|
|
|
|
|
|
|
|
|
|
/// Headerless free: uses RegionIdBox for ptr classification
|
|
|
|
|
/// @param ctx: TLS context
|
|
|
|
|
/// @param ptr: USER pointer to free
|
|
|
|
|
/// @param class_idx_hint: class_idx from front (header byte)
|
|
|
|
|
/// @return: true if handled by v6, false if fallback needed
|
|
|
|
|
bool small_v6_headerless_free(SmallHeapCtxV6* ctx, void* ptr, uint8_t class_idx_hint) {
|
|
|
|
|
// Step 1: RegionIdBox lookup (no header read)
|
|
|
|
|
RegionLookupV6 lk = region_id_lookup_v6(ptr);
|
|
|
|
|
|
|
|
|
|
if (lk.kind != REGION_KIND_SMALL_V6) {
|
|
|
|
|
// Not a v6 managed region -> front should fallback
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
SmallPageMetaV6* page = (SmallPageMetaV6*)lk.page_meta;
|
|
|
|
|
if (!page) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t class_idx = page->class_idx;
|
|
|
|
|
|
|
|
|
|
// Step 2: OBSERVE mode - validate class_idx hint
|
|
|
|
|
if (unlikely(small_v6_region_observe_enabled())) {
|
|
|
|
|
if (class_idx != class_idx_hint) {
|
|
|
|
|
fprintf(stderr, "[V6_HDR_FREE] MISMATCH ptr=%p hint=%u actual=%u\n",
|
|
|
|
|
ptr, class_idx_hint, class_idx);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 3: Convert USER -> BASE (no header touch)
|
|
|
|
|
void* base = SMALL_V6_BASE_FROM_USER(ptr);
|
|
|
|
|
|
|
|
|
|
// Step 4: TLS ownership check + TLS push
|
|
|
|
|
if (small_tls_owns_ptr_v6(ctx, ptr)) {
|
|
|
|
|
// C6 TLS push
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX && ctx->tls_count_c6 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c6[ctx->tls_count_c6++] = base;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// C5 TLS push
|
|
|
|
|
if (class_idx == SMALL_V6_C5_CLASS_IDX && ctx->tls_count_c5 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c5[ctx->tls_count_c5++] = base;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
// C4 TLS push
|
|
|
|
|
if (class_idx == SMALL_V6_C4_CLASS_IDX && ctx->tls_count_c4 < SMALL_V6_TLS_CAP) {
|
|
|
|
|
ctx->tls_freelist_c4[ctx->tls_count_c4++] = base;
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Step 5: Cold path - push to page freelist
|
|
|
|
|
*(void**)base = page->free_list;
|
|
|
|
|
page->free_list = base;
|
|
|
|
|
if (page->used > 0) page->used--;
|
|
|
|
|
|
|
|
|
|
// Retire empty page
|
|
|
|
|
if (page->used == 0) {
|
|
|
|
|
small_cold_v6_retire_page(page);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Headerless alloc: TLS pop without header write
|
|
|
|
|
/// Header is already written during carve/refill
|
|
|
|
|
/// @param ctx: TLS context
|
|
|
|
|
/// @param class_idx: class index (4=C4, 5=C5, 6=C6)
|
|
|
|
|
/// @return: USER pointer or NULL (fallback needed)
|
|
|
|
|
void* small_v6_headerless_alloc(SmallHeapCtxV6* ctx, uint8_t class_idx) {
|
|
|
|
|
// TLS fast path (no header write - already done in refill)
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
|
|
|
|
|
if (likely(ctx->tls_count_c6 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
} else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
|
|
|
|
|
if (likely(ctx->tls_count_c5 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
} else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
|
|
|
|
|
if (likely(ctx->tls_count_c4 > 0)) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TLS empty -> need refill from cold path
|
|
|
|
|
// NOTE: Refill writes header, so alloc doesn't need to
|
|
|
|
|
SmallPageMetaV6* page = small_cold_v6_refill_page(class_idx);
|
|
|
|
|
if (!page || !page->free_list) {
|
|
|
|
|
return NULL; // Front should fallback to legacy
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
uint8_t header_byte = SMALL_V6_HEADER_FROM_CLASS(class_idx);
|
|
|
|
|
int max_fill = SMALL_V6_TLS_CAP;
|
|
|
|
|
int filled = 0;
|
|
|
|
|
|
|
|
|
|
// Refill TLS from page
|
|
|
|
|
if (class_idx == SMALL_V6_C6_CLASS_IDX) {
|
|
|
|
|
max_fill -= ctx->tls_count_c6;
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte; // Header write on refill only
|
|
|
|
|
ctx->tls_freelist_c6[ctx->tls_count_c6++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c6 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c6[--ctx->tls_count_c6];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
} else if (class_idx == SMALL_V6_C5_CLASS_IDX) {
|
|
|
|
|
max_fill -= ctx->tls_count_c5;
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c5[ctx->tls_count_c5++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c5 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c5[--ctx->tls_count_c5];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
} else if (class_idx == SMALL_V6_C4_CLASS_IDX) {
|
|
|
|
|
max_fill -= ctx->tls_count_c4;
|
|
|
|
|
while (page->free_list && filled < max_fill - 1) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
ctx->tls_freelist_c4[ctx->tls_count_c4++] = blk;
|
|
|
|
|
filled++;
|
|
|
|
|
}
|
|
|
|
|
page->used += filled;
|
|
|
|
|
|
|
|
|
|
if (page->free_list) {
|
|
|
|
|
void* blk = page->free_list;
|
|
|
|
|
page->free_list = *(void**)blk;
|
|
|
|
|
page->used++;
|
|
|
|
|
((uint8_t*)blk)[0] = header_byte;
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (ctx->tls_count_c4 > 0) {
|
|
|
|
|
void* blk = ctx->tls_freelist_c4[--ctx->tls_count_c4];
|
|
|
|
|
return SMALL_V6_USER_FROM_BASE(blk);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return NULL;
|
|
|
|
|
}
|