Phase 36-37: TinyHotHeap v2 HotBox redesign and C7 current_page policy fixes

- Redefine TinyHotHeap v2 as per-thread Hot Box with clear boundaries
- Add comprehensive OS statistics tracking for SS allocations
- Implement route-based free handling for TinyHeap v2
- Add C6/C7 debugging and statistics improvements
- Update documentation with implementation guidelines and analysis
- Add new box headers for stats, routing, and front-end management
This commit is contained in:
Moe Charm (CI)
2025-12-08 21:30:21 +09:00
parent 34a8fd69b6
commit 8f18963ad5
37 changed files with 3205 additions and 167 deletions

View File

@@ -24,6 +24,7 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdatomic.h>
#include <pthread.h> // For pthread_self() in cross-thread check
#include "../hakmem_build_flags.h"
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
@@ -38,7 +39,10 @@
#include "../box/tiny_front_cold_box.h" // Phase 4-Step2: Cold Path Box
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
#include "../box/tiny_hotheap_v2_box.h" // TinyHotHeap v2 (Phase31 A/B)
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
// Helper: current thread id (low 32 bits) for owner check
#ifndef TINY_SELF_U32_LOCAL_DEFINED
@@ -98,31 +102,47 @@ static inline int front_gate_unified_enabled(void) {
//
__attribute__((always_inline))
static inline void* malloc_tiny_fast(size_t size) {
// 1. size → class_idx (inline table lookup, 1-2 instructions)
// size → class_idx を 1 回だけ決定
int class_idx = hak_tiny_size_to_class(size);
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
return NULL;
}
tiny_front_alloc_stat_inc(class_idx);
// Optional: TinyHeap front (ENV: HAKMEM_TINY_HEAP_BOX=1 + HAKMEM_TINY_HEAP_CLASSES bitmask)
const int use_tiny_heap = (class_idx == 7)
? tiny_c7_heap_mode_enabled()
: tiny_heap_class_route_enabled(class_idx);
if (__builtin_expect(use_tiny_heap, 0)) {
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
if (class_idx == 7 && size == 1024) {
return tiny_c7_alloc_fast(size);
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
switch (route) {
case TINY_ROUTE_HOTHEAP_V2: {
if (class_idx == 7) {
void* v2p = tiny_hotheap_v2_alloc(7);
if (TINY_HOT_LIKELY(v2p != NULL)) {
return v2p;
}
tiny_hotheap_v2_record_route_fallback();
}
// fallthrough to TinyHeap v1
}
return tiny_heap_alloc_class_fast(ctx, class_idx, size);
case TINY_ROUTE_HEAP: {
void* heap_ptr = NULL;
if (class_idx == 7) {
heap_ptr = tiny_c7_alloc_fast(size);
} else {
heap_ptr = tiny_heap_alloc_class_fast(tiny_heap_ctx_for_thread(), class_idx, size);
}
if (heap_ptr) {
return heap_ptr;
}
break;
}
case TINY_ROUTE_LEGACY:
default:
break;
}
// 2. Phase 4-Step2: Hot/Cold Path Box
// Try hot path first (cache hit, 1 branch)
// Legacy Tiny front
void* ptr = tiny_hot_alloc_fast(class_idx);
if (TINY_HOT_LIKELY(ptr != NULL)) {
// Hot path: Cache hit → return USER pointer
return ptr;
}
// 3. Cold path: Cache miss → refill + alloc
// noinline, cold attribute keeps this code out of hot path
return tiny_cold_refill_and_alloc(class_idx);
}
@@ -141,7 +161,7 @@ __attribute__((always_inline))
static inline int free_tiny_fast(void* ptr) {
if (__builtin_expect(!ptr, 0)) return 0;
#if HAKMEM_TINY_HEADER_CLASSIDX
#if HAKMEM_TINY_HEADER_CLASSIDX
// 1. ページ境界ガード:
// ptr がページ先頭 (offset==0) の場合、ptr-1 は別ページか未マップ領域になる可能性がある。
// その場合はヘッダ読みを行わず、通常 free 経路にフォールバックする。
@@ -169,6 +189,9 @@ static inline int free_tiny_fast(void* ptr) {
// 4. BASE を計算して Unified Cache に push
void* base = (void*)((char*)ptr - 1);
tiny_front_free_stat_inc(class_idx);
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
const int use_tiny_heap = tiny_route_is_heap_kind(route);
// TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
// In Release builds, we trust header magic (0xA0) as sufficient validation.
@@ -192,9 +215,6 @@ static inline int free_tiny_fast(void* ptr) {
#endif
}
const int use_tiny_heap = (class_idx == 7)
? tiny_c7_heap_mode_enabled()
: tiny_heap_class_route_enabled(class_idx);
if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
SuperSlab* ss = ss_fast_lookup(base);
@@ -203,6 +223,7 @@ static inline int free_tiny_fast(void* ptr) {
if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
uint32_t self_tid = tiny_self_u32_local();
uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
TinySlabMeta* meta = &ss->slabs[slab_idx];
// LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
#if !HAKMEM_BUILD_RELEASE
@@ -226,24 +247,37 @@
fflush(stderr);
}
#endif
TinySlabMeta* meta = &ss->slabs[slab_idx];
if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
return 1; // handled via remote queue
}
return 0; // remote push failed; fall back to normal path
} else if (__builtin_expect(use_tiny_heap, 0)) {
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
if (class_idx == 7) {
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
} else {
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
}
// Same-thread + TinyHeap route → route-based free
if (__builtin_expect(use_tiny_heap, 0)) {
switch (route) {
case TINY_ROUTE_HOTHEAP_V2:
tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
return 1;
case TINY_ROUTE_HEAP: {
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
if (class_idx == 7) {
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
} else {
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
}
return 1;
}
default:
break;
}
return 1;
}
}
}
if (use_tiny_heap) {
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
if (route == TINY_ROUTE_HOTHEAP_V2) {
tiny_hotheap_v2_record_free_fallback();
}
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
return 1;
}