Phase 36-37: TinyHotHeap v2 HotBox redesign and C7 current_page policy fixes
- Redefine TinyHotHeap v2 as a per-thread Hot Box with clear boundaries
- Add comprehensive OS statistics tracking for SS allocations
- Implement route-based free handling for TinyHeap v2
- Add C6/C7 debugging and statistics improvements
- Update documentation with implementation guidelines and analysis
- Add new box headers for stats, routing, and front-end management
This commit is contained in:
@ -24,6 +24,7 @@
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdatomic.h>
|
||||
#include <pthread.h> // For pthread_self() in cross-thread check
|
||||
#include "../hakmem_build_flags.h"
|
||||
#include "../hakmem_tiny_config.h" // For TINY_NUM_CLASSES
|
||||
@ -38,7 +39,10 @@
|
||||
#include "../box/tiny_front_cold_box.h" // Phase 4-Step2: Cold Path Box
|
||||
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
|
||||
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
|
||||
#include "../box/tiny_hotheap_v2_box.h" // TinyHotHeap v2 (Phase31 A/B)
|
||||
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
|
||||
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
|
||||
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
|
||||
|
||||
// Helper: current thread id (low 32 bits) for owner check
|
||||
#ifndef TINY_SELF_U32_LOCAL_DEFINED
|
||||
@ -98,31 +102,47 @@ static inline int front_gate_unified_enabled(void) {
|
||||
//
|
||||
__attribute__((always_inline))
|
||||
static inline void* malloc_tiny_fast(size_t size) {
|
||||
// 1. size → class_idx (inline table lookup, 1-2 instructions)
|
||||
// size → class_idx を 1 回だけ決定
|
||||
int class_idx = hak_tiny_size_to_class(size);
|
||||
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
|
||||
return NULL;
|
||||
}
|
||||
tiny_front_alloc_stat_inc(class_idx);
|
||||
|
||||
// Optional: TinyHeap front(ENV: HAKMEM_TINY_HEAP_BOX=1 + HAKMEM_TINY_HEAP_CLASSES bitmask)
|
||||
const int use_tiny_heap = (class_idx == 7)
|
||||
? tiny_c7_heap_mode_enabled()
|
||||
: tiny_heap_class_route_enabled(class_idx);
|
||||
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||
if (class_idx == 7 && size == 1024) {
|
||||
return tiny_c7_alloc_fast(size);
|
||||
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
|
||||
switch (route) {
|
||||
case TINY_ROUTE_HOTHEAP_V2: {
|
||||
if (class_idx == 7) {
|
||||
void* v2p = tiny_hotheap_v2_alloc(7);
|
||||
if (TINY_HOT_LIKELY(v2p != NULL)) {
|
||||
return v2p;
|
||||
}
|
||||
tiny_hotheap_v2_record_route_fallback();
|
||||
}
|
||||
// fallthrough to TinyHeap v1
|
||||
}
|
||||
return tiny_heap_alloc_class_fast(ctx, class_idx, size);
|
||||
case TINY_ROUTE_HEAP: {
|
||||
void* heap_ptr = NULL;
|
||||
if (class_idx == 7) {
|
||||
heap_ptr = tiny_c7_alloc_fast(size);
|
||||
} else {
|
||||
heap_ptr = tiny_heap_alloc_class_fast(tiny_heap_ctx_for_thread(), class_idx, size);
|
||||
}
|
||||
if (heap_ptr) {
|
||||
return heap_ptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case TINY_ROUTE_LEGACY:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
// 2. Phase 4-Step2: Hot/Cold Path Box
|
||||
// Try hot path first (cache hit, 1 branch)
|
||||
// Legacy Tiny front
|
||||
void* ptr = tiny_hot_alloc_fast(class_idx);
|
||||
if (TINY_HOT_LIKELY(ptr != NULL)) {
|
||||
// Hot path: Cache hit → return USER pointer
|
||||
return ptr;
|
||||
}
|
||||
|
||||
// 3. Cold path: Cache miss → refill + alloc
|
||||
// noinline, cold attribute keeps this code out of hot path
|
||||
return tiny_cold_refill_and_alloc(class_idx);
|
||||
}
|
||||
|
||||
@ -141,7 +161,7 @@ __attribute__((always_inline))
|
||||
static inline int free_tiny_fast(void* ptr) {
|
||||
if (__builtin_expect(!ptr, 0)) return 0;
|
||||
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
#if HAKMEM_TINY_HEADER_CLASSIDX
|
||||
// 1. ページ境界ガード:
|
||||
// ptr がページ先頭 (offset==0) の場合、ptr-1 は別ページか未マップ領域になる可能性がある。
|
||||
// その場合はヘッダ読みを行わず、通常 free 経路にフォールバックする。
|
||||
@ -169,6 +189,9 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
|
||||
// 4. BASE を計算して Unified Cache に push
|
||||
void* base = (void*)((char*)ptr - 1);
|
||||
tiny_front_free_stat_inc(class_idx);
|
||||
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
|
||||
const int use_tiny_heap = tiny_route_is_heap_kind(route);
|
||||
|
||||
// TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
|
||||
// In Release builds, we trust header magic (0xA0) as sufficient validation.
|
||||
@ -192,9 +215,6 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
#endif
|
||||
}
|
||||
|
||||
const int use_tiny_heap = (class_idx == 7)
|
||||
? tiny_c7_heap_mode_enabled()
|
||||
: tiny_heap_class_route_enabled(class_idx);
|
||||
if (__builtin_expect(g_larson_fix || use_tiny_heap, 0)) {
|
||||
// Phase 12 optimization: Use fast mask-based lookup (~5-10 cycles vs 50-100)
|
||||
SuperSlab* ss = ss_fast_lookup(base);
|
||||
@ -203,6 +223,7 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
if (__builtin_expect(slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss), 1)) {
|
||||
uint32_t self_tid = tiny_self_u32_local();
|
||||
uint8_t owner_tid_low = ss_slab_meta_owner_tid_low_get(ss, slab_idx);
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
// LARSON FIX: Use bits 8-15 for comparison (pthread TIDs aligned to 256 bytes)
|
||||
uint8_t self_tid_cmp = (uint8_t)((self_tid >> 8) & 0xFFu);
|
||||
#if !HAKMEM_BUILD_RELEASE
|
||||
@ -226,24 +247,37 @@ static inline int free_tiny_fast(void* ptr) {
|
||||
fflush(stderr);
|
||||
}
|
||||
#endif
|
||||
TinySlabMeta* meta = &ss->slabs[slab_idx];
|
||||
if (tiny_free_remote_box(ss, slab_idx, meta, ptr, self_tid)) {
|
||||
return 1; // handled via remote queue
|
||||
}
|
||||
return 0; // remote push failed; fall back to normal path
|
||||
} else if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||
if (class_idx == 7) {
|
||||
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
|
||||
} else {
|
||||
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
|
||||
}
|
||||
// Same-thread + TinyHeap route → route-based free
|
||||
if (__builtin_expect(use_tiny_heap, 0)) {
|
||||
switch (route) {
|
||||
case TINY_ROUTE_HOTHEAP_V2:
|
||||
tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
|
||||
return 1;
|
||||
case TINY_ROUTE_HEAP: {
|
||||
tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
|
||||
if (class_idx == 7) {
|
||||
tiny_c7_free_fast_with_meta(ss, slab_idx, base);
|
||||
} else {
|
||||
tiny_heap_free_class_fast_with_meta(ctx, class_idx, ss, slab_idx, base);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (use_tiny_heap) {
|
||||
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
|
||||
if (route == TINY_ROUTE_HOTHEAP_V2) {
|
||||
tiny_hotheap_v2_record_free_fallback();
|
||||
}
|
||||
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user