Refactor: Extract TLS Bind Box for unified slab binding
- Created core/box/ss_tls_bind_box.h containing ss_tls_bind_one().
- Refactored superslab_refill() to use the new box.
- Updated signatures to avoid circular dependencies (tiny_self_u32).
- Added future integration points for Warm Pool and Page Box.
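
At the call site, the change amounts to the following (a condensed sketch of the superslab_refill() hunks further down in this diff; acquisition of ss, slab_idx, and tls is unchanged and omitted). Passing tiny_self_u32() in as owner_tid is what keeps the box itself free of a thread-identity dependency, which is the "circular dependencies" point above.

// Before: slab init, class_idx fix, TLS bind, and sanity rollback inlined in superslab_refill()
uint32_t my_tid = tiny_self_u32();
superslab_init_slab(ss, slab_idx, g_tiny_class_sizes[class_idx], my_tid);
// ... explicit meta->class_idx fix, tiny_tls_bind_slab(tls, ss, slab_idx), sanity check ...

// After: one call into the TLS Bind Box; on failure TLS is already reset to a safe empty state
if (!ss_tls_bind_one(class_idx, tls, ss, slab_idx, tiny_self_u32())) {
    return NULL;
}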
core/box/ss_tls_bind_box.h (new file, 112 lines)
@@ -0,0 +1,112 @@
// ss_tls_bind_box.h - TLS Bind Box
//
// Purpose:
// - Encapsulate the logic for binding a SuperSlab slab to a thread's TLS.
// - Ensures consistent initialization (superslab_init_slab) and TLS state updates.
// - Acts as a "public-ish" internal API for Shared Pool, Warm Pool, and Page Box.

#ifndef HAK_SS_TLS_BIND_BOX_H
#define HAK_SS_TLS_BIND_BOX_H

#include "../hakmem_tiny_superslab.h"
#include "../tiny_tls.h"
#include "../hakmem_tiny_config.h"
#include "../box/tiny_page_box.h" // For tiny_page_box_on_new_slab()
#include <stdio.h>

// Forward declaration if not included
// CRITICAL FIX: type must match core/hakmem_tiny_config.h (const size_t, not uint16_t)
extern const size_t g_tiny_class_sizes[TINY_NUM_CLASSES];

// TLS Bind Box: initialize one slab within a SuperSlab and bind it to TLS.
// Returns 1 on success, 0 on failure (TLS is left in a safe state).
//
// Arguments:
//   class_idx: Target size class index (0-7)
//   tls:       Pointer to thread-local TLS slab state (e.g. &g_tls_slabs[class_idx])
//   ss:        Target SuperSlab
//   slab_idx:  Index of the slab within the SuperSlab
//   owner_tid: Thread ID of the caller (used for slab ownership initialization)
//
// Pre-conditions:
// - ss and slab_idx must be valid and acquired by the caller.
// - tls must be the correct TLS state for the current thread/class.
//
// Post-conditions:
// - On success: TLS is updated to point to the new slab, and the slab is initialized.
// - On failure: TLS is reset to a clean empty state.
//
// Future Usage:
// - Shared Pool: superslab_refill() calls this after acquiring from global pool.
// - Warm Pool: Will call this after popping a warm SuperSlab to re-bind it to TLS.
// - Page Box: Will call this to bind a specific page (slab) chosen from its list.
static inline int ss_tls_bind_one(int class_idx,
                                  TinyTLSSlab* tls,
                                  SuperSlab* ss,
                                  int slab_idx,
                                  uint32_t owner_tid)
{
    if (!ss || slab_idx < 0 || class_idx < 0 || class_idx >= TINY_NUM_CLASSES) {
        return 0;
    }

    // Initialize slab metadata for this class/thread.
    // NOTE:
    // - superslab_init_slab must not recursively call superslab_refill().
    // - class_idx will be reflected in slab_meta->class_idx.
    superslab_init_slab(ss,
                        slab_idx,
                        g_tiny_class_sizes[class_idx],
                        owner_tid);

    // CRITICAL FIX: Ensure class_idx is set after init.
    // New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
    // superslab_init_slab() only sets it if meta->class_idx==255.
    // We must explicitly set it to the requested class to avoid C0/C7 confusion.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
    uint8_t old_cls = meta->class_idx;
#endif
    meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
    if (class_idx == 7 && old_cls != class_idx) {
        fprintf(stderr, "[SUPERSLAB_REFILL_FIX_C7] ss=%p slab=%d old_cls=%u new_cls=%d\n",
                (void*)ss, slab_idx, old_cls, class_idx);
    }
#endif

    // Bind this slab to TLS for fast subsequent allocations.
    // Inline implementation of tiny_tls_bind_slab() to avoid header dependencies.
    // Original logic:
    //   tls->ss = ss;
    //   tls->slab_idx = (uint8_t)slab_idx;
    //   tls->meta = &ss->slabs[slab_idx];
    //   tls->slab_base = tiny_slab_base_for(ss, slab_idx);
    //   tiny_page_box_on_new_slab(tls);

    tls->ss = ss;
    tls->slab_idx = (uint8_t)slab_idx;
    tls->meta = meta; // already computed above
    tls->slab_base = tiny_slab_base_for(ss, slab_idx);

    // Notify Tiny Page Box (if enabled for this class)
    tiny_page_box_on_new_slab(tls);

    // Sanity check: TLS must now describe this slab for this class.
    // On failure, revert TLS to safe state and return 0.
    if (!(tls->ss == ss &&
          tls->slab_idx == (uint8_t)slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx &&
          tls->slab_base != NULL)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_base = NULL;
        tls->slab_idx = 0;
        return 0;
    }

    return 1;
}

#endif // HAK_SS_TLS_BIND_BOX_H
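
To make the "Future Usage" notes above concrete, here is a hedged sketch of how the Warm Pool hot path could eventually route through this box instead of carving directly from the popped SuperSlab (see the FUTURE comment in the warm-pool hunk further down). The slab-selection helper pick_free_slab_idx() is a placeholder and does not exist in this commit; tiny_warm_pool_pop() and tiny_self_u32() are used as elsewhere in the diff.

// Sketch only: possible Warm Pool integration following the FUTURE notes.
static inline int warm_pool_bind_to_tls(int class_idx, TinyTLSSlab* tls)
{
    SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
    if (!warm_ss) return 0;                      // warm pool empty: caller falls back

    // 1. Choose a slab index (placeholder heuristic; real selection TBD).
    int slab_idx = pick_free_slab_idx(warm_ss);  // hypothetical helper, not in this commit
    if (slab_idx < 0) return 0;

    // 2. Bind it to TLS via the box (init + class fix + TLS update + sanity check).
    if (!ss_tls_bind_one(class_idx, tls, warm_ss, slab_idx, tiny_self_u32())) {
        return 0;                                // TLS left in a safe empty state
    }

    // 3. Fall through: subsequent allocations carve from the TLS-bound slab.
    return 1;
}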
@@ -13,6 +13,12 @@
// `max` BASE pointers using per-page freelist before falling back.
// - When disabled for a class: the box returns 0 and caller uses legacy path.
//
// - TLS Bind:
//   Future direction: The Page Box will select a (SuperSlab, slab_idx)
//   pair and use ss_tls_bind_one() to bind it to TLS. Subsequent
//   allocations will carve directly from that TLS-bound slab,
//   clarifying the boundary between Superslab Backend and TLS Bind.
//
// ENV:
//   HAKMEM_TINY_PAGE_BOX_CLASSES (optional)
//   - Comma-separated class indices, e.g. "7" or "5,6,7"
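
The diff does not show how HAKMEM_TINY_PAGE_BOX_CLASSES is consumed. Purely as an illustrative sketch (the helper name and the bitmask representation are assumptions, not this repo's API), the comma-separated list could be turned into a per-class enable mask once at startup:

#include <stdint.h>
#include <stdlib.h>

#ifndef TINY_NUM_CLASSES
#define TINY_NUM_CLASSES 8   // tiny classes C0-C7, matching hakmem_tiny_config.h
#endif

// Hypothetical helper (not part of this commit): parse "5,6,7" into a class mask.
static uint32_t page_box_class_mask_from_env(void)
{
    const char* s = getenv("HAKMEM_TINY_PAGE_BOX_CLASSES");
    uint32_t mask = 0;
    if (!s) return mask;                 // unset: Page Box disabled for all classes
    while (*s) {
        char* end = NULL;
        long idx = strtol(s, &end, 10);
        if (end == s) break;             // no digits found: stop parsing
        if (idx >= 0 && idx < TINY_NUM_CLASSES) {
            mask |= (1u << idx);
        }
        if (*end != ',') break;          // end of list (or unexpected character)
        s = end + 1;
    }
    return mask;
}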
@@ -37,7 +43,7 @@ extern void ss_active_add(SuperSlab* ss, uint32_t n);
// Maximum number of retained pages (per class)
// For the C7-only experiment 1-2 pages would be enough, but allow up to 4 in anticipation of extending to C5/C6 later.
#ifndef TINY_PAGE_BOX_MAX_PAGES
#define TINY_PAGE_BOX_MAX_PAGES 4
#define TINY_PAGE_BOX_MAX_PAGES 12
#endif

// Metadata for one page
@@ -191,6 +197,12 @@ static inline void tiny_page_box_on_new_slab(TinyTLSSlab* tls)

    // Keep the SuperSlab pinned while the Page Box is tracking it
    superslab_ref_inc(ss);

#if !HAKMEM_BUILD_RELEASE
    // Debug: Track Page Box stats per-class
    fprintf(stderr, "[PAGE_BOX_REG] class=%d num_pages=%u capacity=%u carved=%u\n",
            class_idx, st->num_pages, meta->capacity, meta->carved);
#endif
}

// Phase 1 implementation strategy:
@@ -15,6 +15,7 @@
#include "../box/warm_pool_prefill_box.h" // Box: Warm Pool Prefill (secondary optimization)
#include "../hakmem_env_cache.h" // Priority-2: ENV cache (eliminate syscalls)
#include "../box/tiny_page_box.h" // Tiny-Plus Page Box (C5–C7 initial hook)
#include "../box/ss_tls_bind_box.h" // Box: TLS Bind (SuperSlab -> TLS binding)
#include <stdlib.h>
#include <string.h>
#include <stdatomic.h>
@@ -86,6 +87,21 @@ __thread uint64_t g_unified_cache_full[TINY_NUM_CLASSES] = {0};
// Note: These are kept outside !HAKMEM_BUILD_RELEASE for profiling in release builds
__thread TinyWarmPoolStats g_warm_pool_stats[TINY_NUM_CLASSES] = {0};

#if !HAKMEM_BUILD_RELEASE
// Debug-only diagnostics for Warm Pool effectiveness
_Atomic uint64_t g_dbg_warm_prefill_attempts = 0;
_Atomic uint64_t g_dbg_warm_prefill_refill_fail = 0;
_Atomic uint64_t g_dbg_warm_prefill_push_ok = 0;
_Atomic uint64_t g_dbg_warm_prefill_push_full = 0;
_Atomic uint64_t g_dbg_warm_pop_attempts = 0;
_Atomic uint64_t g_dbg_warm_pop_hits = 0;
_Atomic uint64_t g_dbg_warm_pop_empty = 0;
_Atomic uint64_t g_dbg_warm_pop_carve_zero = 0;
#endif

// Forward declaration for Warm Pool stats printer (defined later in this file)
static inline void tiny_warm_pool_print_stats(void);

// ============================================================================
// Phase 8-Step1-Fix: unified_cache_enabled() implementation (non-static)
// ============================================================================
@@ -231,9 +247,9 @@ static inline void tiny_warm_pool_print_stats(void) {

    for (int i = 0; i < TINY_NUM_CLASSES; i++) {
        uint64_t total = g_warm_pool_stats[i].hits + g_warm_pool_stats[i].misses;
        if (total == 0) continue; // Skip unused classes

        float hit_rate = 100.0 * g_warm_pool_stats[i].hits / total;
        float hit_rate = (total > 0)
                             ? (100.0 * g_warm_pool_stats[i].hits / total)
                             : 0.0;
        fprintf(stderr, " C%d: hits=%llu misses=%llu hit_rate=%.1f%% prefilled=%llu\n",
                i,
                (unsigned long long)g_warm_pool_stats[i].hits,
@@ -241,6 +257,21 @@ static inline void tiny_warm_pool_print_stats(void) {
                hit_rate,
                (unsigned long long)g_warm_pool_stats[i].prefilled);
    }

#if !HAKMEM_BUILD_RELEASE
    // Debug-only aggregated diagnostics for Warm Pool
    fprintf(stderr,
            " [DBG] prefill_attempts=%llu refill_fail=%llu push_ok=%llu push_full=%llu "
            "pop_attempts=%llu pop_hits=%llu pop_empty=%llu pop_carve_zero=%llu\n",
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_attempts, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_refill_fail, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_push_ok, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_prefill_push_full, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_attempts, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_hits, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_empty, memory_order_relaxed),
            (unsigned long long)atomic_load_explicit(&g_dbg_warm_pop_carve_zero, memory_order_relaxed));
#endif
    fflush(stderr);
}

@@ -426,15 +457,23 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
    if (room <= 0) return HAK_BASE_FROM_RAW(NULL);
    // Batch size limit (per-class tuning)
    // - Default: 128
    // - C5-C7 (mixed 129B-1024B range): raise to 256 to reduce refill frequency
    // - For safety, always keep this consistent with the out[] array size below (256)
    int max_batch = (class_idx >= 5 && class_idx <= 7) ? 256 : 128;
    // - C5-C6 (129B-512B): raise to 256
    // - C7 (≈1KB): raise to 512 to reduce refill frequency further
    // - For safety, always keep this consistent with the out[] array size below (512)
    int max_batch;
    if (class_idx == 7) {
        max_batch = 512;
    } else if (class_idx >= 5 && class_idx <= 6) {
        max_batch = 256;
    } else {
        max_batch = 128;
    }
    if (room > max_batch) room = max_batch;

    // NOTE:
    // - Since max_batch is raised to 256 for C5-C7, the stack array also reserves 256 entries.
    // - This keeps room <= max_batch <= 256 at all times and prevents overrunning out[].
    void* out[256];
    // - Since max_batch is raised to 512 for C7, the stack array also reserves 512 entries.
    // - This keeps room <= max_batch <= 512 at all times and prevents overrunning out[].
    void* out[512];
    int produced = 0;

    // ========== PAGE BOX HOT PATH (Tiny-Plus layer): Try page box FIRST ==========
@@ -473,8 +512,21 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {

    // ========== WARM POOL HOT PATH: Check warm pool FIRST ==========
    // This is the critical optimization - avoid superslab_refill() registry scan
#if !HAKMEM_BUILD_RELEASE
    atomic_fetch_add_explicit(&g_dbg_warm_pop_attempts, 1, memory_order_relaxed);
#endif
    SuperSlab* warm_ss = tiny_warm_pool_pop(class_idx);
    if (warm_ss) {
        // FUTURE: TLS Bind Box Integration
        // Currently we carve directly from warm_ss via slab_carve_from_ss().
        // To unify logic, we should eventually:
        // 1. Choose a slab index (via tiny_page_box or heuristic).
        // 2. Bind it to TLS via ss_tls_bind_one(..., warm_ss, slab_idx, ...).
        // 3. Fall through to TLS-based allocation.

#if !HAKMEM_BUILD_RELEASE
        atomic_fetch_add_explicit(&g_dbg_warm_pop_hits, 1, memory_order_relaxed);
#endif
        // HOT PATH: Warm pool hit, try to carve directly
        produced = slab_carve_from_ss(class_idx, warm_ss, out, room);
        if (produced > 0) {
@@ -518,12 +570,19 @@ hak_base_ptr_t unified_cache_refill(int class_idx) {
        }

        // SuperSlab carve failed (produced == 0)
#if !HAKMEM_BUILD_RELEASE
        atomic_fetch_add_explicit(&g_dbg_warm_pop_carve_zero, 1, memory_order_relaxed);
#endif
        // This slab is either exhausted or has no more available capacity
        // The statistics counter 'prefilled' tracks how often we try to prefill
        if (produced == 0 && tiny_warm_pool_count(class_idx) == 0) {
            // Pool is empty and carve failed - prefill would help here
            warm_pool_record_prefilled(class_idx);
        }
    } else {
#if !HAKMEM_BUILD_RELEASE
        atomic_fetch_add_explicit(&g_dbg_warm_pop_empty, 1, memory_order_relaxed);
#endif
    }

    // ========== COLD PATH: Warm pool miss, use superslab_refill ==========

@@ -210,6 +210,9 @@ static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx) {
 * must not call it by design (self-invocation is forbidden); a defensive check is performed here on the safe side.
 */

// TLS Bind Box Logic moved to core/box/ss_tls_bind_box.h
#include "box/ss_tls_bind_box.h"

SuperSlab* superslab_refill(int class_idx)
{
#if HAKMEM_DEBUG_COUNTERS
@@ -236,49 +239,7 @@ SuperSlab* superslab_refill(int class_idx)
        return NULL;
    }

    // Initialize slab metadata for this class/thread.
    // NOTE:
    // - superslab_init_slab is assumed by design not to call superslab_refill() recursively.
    // - class_idx is reflected in slab_meta->class_idx.
    uint32_t my_tid = tiny_self_u32();
    superslab_init_slab(ss,
                        slab_idx,
                        g_tiny_class_sizes[class_idx],
                        my_tid);

    // CRITICAL FIX: Ensure class_idx is set after init.
    // New SuperSlabs start with meta->class_idx=0 (mmap zero-init).
    // superslab_init_slab() only sets it if meta->class_idx==255.
    // We must explicitly set it to the requested class to avoid C0/C7 confusion.
    TinySlabMeta* meta = &ss->slabs[slab_idx];
#if !HAKMEM_BUILD_RELEASE
    uint8_t old_cls = meta->class_idx;
#endif
    meta->class_idx = (uint8_t)class_idx;
#if !HAKMEM_BUILD_RELEASE
    if (class_idx == 7 && old_cls != class_idx) {
        fprintf(stderr, "[SUPERSLAB_REFILL_FIX_C7] ss=%p slab=%d old_cls=%u new_cls=%d\n",
                (void*)ss, slab_idx, old_cls, class_idx);
    }
#endif

    // Bind this slab to TLS for fast subsequent allocations.
    // tiny_tls_bind_slab consistently updates:
    //   tls->ss, tls->slab_idx, tls->meta, tls->slab_base
    tiny_tls_bind_slab(tls, ss, slab_idx);

    // Sanity: TLS must now describe this slab for this class.
    // On failure, roll back TLS and return NULL (the caller can safely retry).
    if (!(tls->ss == ss &&
          tls->slab_idx == (uint8_t)slab_idx &&
          tls->meta != NULL &&
          tls->meta->class_idx == (uint8_t)class_idx &&
          tls->slab_base != NULL)) {
        tls->ss = NULL;
        tls->meta = NULL;
        tls->slab_base = NULL;
        tls->slab_idx = 0;
    if (!ss_tls_bind_one(class_idx, tls, ss, slab_idx, tiny_self_u32())) {
        return NULL;
    }