Refactor: Extract 3 more Box modules from hakmem_tiny.c (-70% total reduction)
Continue the hakmem_tiny.c refactoring with three large module extractions.

## Changes

**hakmem_tiny.c**: 995 → 616 lines (-379 lines, -38% this phase)
**Total reduction**: 2081 → 616 lines (-1465 lines, -70% cumulative) 🏆

## Extracted Modules (3 new boxes)

6. **tls_state_box** (224 lines)
   - TLS SLL enable flags and configuration
   - TLS canaries and SLL array definitions
   - Debug counters (path, ultra, allocation)
   - Frontend/backend configuration
   - TLS thread ID caching helpers
   - Frontend hit/miss counters
   - HotMag, QuickSlot, Ultra-front configuration
   - Helper functions (is_hot_class, tiny_optional_push)
   - Intelligence system helpers

7. **legacy_slow_box** (96 lines)
   - tiny_slow_alloc_fast() function (cold/unused)
   - Legacy slab-based allocation with refill
   - TLS cache/fast cache refill from slabs
   - Remote drain handling
   - List management (move to full/free lists)
   - Marked __attribute__((cold, noinline, unused))

8. **slab_lookup_box** (77 lines)
   - registry_lookup() - O(1) hash-based lookup
   - hak_tiny_owner_slab() - public API for slab discovery
   - Linear probing search with atomic owner access
   - O(N) fallback for non-registry mode
   - Safety validation for membership checking

## Cumulative Progress (8 boxes total)

**Previously extracted** (Phase 1):
1. config_box (211 lines)
2. publish_box (419 lines)
3. globals_box (256 lines)
4. phase6_wrappers_box (122 lines)
5. ace_guard_box (100 lines)

**This phase** (Phase 2):
6. tls_state_box (224 lines)
7. legacy_slow_box (96 lines)
8. slab_lookup_box (77 lines)

**Total extracted**: 1,505 lines across 8 coherent modules
**Remaining core**: 616 lines (well-organized, focused)

## Benefits

- **Readability**: 2k-line monolith → focused 616-line core
- **Maintainability**: each box has a single responsibility
- **Organization**: TLS state, legacy code, and lookup utilities are now separated
- **Build**: all modules compile successfully ✅

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
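For context, the "box" extraction shown in the diff below is textual: each `.inc` box is pulled back into `hakmem_tiny.c` with `#include`, so `static` functions and `__thread` state keep their internal linkage and the compiler still sees a single translation unit. A minimal sketch of the pattern, with hypothetical file names (`demo_box.inc` / `demo_core.c` are illustrations, not files from this tree):

```c
/* demo_box.inc — hypothetical extracted box (illustration only) */
static int g_demo_counter = 0;                       /* internal linkage preserved */
static int demo_next(void) { return ++g_demo_counter; }
```

```c
/* demo_core.c — hypothetical core file, mirroring how hakmem_tiny.c
 * includes hakmem_tiny_tls_state_box.inc and friends. */
#include <stdio.h>
#include "demo_box.inc"   /* textual inclusion: still one translation unit */

int main(void) {
    printf("%d %d\n", demo_next(), demo_next());  /* prints: 1 2 */
    return 0;
}
```

Because inclusion is textual, include order matters (the diff keeps notes such as "This type definition must come BEFORE the Phase 2D-1 includes below").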
core/hakmem_tiny.c

@@ -187,331 +187,23 @@ static inline int fastcache_push(int class_idx, void* ptr);

The 96-line body of tiny_slow_alloc_fast() moves out verbatim (full text in core/hakmem_tiny_legacy_slow_box.inc below) and is replaced by:

+// ============================================================================
+// Legacy Slow Allocation Path - EXTRACTED to hakmem_tiny_legacy_slow_box.inc
+// ============================================================================
+#include "hakmem_tiny_legacy_slow_box.inc"

The Phase 2D-1 markers that follow are unchanged context:

 // ============================================================================
 // EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
 // ============================================================================
 // Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)

The 224 lines of TLS/frontend state after them move out verbatim (full text in core/hakmem_tiny_tls_state_box.inc below) and are replaced by:

+// ============================================================================
+// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
+// ============================================================================
+#include "hakmem_tiny_tls_state_box.inc"

 #include "hakmem_tiny_intel.inc"

 // ============================================================================

@@ -767,83 +459,12 @@ static inline int ultra_batch_for_class(int class_idx) {

 // EXTRACTED: }

 // Lookup slab by base address (O(1) average)

registry_lookup() and hak_tiny_owner_slab() move out verbatim (full text in core/hakmem_tiny_slab_lookup_box.inc below) and are replaced by:

+// ============================================================================
+// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
+// ============================================================================
+#include "hakmem_tiny_slab_lookup_box.inc"

 // Function: move_to_full_list() - 20 lines (lines 1104-1123)
 // Move slab to full list
core/hakmem_tiny_legacy_slow_box.inc (new file, 96 lines)

@@ -0,0 +1,96 @@

static __attribute__((cold, noinline, unused)) void* tiny_slow_alloc_fast(int class_idx) {
    int tls_enabled = g_tls_list_enable;
    TinyTLSList* tls = &g_tls_lists[class_idx];
    pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
    pthread_mutex_lock(lock);

    TinySlab* slab = g_tiny_pool.free_slabs[class_idx];
    if (slab) {
        g_tiny_pool.free_slabs[class_idx] = slab->next;
    } else {
        slab = allocate_new_slab(class_idx);
        if (!slab) {
            pthread_mutex_unlock(lock);
            return NULL;
        }
    }
    slab->next = NULL;

    if (atomic_load_explicit(&slab->remote_head, memory_order_acquire)) {
        tiny_remote_drain_locked(slab);
    }

    int block_idx = hak_tiny_find_free_block(slab);
    if (block_idx < 0) {
        slab->next = g_tiny_pool.free_slabs[class_idx];
        g_tiny_pool.free_slabs[class_idx] = slab;
        pthread_mutex_unlock(lock);
        return NULL;
    }

    hak_tiny_set_used(slab, block_idx);
    slab->free_count--;
    size_t block_size = g_tiny_class_sizes[class_idx];
    uint8_t* base = (uint8_t*)slab->base;
    void* ret = (void*)(base + ((size_t)block_idx * block_size));
    g_tiny_pool.alloc_count[class_idx]++;

    uint16_t cap = g_fast_cap_defaults[class_idx];
    uint16_t count = g_fast_count[class_idx];
    uint16_t fast_need = (cap > count) ? (uint16_t)(cap - count) : 0;
    if (fast_need > slab->free_count) fast_need = (uint16_t)slab->free_count;

    uint32_t tls_need = 0;
    if (tls_enabled && tls_list_needs_refill(tls)) {
        uint32_t target = tls_list_refill_threshold(tls);
        if (tls->count < target) {
            tls_need = target - tls->count;
        }
    }
    uint32_t remaining = slab->free_count;
    if (fast_need > remaining) fast_need = (uint16_t)remaining;
    remaining -= fast_need;
    if (tls_need > remaining) tls_need = remaining;

    while (fast_need > 0) {
        int extra_idx = hak_tiny_find_free_block(slab);
        if (extra_idx < 0) break;
        hak_tiny_set_used(slab, extra_idx);
        slab->free_count--;
        void* extra = (void*)(base + ((size_t)extra_idx * block_size));
        int pushed = 0;
        if (__builtin_expect(g_fastcache_enable && class_idx <= 3, 1)) {
            pushed = fastcache_push(class_idx, extra);
        } else {
            pushed = tiny_fast_push(class_idx, extra);
        }
        if (!pushed) {
            if (tls_enabled) {
                tiny_tls_list_guard_push(class_idx, tls, extra);
                tls_list_push(tls, extra, class_idx);
            }
        }
        fast_need--;
    }

    while (tls_enabled && tls_need > 0) {
        int extra_idx = hak_tiny_find_free_block(slab);
        if (extra_idx < 0) break;
        hak_tiny_set_used(slab, extra_idx);
        slab->free_count--;
        void* extra = (void*)(base + ((size_t)extra_idx * block_size));
        tiny_tls_list_guard_push(class_idx, tls, extra);
        tls_list_push(tls, extra, class_idx);
        tls_need--;
    }

    if (slab->free_count == 0) {
        move_to_full_list(class_idx, slab);
    } else {
        slab->next = g_tiny_pool.free_slabs[class_idx];
        g_tiny_pool.free_slabs[class_idx] = slab;
    }

    pthread_mutex_unlock(lock);
    return ret;
}
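The fast_need/tls_need clamping in tiny_slow_alloc_fast() splits the slab's remaining blocks between the fast cache and the TLS list, fast cache first. A standalone sketch of just that arithmetic with toy values (in the real code the caps and thresholds come from g_fast_cap_defaults and tls_list_refill_threshold()):

```c
#include <stdint.h>
#include <stdio.h>

/* Toy re-statement of the batching split: clamp the fast-cache top-up
 * to what the slab has, then give the TLS list whatever is left over. */
int main(void) {
    uint16_t cap = 16, count = 4;            /* fast cache wants 12 more   */
    uint32_t free_count = 10;                /* blocks left in the slab    */
    uint32_t tls_target = 8, tls_count = 3;  /* TLS list wants 5 more      */

    uint16_t fast_need = (cap > count) ? (uint16_t)(cap - count) : 0;
    uint32_t tls_need  = (tls_count < tls_target) ? (tls_target - tls_count) : 0;

    uint32_t remaining = free_count;
    if (fast_need > remaining) fast_need = (uint16_t)remaining;
    remaining -= fast_need;                  /* fast cache is served first */
    if (tls_need > remaining) tls_need = remaining;

    printf("fast_need=%u tls_need=%u\n", fast_need, (unsigned)tls_need);
    return 0;  /* prints: fast_need=10 tls_need=0 */
}
```

Serving the fast cache first means a nearly empty slab refills only the cheapest front; the TLS list is topped up only when blocks remain.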
core/hakmem_tiny_slab_lookup_box.inc (new file, 77 lines)

@@ -0,0 +1,77 @@

static TinySlab* registry_lookup(uintptr_t slab_base) {
    // Lock-free read with atomic owner access (MT-safe)
    int hash = registry_hash(slab_base);

    // Linear probing search
    for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
        int idx = (hash + i) & SLAB_REGISTRY_MASK;
        SlabRegistryEntry* entry = &g_slab_registry[idx];

        if (entry->slab_base == slab_base) {
            // Atomic load to prevent TOCTOU race with registry_unregister()
            TinySlab* owner = atomic_load_explicit(&entry->owner, memory_order_acquire);
            if (!owner) return NULL; // Entry cleared by unregister
            return owner;
        }

        if (entry->slab_base == 0) {
            return NULL; // Empty slot - not found
        }
    }
    return NULL; // Not found after max probes
}

// ============================================================================
// EXTRACTED TO hakmem_tiny_slab_mgmt.inc (Phase 2D-4 FINAL)
// ============================================================================
// Function: allocate_new_slab() - 79 lines (lines 952-1030)
// Allocate new slab for a class

// Function: release_slab() - 23 lines (lines 1033-1055)
// Release a slab back to system

// Step 2: Find slab owner by pointer (O(1) via hash table registry, or O(N) fallback)
TinySlab* hak_tiny_owner_slab(void* ptr) {
    if (!ptr || !g_tiny_initialized) return NULL;

    // Phase 6.14: Runtime toggle between Registry (O(1)) and List (O(N))
    if (g_use_registry) {
        // O(1) lookup via hash table
        uintptr_t slab_base = (uintptr_t)ptr & ~(TINY_SLAB_SIZE - 1);
        TinySlab* slab = registry_lookup(slab_base);
        if (!slab) return NULL;
        // SAFETY: validate membership (ptr must be inside [base, base+64KB))
        uintptr_t start = (uintptr_t)slab->base;
        uintptr_t end = start + TINY_SLAB_SIZE;
        if ((uintptr_t)ptr < start || (uintptr_t)ptr >= end) {
            return NULL; // false positive from registry → treat as non-Tiny
        }
        return slab;
    } else {
        // O(N) fallback: linear search through all slab lists (lock per class)
        for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
            pthread_mutex_t* lock = &g_tiny_class_locks[class_idx].m;
            pthread_mutex_lock(lock);
            // Search free slabs
            for (TinySlab* slab = g_tiny_pool.free_slabs[class_idx]; slab; slab = slab->next) {
                uintptr_t slab_start = (uintptr_t)slab->base;
                uintptr_t slab_end = slab_start + TINY_SLAB_SIZE;
                if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) {
                    pthread_mutex_unlock(lock);
                    return slab;
                }
            }
            // Search full slabs
            for (TinySlab* slab = g_tiny_pool.full_slabs[class_idx]; slab; slab = slab->next) {
                uintptr_t slab_start = (uintptr_t)slab->base;
                uintptr_t slab_end = slab_start + TINY_SLAB_SIZE;
                if ((uintptr_t)ptr >= slab_start && (uintptr_t)ptr < slab_end) {
                    pthread_mutex_unlock(lock);
                    return slab;
                }
            }
            pthread_mutex_unlock(lock);
        }
        return NULL; // Not found
    }
}
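hak_tiny_owner_slab() is the routing primitive a free path would use to decide whether a pointer belongs to the Tiny allocator at all. A hedged usage sketch — demo_free(), hak_tiny_free_to_slab(), and fallback_free() are hypothetical names for illustration, not APIs confirmed by this tree:

```c
/* Hypothetical caller: route a pointer by ownership. NULL means
 * "not a Tiny block" — including registry false positives, which
 * hak_tiny_owner_slab() filters out with its [base, base+TINY_SLAB_SIZE)
 * membership check before returning a slab. */
static void demo_free(void* ptr) {
    TinySlab* slab = hak_tiny_owner_slab(ptr);
    if (slab) {
        /* Tiny-owned: release into its owning slab (name illustrative) */
        /* hak_tiny_free_to_slab(slab, ptr); */
    } else {
        /* Not ours: hand off to the general-purpose allocator path */
        /* fallback_free(ptr); */
    }
}
```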
core/hakmem_tiny_tls_state_box.inc (new file, 224 lines)

@@ -0,0 +1,224 @@

// Hot-path cheap sampling counter to avoid rand() in allocation path
// Phase 9.4: TLS single-linked freelist (mimalloc-inspired) for hottest classes (≤128B/≤256B)
int g_tls_sll_enable = 1;        // HAKMEM_TINY_TLS_SLL=0 to disable
int g_tiny_hotpath_class5 = 0;   // HAKMEM_TINY_HOTPATH_CLASS5=1 to enable class 5 hotpath
// Phase 6-1.7: Export TLS variables for box refactor (Box 5/6 need access from hakmem.c)
// CRITICAL FIX: Explicit initializers prevent SEGV from uninitialized TLS in worker threads
// PRIORITY 3: TLS Canaries - Add canaries around TLS arrays to detect buffer overruns
#define TLS_CANARY_MAGIC 0xDEADBEEFDEADBEEFULL
// Phase 3d-B: Unified TLS SLL (head+count in same cache line for +12-18% cache hit rate)
__thread uint64_t g_tls_canary_before_sll = TLS_CANARY_MAGIC;
__thread TinyTLSSLL g_tls_sll[TINY_NUM_CLASSES] __attribute__((aligned(64))) = {0};
__thread uint64_t g_tls_canary_after_sll = TLS_CANARY_MAGIC;
static int g_tiny_ultra = 0;     // HAKMEM_TINY_ULTRA=1 for SLL-only ultra mode
static int g_ultra_validate = 0; // HAKMEM_TINY_ULTRA_VALIDATE=1 to enable per-pop validation
// Ultra debug counters
#if HAKMEM_DEBUG_COUNTERS
static __attribute__((unused)) uint64_t g_ultra_pop_hits[TINY_NUM_CLASSES] = {0};
static uint64_t g_ultra_refill_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_ultra_resets[TINY_NUM_CLASSES] = {0};
#endif

// Path counters (normal mode visibility): lightweight, for debugging/bench only
#if HAKMEM_DEBUG_COUNTERS
static __attribute__((unused)) uint64_t g_path_sll_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_mag_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_front_pop[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_superslab[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_path_refill_calls[TINY_NUM_CLASSES] = {0};
// New: slow/bitmap/bump/bin instrumentation
static __attribute__((unused)) uint64_t g_alloc_slow_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_superslab_refill_calls_dbg[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bitmap_scan_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bgbin_pops[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bump_hits[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_bump_arms[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_spec_calls[TINY_NUM_CLASSES] = {0};
static __attribute__((unused)) uint64_t g_spec_hits[TINY_NUM_CLASSES] = {0};
#endif
static int g_path_debug_enabled = 0;

// Spill hysteresis (keeps getenv out of the free hot path)
static int g_spill_hyst = 32; // default margin (configured at init; never getenv on hot path)

// Optional per-class refill batch overrides (0=use global defaults)
static int g_refill_max_c[TINY_NUM_CLASSES] = {0};
static int g_refill_max_hot_c[TINY_NUM_CLASSES] = {0};
static inline __attribute__((always_inline)) int tiny_refill_max_for_class(int class_idx) {
    int v = g_refill_max_c[class_idx];
    if (v > 0) return v;
    if (class_idx <= 3) {
        int hv = g_refill_max_hot_c[class_idx];
        if (hv > 0) return hv;
        return g_tiny_refill_max_hot;
    }
    return g_tiny_refill_max;
}

// Phase 9.5: Frontend/Backend split - Tiny Front modules (QuickSlot / FastCache)
#include "front/quick_slot.h"
#include "front/fast_cache.h"
__thread TinyFastCache g_fast_cache[TINY_NUM_CLASSES];
static int g_frontend_enable = 0; // HAKMEM_TINY_FRONTEND=1 (experimental ultra-fast frontend)
// SLL capacity multiplier for hot tiny classes (env: HAKMEM_SLL_MULTIPLIER)
int g_sll_multiplier = 2;
// Cached thread id (uint32) to avoid repeated pthread_self() in hot paths
static __thread uint32_t g_tls_tid32;
static __thread int g_tls_tid32_inited;
// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
#else
static inline __attribute__((always_inline)) uint32_t tiny_self_u32(void) {
#endif
    if (__builtin_expect(!g_tls_tid32_inited, 0)) {
        g_tls_tid32 = (uint32_t)(uintptr_t)pthread_self();
        g_tls_tid32_inited = 1;
    }
    return g_tls_tid32;
}
// Cached pthread_t as-is for APIs that require pthread_t comparison
static __thread pthread_t g_tls_pt_self;
static __thread int g_tls_pt_inited;

// Frontend FastCache hit/miss counters (Small diagnostics)
unsigned long long g_front_fc_hit[TINY_NUM_CLASSES] = {0};
unsigned long long g_front_fc_miss[TINY_NUM_CLASSES] = {0};
// TLS SLL class mask: bit i = 1 allows SLL for class i. Default: all 8 classes enabled.
int g_tls_sll_class_mask = 0xFF;
// Phase 6-1.7: Export for box refactor (Box 6 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
#else
static inline __attribute__((always_inline)) pthread_t tiny_self_pt(void) {
#endif
    if (__builtin_expect(!g_tls_pt_inited, 0)) {
        g_tls_pt_self = pthread_self();
        g_tls_pt_inited = 1;
    }
    return g_tls_pt_self;
}

#include "tiny_refill.h"
// tiny_mmap_gate.h already included at top
#include "tiny_publish.h"

int g_sll_cap_override[TINY_NUM_CLASSES] = {0}; // LEGACY (unreferenced since Phase 12; kept as a compatibility dummy)
// Optional prefetch on SLL pop (guarded by env: HAKMEM_TINY_PREFETCH=1)
static int g_tiny_prefetch = 0;

// Small-class magazine pre-initialization (to avoid cap==0 checks on hot path)

// Hot-class small TLS magazine (storage and switches)
typedef struct {
    void* slots[128];
    uint16_t top; // 0..128
    uint16_t cap; // =128
} TinyHotMag;
static int g_hotmag_cap_default = 128;   // default capacity (env override)
static int g_hotmag_refill_default = 32; // default refill batch (env override)
static int g_hotmag_enable = 0;          // off by default (for A/B testing); can be enabled via env
static uint16_t g_hotmag_cap_current[TINY_NUM_CLASSES];
static uint8_t g_hotmag_cap_locked[TINY_NUM_CLASSES];
static uint16_t g_hotmag_refill_current[TINY_NUM_CLASSES];
static uint8_t g_hotmag_refill_locked[TINY_NUM_CLASSES];
static uint8_t g_hotmag_class_en[TINY_NUM_CLASSES]; // 0=disabled for class, 1=enabled
static __thread TinyHotMag g_tls_hot_mag[TINY_NUM_CLASSES];
// Inline helpers

#include "box/tls_sll_box.h" // Box TLS-SLL: Safe SLL operations API (needed by hotmag)
#include "hakmem_tiny_hotmag.inc.h"

// Size-specialized tiny alloc (32B/64B) via function pointers (for A/B testing)
// TinyQuickSlot: 1 cache line per class (quick 6 items + small metadata)
// Opt-in via HAKMEM_TINY_QUICK=1
// NOTE: This type definition must come BEFORE the Phase 2D-1 includes below
int g_quick_enable = 0; // HAKMEM_TINY_QUICK=1
__thread TinyQuickSlot g_tls_quick[TINY_NUM_CLASSES]; // compile-out via guards below

// Phase 2D-1: Hot-path inline function extractions (Front)
// NOTE: TinyFastCache/TinyQuickSlot are already defined in front/
#include "hakmem_tiny_hot_pop.inc.h" // 4 functions: tiny_hot_pop_class{0..3}
#include "hakmem_tiny_refill.inc.h"  // 8 functions: refill operations
#if HAKMEM_TINY_P0_BATCH_REFILL
#include "hakmem_tiny_refill_p0.inc.h" // P0 batch refill: refills the FastCache directly
#endif

// Phase 7 Task 3: Pre-warm TLS cache at init
// Pre-allocate blocks to reduce first-allocation miss penalty
#if HAKMEM_TINY_PREWARM_TLS
void hak_tiny_prewarm_tls_cache(void) {
    // Pre-warm each class with HAKMEM_TINY_PREWARM_COUNT blocks
    // This reduces the first-allocation miss penalty by populating TLS cache
    // Phase E1-CORRECT: ALL classes (including C7) now use TLS SLL
    for (int class_idx = 0; class_idx < TINY_NUM_CLASSES; class_idx++) {
        int count = HAKMEM_TINY_PREWARM_COUNT; // Default: 16 blocks per class

        // Trigger refill to populate TLS cache
        // P0 Fix: Use appropriate refill function based on P0 status
#if HAKMEM_TINY_P0_BATCH_REFILL
        sll_refill_batch_from_ss(class_idx, count);
#else
        sll_refill_small_from_ss(class_idx, count);
#endif
    }
}
#endif

// Ultra-Simple front (small per-class stack) — combines tiny front to minimize
// instructions and memory touches on alloc/free. Uses existing TLS bump shadow
// (g_tls_bcur/bend) when enabled to avoid per-alloc header writes.
// UltraFront capacity for 32/64B fast pop
#ifndef ULTRA_FRONT_CAP
#define ULTRA_FRONT_CAP 64
#endif
typedef struct __attribute__((aligned(64))) {
    void* slots[ULTRA_FRONT_CAP];
    uint16_t top; // 0..ULTRA_FRONT_CAP
    uint16_t _pad;
} TinyUltraFront;
static int g_ultra_simple = 0; // HAKMEM_TINY_ULTRA_SIMPLE=1
static __thread TinyUltraFront g_tls_ultra[TINY_NUM_CLASSES];
// Inline helpers
#include "hakmem_tiny_ultra_front.inc.h"

// Ultra-Bump TLS shadow (bench/opt-in): keep a TLS-only bump window
// to avoid per-alloc header writes. Header is updated per-chunk reservation.
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
int g_bump_chunk = 32; // HAKMEM_TINY_BUMP_CHUNK (blocks)
__thread uint8_t* g_tls_bcur[TINY_NUM_CLASSES] = {0};
__thread uint8_t* g_tls_bend[TINY_NUM_CLASSES] = {0};

// SLL small refill batch for specialized class (32/64B)
// Specialized order toggle: 1 = mag-first, 0 = sll-first
// HotMag helpers (for classes 0..3)
static inline int is_hot_class(int class_idx) { return class_idx >= 0 && class_idx <= 3; }

// Optional front (Ultra/HotMag) push helper: compile-out in release builds
static inline int tiny_optional_push(int class_idx, void* ptr) {
#if HAKMEM_BUILD_RELEASE
    (void)class_idx;
    (void)ptr;
    return 0;
#else
    if (__builtin_expect(g_ultra_simple && is_hot_class(class_idx), 0)) {
        if (__builtin_expect(ultra_push(class_idx, ptr), 0)) {
            return 1;
        }
    }
    if (__builtin_expect(is_hot_class(class_idx), 0)) {
        if (__builtin_expect(hotmag_push(class_idx, ptr), 0)) {
            return 1;
        }
    }
    return 0;
#endif
}

// Ultra-Simple helpers

// Phase 9.6: Deferred Intelligence (event queue + background)
// Extended event for FLINT Intelligence (lightweight; recorded off hot path only)
// Observability, ACE, and intelligence helpers
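The toggles in this box are plain ints read on the hot path; per the comments ("never getenv on hot path"), environment lookups happen once at init. A minimal sketch of that wiring under stated assumptions — demo_read_env_flags() and the demo_* variables are hypothetical stand-ins, only the env names (HAKMEM_TINY_TLS_SLL, HAKMEM_TINY_QUICK, HAKMEM_TINY_ULTRA, HAKMEM_TINY_PREFETCH) come from the comments above:

```c
#include <stdlib.h>

/* Stand-ins for the real flags defined in the box above. */
static int demo_tls_sll_enable = 1;  /* mirrors g_tls_sll_enable */
static int demo_quick_enable   = 0;  /* mirrors g_quick_enable   */

/* Hypothetical one-shot init: resolve env toggles into cached ints so the
 * alloc/free hot paths only ever read a flag, never call getenv(). */
static void demo_read_env_flags(void) {
    const char* s;
    if ((s = getenv("HAKMEM_TINY_TLS_SLL")) && s[0] == '0')
        demo_tls_sll_enable = 0;             /* default on, opt out */
    if ((s = getenv("HAKMEM_TINY_QUICK")) && s[0] == '1')
        demo_quick_enable = 1;               /* default off, opt in */
    /* HAKMEM_TINY_ULTRA, HAKMEM_TINY_PREFETCH, ... follow the same shape */
}
```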