Refactor FastCache and TLS cache APIs to use Phantom types (hak_base_ptr_t) for compile-time type safety, preventing BASE/USER pointer confusion. Changes: 1. core/hakmem_tiny_fastcache.inc.h: - fastcache_pop() returns hak_base_ptr_t instead of void* - fastcache_push() accepts hak_base_ptr_t instead of void* 2. core/hakmem_tiny.c: - Updated forward declarations to match new signatures 3. core/tiny_alloc_fast.inc.h, core/hakmem_tiny_alloc.inc: - Alloc paths now use hak_base_ptr_t for cache operations - BASE->USER conversion via HAK_RET_ALLOC macro 4. core/hakmem_tiny_refill.inc.h, core/refill/ss_refill_fc.h: - Refill paths properly handle BASE pointer types - Fixed: Removed unnecessary HAK_BASE_FROM_RAW() in ss_refill_fc.h line 176 5. core/hakmem_tiny_free.inc, core/tiny_free_magazine.inc.h: - Free paths convert USER->BASE before cache push - USER->BASE conversion via HAK_USER_TO_BASE or ptr_user_to_base() 6. core/hakmem_tiny_legacy_slow_box.inc: - Legacy path properly wraps pointers for cache API Benefits: - Type safety at compile time (in debug builds) - Zero runtime overhead (debug builds only, release builds use typedef=void*) - All BASE->USER conversions verified via Task analysis - Prevents pointer type confusion bugs Testing: - Build: SUCCESS (all 9 files) - Smoke test: PASS (sh8bench runs to completion) - Conversion path verification: 3/3 paths correct 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
669 lines
30 KiB
C
669 lines
30 KiB
C
#include "hakmem_tiny.h"
|
||
#include "hakmem_tiny_config.h" // Centralized configuration
|
||
#include "hakmem_phase7_config.h" // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
|
||
#include "hakmem_tiny_superslab.h"
|
||
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary // Phase 6.22: SuperSlab allocator
|
||
#include "hakmem_super_registry.h" // Phase 8.2: SuperSlab registry for memory profiling
|
||
#include "hakmem_internal.h"
|
||
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
|
||
#include "hakmem_tiny_magazine.h"
|
||
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
|
||
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
|
||
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
|
||
#include "hakmem_env_cache.h" // Priority-2: ENV cache
|
||
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
|
||
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
|
||
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
|
||
// Phase 2B modules
|
||
#include "tiny_api.h" // Consolidated: stats_api, query_api, rss_api, registry_api
|
||
#include "tiny_tls.h"
|
||
#include "tiny_debug.h"
|
||
#include "hakmem_debug_master.h" // For unified debug level control
|
||
#include "tiny_mmap_gate.h"
|
||
#include "tiny_debug_ring.h"
|
||
#include "tiny_route.h"
|
||
#include "front/tiny_heap_v2.h"
|
||
#include "tiny_tls_guard.h"
|
||
#include "tiny_ready.h"
|
||
#include "hakmem_tiny_tls_list.h"
|
||
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
|
||
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
|
||
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive TLS cache sizing
|
||
// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
|
||
#include "tiny_system.h" // Consolidated: stdio, stdlib, string, etc.
|
||
#include "hakmem_prof.h"
|
||
#include "hakmem_trace.h" // Optional USDT (perf) tracepoints
|
||
|
||
extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c
|
||
|
||
// Tiny allocator configuration, debug counters, and return helpers
|
||
#include "hakmem_tiny_config_box.inc"
|
||
|
||
// ============================================================================
|
||
// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
|
||
// ============================================================================
|
||
__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
|
||
|
||
#if !HAKMEM_BUILD_RELEASE
|
||
// Helper to dump last push from core/hakmem.c (SEGV handler)
|
||
// Must be visible to other TUs (extern in hakmem_tiny.h or similar if needed,
|
||
// but SEGV handler is in core/hakmem.c which can dlsym or weak link it)
|
||
__attribute__((noinline))
|
||
void tiny_debug_dump_last_push(int cls) {
|
||
hak_base_ptr_t p = s_tls_sll_last_push[cls];
|
||
void* raw = HAK_BASE_TO_RAW(p);
|
||
fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
|
||
if (raw && (uintptr_t)raw > 4096) {
|
||
unsigned long* vals = (unsigned long*)raw;
|
||
fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
|
||
}
|
||
}
|
||
#endif
|
||
// Forward declarations for static helpers used before definition
|
||
struct TinySlab; // forward
|
||
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
|
||
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
|
||
static void release_slab(struct TinySlab* slab);
|
||
static TinySlab* allocate_new_slab(int class_idx);
|
||
static void tiny_tls_cache_drain(int class_idx);
|
||
static void tiny_apply_mem_diet(void);
|
||
|
||
// Phase 6.23: SuperSlab allocation forward declaration
|
||
static inline void* hak_tiny_alloc_superslab(int class_idx);
|
||
static inline void* superslab_tls_bump_fast(int class_idx);
|
||
SuperSlab* superslab_refill(int class_idx);
|
||
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
|
||
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
|
||
// Forward decl: used by tiny_spec_pop_path before its definition
|
||
#if HAKMEM_TINY_P0_BATCH_REFILL
|
||
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
|
||
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
|
||
#else
|
||
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
|
||
// Only a single implementation exists there; declare here for callers.
|
||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
int sll_refill_small_from_ss(int class_idx, int max_take);
|
||
#else
|
||
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
|
||
#endif
|
||
#endif
|
||
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
|
||
static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
|
||
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
|
||
static void tiny_remote_drain_locked(struct TinySlab* slab);
|
||
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
|
||
// without any refill or slow-path work. Returns NULL on miss.
|
||
/* moved below TinyTLSSlab definition */
|
||
|
||
// Step 3d: Forced inlining for readability + performance (306M target)
|
||
__attribute__((always_inline))
|
||
static inline void* hak_tiny_alloc_wrapper(int class_idx);
|
||
// Helpers for SuperSlab active block accounting (atomic, saturating dec)
|
||
|
||
// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
|
||
#include "hakmem_tiny_ss_active_box.inc"
|
||
|
||
// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)
|
||
|
||
// Front refill count global config (declare before init.inc uses them)
|
||
extern int g_refill_count_global;
|
||
extern int g_refill_count_hot;
|
||
extern int g_refill_count_mid;
|
||
extern int g_refill_count_class[TINY_NUM_CLASSES];
|
||
|
||
// Step 3d: Forced inlining for slow path (maintain monolithic performance)
|
||
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
|
||
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
|
||
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
|
||
#else
|
||
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
|
||
#endif
|
||
|
||
// ---------------------------------------------------------------------------
|
||
// Box: adopt_gate_try (implementation moved from header for robust linkage)
|
||
// ---------------------------------------------------------------------------
|
||
#include "box/adopt_gate_box.h"
|
||
extern SuperSlab* g_super_reg_by_class[TINY_NUM_CLASSES][SUPER_REG_PER_CLASS];
|
||
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
|
||
extern unsigned long long g_adopt_gate_calls[];
|
||
extern unsigned long long g_adopt_gate_success[];
|
||
extern unsigned long long g_reg_scan_attempts[];
|
||
extern unsigned long long g_reg_scan_hits[];
|
||
/*
 * Try to obtain a SuperSlab for `class_idx` and bind one of its slabs to `tls`.
 *
 * Order of attempts:
 *   1. tiny_refill_try_fast(class_idx, tls); a non-NULL result is returned as-is.
 *   2. A bounded scan of g_super_reg_by_class[class_idx], limited to
 *      min(tiny_reg_scan_max(), g_super_reg_class_size[class_idx]) entries.
 *      For each registered SuperSlab with a valid magic, the slabs flagged in
 *      nonempty_mask (or, if that is zero, the atomically loaded freelist_mask)
 *      are tried in ascending bit order.
 *
 * Returns the SuperSlab that was adopted, or NULL if nothing could be bound.
 * Updates the g_adopt_gate_* / g_reg_scan_* counters for `class_idx`.
 *
 * The per-slab acquire/drain/bind sequence is written inline (mirroring
 * adopt_bind_if_safe) rather than as a GCC nested function, so this file does
 * not depend on that compiler extension.
 */
SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
    g_adopt_gate_calls[class_idx]++;
    ROUTE_MARK(13);

    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
    if (ss) { g_adopt_gate_success[class_idx]++; return ss; }

    g_reg_scan_attempts[class_idx]++;

    int reg_size = g_super_reg_class_size[class_idx];
    int scan_limit = tiny_reg_scan_max();
    if (scan_limit > reg_size) scan_limit = reg_size;

    // Looked up once for the whole scan.
    // NOTE(review): assumes tiny_self_u32() returns a stable per-thread id — confirm.
    uint32_t self_tid = tiny_self_u32();

    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* cand = g_super_reg_by_class[class_idx][i];
        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;

        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
        uint32_t mask = cand->nonempty_mask;
        // Fallback to atomic freelist_mask for cross-thread visibility
        if (mask == 0) {
            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
        }
        if (mask == 0) continue; // No visible freelists in this SS

        int cap = ss_slabs_capacity(cand);
        while (mask) {
            int sidx = __builtin_ctz(mask);
            mask &= (mask - 1);          // clear the lowest set bit
            if (sidx >= cap) continue;   // bit refers to a slab beyond this SS's capacity

            SlabHandle h = slab_try_acquire(cand, sidx, self_tid);
            if (!slab_is_valid(&h)) continue;

            slab_drain_remote_full(&h);
            if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
                tiny_tls_bind_slab(tls, h.ss, h.slab_idx);
                slab_release(&h);
                g_adopt_gate_success[class_idx]++;
                g_reg_scan_hits[class_idx]++;
                ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
                return cand;
            }
            // Not safe to bind: release the handle and try the next slab.
            slab_release(&h);
        }
    }
    return NULL;
}
|
||
|
||
|
||
// ============================================================================
|
||
// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_globals_box.inc"
|
||
|
||
#include "hakmem_tiny_publish_box.inc"
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
|
||
// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
|
||
static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
|
||
static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
|
||
static inline hak_base_ptr_t fastcache_pop(int class_idx);
|
||
static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
|
||
// 88 lines (lines 407-494)
|
||
|
||
|
||
// ============================================================================
|
||
// Legacy Slow Allocation Path - EXTRACTED to hakmem_tiny_legacy_slow_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_legacy_slow_box.inc"
|
||
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)
|
||
|
||
// ============================================================================
|
||
// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_tls_state_box.inc"
|
||
|
||
#include "hakmem_tiny_intel.inc"
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
|
||
// ============================================================================
|
||
// EXTRACTED: static int get_rss_kb_self(void) {
|
||
// EXTRACTED: FILE* f = fopen("/proc/self/status", "r");
|
||
// EXTRACTED: if (!f) return 0;
|
||
// EXTRACTED: char buf[256];
|
||
// EXTRACTED: int kb = 0;
|
||
// EXTRACTED: while (fgets(buf, sizeof(buf), f)) {
|
||
// EXTRACTED: if (strncmp(buf, "VmRSS:", 6) == 0) {
|
||
// EXTRACTED: char* p = buf;
|
||
// EXTRACTED: while (*p && (*p < '0' || *p > '9')) {
|
||
// EXTRACTED: p++;
|
||
// EXTRACTED: }
|
||
// EXTRACTED: kb = atoi(p);
|
||
// EXTRACTED: break;
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED: fclose(f);
|
||
// EXTRACTED: return kb;
|
||
// EXTRACTED: }
|
||
|
||
// Option: on a miss, do not bulk-refill the magazine; allocate a single block and return it immediately.
|
||
// Env: enabled with HAKMEM_TINY_REFILL_ONE_ON_MISS=1 (default: 0)
|
||
int g_refill_one_on_miss = 0;
|
||
|
||
// Frontend fill target per class (adaptive)
|
||
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
|
||
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];
|
||
|
||
// Adaptive CAS: Active thread counter (for single-threaded optimization)
|
||
// Incremented on thread init, decremented on thread shutdown
|
||
_Atomic uint32_t g_hakmem_active_threads = 0;
|
||
|
||
// Per-thread registration flag (TLS variable)
|
||
static __thread int g_thread_registered = 0;
|
||
|
||
// Adaptive CAS: Register current thread (called on first allocation)
|
||
// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
|
||
__attribute__((always_inline))
|
||
inline void hakmem_thread_register(void) {
|
||
if (__builtin_expect(g_thread_registered == 0, 0)) {
|
||
g_thread_registered = 1;
|
||
atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
|
||
}
|
||
}
|
||
|
||
// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
|
||
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
|
||
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};
|
||
|
||
// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
|
||
static inline int ultra_batch_for_class(int class_idx) {
|
||
int ov = g_ultra_batch_override[class_idx];
|
||
if (ov > 0) return ov;
|
||
switch (class_idx) {
|
||
case 0: return 64; // 8B
|
||
case 1: return 96; // 16B
|
||
case 2: return 96; // 32B
|
||
case 3: return 224; // 64B
|
||
case 4: return 96; // 128B
|
||
case 5: return 64; // 256B
|
||
case 6: return 64; // 512B
|
||
default: return 32; // 1024B and others
|
||
}
|
||
}
|
||
|
||
// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
|
||
static inline int ultra_sll_cap_for_class(int class_idx) {
|
||
int ov = g_ultra_sll_cap_override[class_idx];
|
||
if (ov > 0) return ov;
|
||
switch (class_idx) {
|
||
case 0: return 256; // 8B
|
||
case 1: return 384; // 16B
|
||
case 2: return 384; // 32B
|
||
case 3: return 768; // 64B
|
||
case 4: return 256; // 128B
|
||
default: return 128; // others
|
||
}
|
||
}
|
||
|
||
enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };
|
||
|
||
|
||
// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
|
||
#include "hakmem_tiny_eventq_box.inc"
|
||
|
||
|
||
// Background refill workers and intelligence engine
|
||
#include "hakmem_tiny_background.inc"
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)
|
||
|
||
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
|
||
// without any refill or slow-path work. Returns NULL on miss.
|
||
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
|
||
if (!g_use_superslab) return NULL;
|
||
TinyTLSSlab* tls = &g_tls_slabs[class_idx];
|
||
TinySlabMeta* meta = tls->meta;
|
||
if (!meta) return NULL;
|
||
// Try linear (bump) allocation first when freelist is empty
|
||
if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
|
||
// Use per-slab class_idx to get stride
|
||
size_t block_size = tiny_stride_for_class(meta->class_idx);
|
||
void* block = tls->slab_base + ((size_t)meta->used * block_size);
|
||
meta->used++;
|
||
// Track active blocks in SuperSlab for conservative reclamation
|
||
ss_active_inc(tls->ss);
|
||
return block;
|
||
}
|
||
// Do not pop freelist here (keep magazine/SLL handling consistent)
|
||
return NULL;
|
||
}
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)
|
||
|
||
// Phase 2C-3: TLS operations module (included after helper function definitions)
|
||
#include "hakmem_tiny_tls_ops.h"
|
||
|
||
// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
|
||
// ============================================================================
|
||
// Function: tls_refill_from_tls_slab() - 101 lines
|
||
// Hot path refill operation, moved to inline function in header
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
|
||
// ============================================================================
|
||
// Function: tls_list_spill_excess() - 97 lines
|
||
// Hot path spill operation, moved to inline function in header
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)
|
||
|
||
|
||
|
||
|
||
// SLL capacity policy: for hot tiny classes (0..3), allow larger SLL up to multiplier * mag_cap
|
||
// for >=4 keep current conservative half (to limit footprint).
|
||
|
||
// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
|
||
#include "hakmem_tiny_sll_cap_box.inc"
|
||
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
|
||
// ============================================================================
|
||
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)
|
||
|
||
// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)
|
||
|
||
#include "hakmem_tiny_remote.inc"
|
||
|
||
// ============================================================================
|
||
// Internal Helpers
|
||
// ============================================================================
|
||
|
||
// Step 2: Slab Registry Operations
|
||
|
||
// Hash function for slab_base (64KB aligned)
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
|
||
// ============================================================================
|
||
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
|
||
// EXTRACTED: return (slab_base >> 16) & SLAB_REGISTRY_MASK;
|
||
// EXTRACTED: }
|
||
|
||
// Register slab in hash table (returns 1 on success, 0 on failure)
|
||
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
|
||
// EXTRACTED: pthread_mutex_lock(&g_tiny_registry_lock);
|
||
// EXTRACTED: int hash = registry_hash(slab_base);
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Linear probing (max 8 attempts)
|
||
// EXTRACTED: for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
|
||
// EXTRACTED: int idx = (hash + i) & SLAB_REGISTRY_MASK;
|
||
// EXTRACTED: SlabRegistryEntry* entry = &g_slab_registry[idx];
|
||
// EXTRACTED:
|
||
// EXTRACTED: if (entry->slab_base == 0) {
|
||
// EXTRACTED: // Empty slot found
|
||
// EXTRACTED: entry->slab_base = slab_base;
|
||
// EXTRACTED: atomic_store_explicit(&entry->owner, owner, memory_order_release);
|
||
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
|
||
// EXTRACTED: return 1;
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Registry full (collision limit exceeded)
|
||
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
|
||
// EXTRACTED: return 0;
|
||
// EXTRACTED: }
|
||
|
||
// Unregister slab from hash table
|
||
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
|
||
// EXTRACTED: pthread_mutex_lock(&g_tiny_registry_lock);
|
||
// EXTRACTED: int hash = registry_hash(slab_base);
|
||
// EXTRACTED:
|
||
// EXTRACTED: // Linear probing search
|
||
// EXTRACTED: for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
|
||
// EXTRACTED: int idx = (hash + i) & SLAB_REGISTRY_MASK;
|
||
// EXTRACTED: SlabRegistryEntry* entry = &g_slab_registry[idx];
|
||
// EXTRACTED:
|
||
// EXTRACTED: if (entry->slab_base == slab_base) {
|
||
// EXTRACTED: // Found - clear entry (atomic store prevents TOCTOU race)
|
||
// EXTRACTED: atomic_store_explicit(&entry->owner, NULL, memory_order_release);
|
||
// EXTRACTED: entry->slab_base = 0;
|
||
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
|
||
// EXTRACTED: return;
|
||
// EXTRACTED: }
|
||
// EXTRACTED:
|
||
// EXTRACTED: if (entry->slab_base == 0) {
|
||
// EXTRACTED: // Empty slot - not found
|
||
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
|
||
// EXTRACTED: return;
|
||
// EXTRACTED: }
|
||
// EXTRACTED: }
|
||
// EXTRACTED: pthread_mutex_unlock(&g_tiny_registry_lock);
|
||
// EXTRACTED: }
|
||
|
||
// Lookup slab by base address (O(1) average)
|
||
|
||
// ============================================================================
|
||
// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_slab_lookup_box.inc"
|
||
|
||
|
||
// Function: move_to_full_list() - 20 lines (lines 1104-1123)
|
||
// Move slab to full list
|
||
|
||
// Function: move_to_free_list() - 20 lines (lines 1126-1145)
|
||
// Move slab to free list
|
||
|
||
// ============================================================================
|
||
// Public API
|
||
// ============================================================================
|
||
|
||
// ============================================================================
|
||
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
|
||
// ============================================================================
|
||
#include "hakmem_tiny_init.inc"
|
||
|
||
// ============================================================================
|
||
// 3-Layer Architecture (2025-11-01 Simplification)
|
||
// ============================================================================
|
||
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
|
||
#include "hakmem_tiny_bump.inc.h"
|
||
|
||
// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
|
||
#include "hakmem_tiny_smallmag.inc.h"
|
||
|
||
// ============================================================================
|
||
// Phase 6 Fast Path Options (mutually exclusive)
|
||
// ============================================================================
|
||
// Choose ONE of the following Phase 6 optimizations:
|
||
//
|
||
// Phase 6-1.5: Alignment Guessing (LEGACY - committed 2025-11-02)
|
||
// - Enable: -DHAKMEM_TINY_PHASE6_ULTRA_SIMPLE=1
|
||
// - Speed: 235 M ops/sec
|
||
// - Memory: 0% overhead
|
||
// - Method: Guess size class from pointer alignment (__builtin_ctzl)
|
||
// - Risk: Alignment assumptions may break with future changes
|
||
//
|
||
// Phase 6-1.6: Metadata Header (NEW - recommended for production)
|
||
// - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
|
||
// - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
|
||
// - Memory: ~6-12% overhead (8 bytes/allocation)
|
||
// - Method: Store pool_type + size_class in 8-byte header
|
||
// - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
|
||
// - Eliminates: Registry lookups, mid_lookup, owner checks
|
||
// ============================================================================
|
||
|
||
// Forward declarations for Phase 6 alloc/free functions
|
||
#ifdef HAKMEM_TINY_PHASE6_ULTRA_SIMPLE
|
||
void* hak_tiny_alloc_ultra_simple(size_t size);
|
||
void hak_tiny_free_ultra_simple(void* ptr);
|
||
#endif
|
||
|
||
#if defined(HAKMEM_TINY_PHASE6_METADATA) && defined(HAKMEM_TINY_PHASE6_ULTRA_SIMPLE)
|
||
#error "Cannot enable both PHASE6_METADATA and PHASE6_ULTRA_SIMPLE"
|
||
#endif
|
||
|
||
|
||
// ============================================================================
|
||
// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_phase6_wrappers_box.inc"
|
||
|
||
|
||
// Layer 1-3: Main allocation function (simplified)
|
||
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
|
||
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
|
||
#define HAKMEM_TINY_USE_NEW_3LAYER 0 // default OFF (legacy path)
|
||
#endif
|
||
#if HAKMEM_TINY_USE_NEW_3LAYER
|
||
#include "hakmem_tiny_alloc_new.inc"
|
||
#else
|
||
// Old 6-7 layer architecture (backup)
|
||
#include "hakmem_tiny_alloc.inc"
|
||
#endif
|
||
|
||
#include "hakmem_tiny_slow.inc"
|
||
|
||
// Free path implementations
|
||
#include "hakmem_tiny_free.inc"
|
||
|
||
// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
|
||
// Allows runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
|
||
#if !HAKMEM_TINY_P0_BATCH_REFILL
// Weak default for sll_refill_batch_from_ss(): forwards to the small refill
// with the same arguments and returns its result.
//
// The guard tests the macro's VALUE (not merely whether it is defined) so it
// matches the `#if HAKMEM_TINY_P0_BATCH_REFILL` guard used for the forward
// declarations earlier in this file: a build with the macro defined as 0
// still gets this fallback.
int sll_refill_small_from_ss(int class_idx, int max_take);
__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    return sll_refill_small_from_ss(class_idx, max_take);
}
#endif
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
|
||
// ============================================================================
|
||
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
|
||
// Public trim and cleanup operation for lifecycle management
|
||
|
||
// Forward decl for internal registry lookup used by ultra safety validation
|
||
static TinySlab* registry_lookup(uintptr_t slab_base);
|
||
|
||
// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)
|
||
|
||
static inline int ultra_validate_sll_head(int class_idx, void* head) {
|
||
uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
|
||
TinySlab* owner = registry_lookup(base);
|
||
if (!owner) return 0;
|
||
uintptr_t start = (uintptr_t)owner->base;
|
||
if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
|
||
return (owner->class_idx == class_idx);
|
||
}
|
||
// Optional: wrapper TLS guard (detect wrapper re-entrancy with a TLS counter)
|
||
#ifndef HAKMEM_WRAPPER_TLS_GUARD
|
||
#define HAKMEM_WRAPPER_TLS_GUARD 0
|
||
#endif
|
||
#if HAKMEM_WRAPPER_TLS_GUARD
|
||
extern __thread int g_tls_in_wrapper;
|
||
#endif
|
||
|
||
// ============================================================================
|
||
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
|
||
// ============================================================================
|
||
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
|
||
// Static function for draining TLS caches
|
||
//
|
||
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
|
||
// Static function for memory diet mode application
|
||
//
|
||
// Phase 2D-3: Lifecycle management functions (226 lines total)
|
||
#include "hakmem_tiny_lifecycle.inc"
|
||
|
||
// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
|
||
#include "hakmem_tiny_slab_mgmt.inc"
|
||
|
||
// Tiny Heap v2 stats dump (opt-in)
|
||
void tiny_heap_v2_print_stats(void) {
|
||
// Priority-2: Use cached ENV
|
||
if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;
|
||
|
||
fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
|
||
TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
|
||
fprintf(stderr,
|
||
"C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
|
||
cls,
|
||
mag->top,
|
||
(unsigned long long)st->alloc_calls,
|
||
(unsigned long long)st->mag_hits,
|
||
(unsigned long long)st->refill_calls,
|
||
(unsigned long long)st->refill_blocks,
|
||
(unsigned long long)st->backend_oom);
|
||
}
|
||
}
|
||
|
||
/*
 * Destructor hook: calls tiny_heap_v2_print_stats(), which decides for itself
 * whether anything is printed.
 */
static void __attribute__((destructor)) tiny_heap_v2_stats_atexit(void) {
    tiny_heap_v2_print_stats();
}
|
||
|
||
// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
|
||
_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};
|
||
static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
|
||
static void tiny_alloc_1024_diag_atexit(void) {
|
||
// Priority-2: Use cached ENV
|
||
if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
|
||
fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
|
||
if (v) {
|
||
fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
|
||
}
|
||
}
|
||
fprintf(stderr, "\n");
|
||
}
|
||
|
||
// TLS SLL pointer diagnostics (optional)
|
||
extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
|
||
extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
|
||
static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
|
||
static void tiny_tls_sll_diag_atexit(void) {
|
||
#if !HAKMEM_BUILD_RELEASE
|
||
// Priority-2: Use cached ENV
|
||
if (!HAK_ENV_TINY_SLL_DIAG()) return;
|
||
fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
|
||
for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
|
||
uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
|
||
uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
|
||
if (ih || ip) {
|
||
fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
|
||
cls, (unsigned long long)ih, (unsigned long long)ip);
|
||
}
|
||
}
|
||
#endif
|
||
}
|
||
|
||
|
||
// ============================================================================
|
||
// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_ace_guard_box.inc"
|