#include "hakmem_tiny.h"
|
||
#include "hakmem_tiny_config.h" // Centralized configuration
|
||
#include "hakmem_phase7_config.h" // Phase 7: Task 3 constants (PREWARM_COUNT, etc.)
|
||
#include "hakmem_tiny_superslab.h"
|
||
#include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary // Phase 6.22: SuperSlab allocator
|
||
#include "hakmem_super_registry.h" // Phase 8.2: SuperSlab registry for memory profiling
|
||
#include "hakmem_internal.h"
|
||
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
|
||
#include "hakmem_tiny_magazine.h"
|
||
#include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection
|
||
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
|
||
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
|
||
#include "hakmem_env_cache.h" // Priority-2: ENV cache
|
||
#include "box/tiny_cold_iface_v1.h" // Cold boundary wrapper for TinyHotHeap v2
|
||
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
|
||
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
|
||
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
|
||
// Phase 2B modules
|
||
#include "tiny_api.h" // Consolidated: stats_api, query_api, rss_api, registry_api
|
||
#include "tiny_tls.h"
|
||
#include "tiny_debug.h"
|
||
#include "hakmem_debug_master.h" // For unified debug level control
|
||
#include "tiny_mmap_gate.h"
|
||
#include "tiny_debug_ring.h"
|
||
#include "tiny_route.h"
|
||
#include "front/tiny_heap_v2.h"
|
||
#include "box/tiny_front_stats_box.h"
|
||
#include "box/tiny_front_v3_env_box.h"
|
||
#include "box/ss_os_acquire_box.h"
|
||
#include "tiny_tls_guard.h"
|
||
#include "tiny_ready.h"
|
||
#include "box/c7_meta_used_counter_box.h"
|
||
#include "box/tiny_c7_hotbox.h"
|
||
#include "box/tiny_heap_box.h"
|
||
#include "box/tiny_hotheap_v2_box.h"
|
||
#include "box/tiny_route_env_box.h"
|
||
#include "box/super_reg_box.h"
|
||
#include "tiny_region_id.h"
|
||
#include "tiny_debug_api.h"
|
||
#include "hakmem_tiny_tls_list.h"
|
||
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
|
||
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
|
||
#include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive TLS cache sizing
|
||
// NOTE: hakmem_tiny_tls_ops.h included later (after type definitions)
|
||
#include "tiny_system.h" // Consolidated: stdio, stdlib, string, etc.
|
||
#include "hakmem_prof.h"
|
||
#include "hakmem_trace.h" // Optional USDT (perf) tracepoints
|
||
|
||
extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c

// Tiny allocator configuration, debug counters, and return helpers
#include "hakmem_tiny_config_box.inc"

// ============================================================================
// Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h)
// ============================================================================
__thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0};
__thread tiny_heap_ctx_t g_tiny_heap_ctx;
__thread int g_tiny_heap_ctx_init = 0;
__thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2 = NULL;
TinyHeapClassStats g_tiny_heap_stats[TINY_NUM_CLASSES] = {0};
TinyC7PageStats g_c7_page_stats = {0};
tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
int g_tiny_route_snapshot_done = 0;
_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};
TinyFrontV3Snapshot g_tiny_front_v3_snapshot = {0};
int g_tiny_front_v3_snapshot_ready = 0;
static TinyFrontV3SizeClassEntry g_tiny_front_v3_lut[TINY_MAX_SIZE + 1] = {0};
static int g_tiny_front_v3_lut_ready = 0;

// Forward decls (to keep deps light in this TU)
int unified_cache_enabled(void);

static int tiny_heap_stats_dump_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* eh = getenv("HAKMEM_TINY_HEAP_STATS_DUMP");
        const char* e = getenv("HAKMEM_TINY_C7_HEAP_STATS_DUMP");
        g = ((eh && *eh && *eh != '0') || (e && *e && *e != '0')) ? 1 : 0;
    }
    return g;
}

void tiny_front_v3_snapshot_init(void) {
    if (g_tiny_front_v3_snapshot_ready) {
        return;
    }
    TinyFrontV3Snapshot snap = {
        .unified_cache_on = unified_cache_enabled(),
        .tiny_guard_on = tiny_guard_is_enabled(),
        .header_mode = (uint8_t)tiny_header_mode(),
        .header_v3_enabled = tiny_header_v3_enabled(),
        .header_v3_skip_c7 = tiny_header_v3_skip_c7(),
    };
    g_tiny_front_v3_snapshot = snap;
    g_tiny_front_v3_snapshot_ready = 1;
}

void tiny_front_v3_size_class_lut_init(void) {
    if (g_tiny_front_v3_lut_ready) {
        return;
    }
    tiny_route_snapshot_init();
    size_t max_size = tiny_get_max_size();
    if (max_size > TINY_MAX_SIZE) {
        max_size = TINY_MAX_SIZE;
    }
    for (size_t sz = 0; sz <= TINY_MAX_SIZE; sz++) {
        TinyFrontV3SizeClassEntry e = {
            .class_idx = TINY_FRONT_V3_INVALID_CLASS,
            .route_kind = (uint8_t)TINY_ROUTE_LEGACY,
        };
        if (sz == 0 || sz > max_size) {
            g_tiny_front_v3_lut[sz] = e;
            continue;
        }
        int cls = hak_tiny_size_to_class((int)sz);
        if (cls >= 0 && cls < TINY_NUM_CLASSES) {
            e.class_idx = (uint8_t)cls;
            e.route_kind = (uint8_t)tiny_route_for_class((uint8_t)cls);
        }
        g_tiny_front_v3_lut[sz] = e;
    }
    g_tiny_front_v3_lut_ready = 1;
}

const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size) {
    if (__builtin_expect(!g_tiny_front_v3_lut_ready, 0)) {
        tiny_front_v3_size_class_lut_init();
    }
    if (size == 0 || size > TINY_MAX_SIZE) {
        return NULL;
    }
    return &g_tiny_front_v3_lut[size];
}
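
// Example (hypothetical caller, for illustration only): routing a 24-byte
// request through the LUT instead of recomputing size->class on every call.
//   const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(24);
//   if (e && e->class_idx != TINY_FRONT_V3_INVALID_CLASS) {
//       dispatch_by_route((tiny_route_kind_t)e->route_kind, e->class_idx);
//   }
// dispatch_by_route() is a made-up name; real callers live in the front-end.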

__attribute__((destructor))
static void tiny_heap_stats_dump(void) {
    if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapClassStats snap = {
            .alloc_fast_current = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fast_current, memory_order_relaxed),
            .alloc_slow_prepare = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_slow_prepare, memory_order_relaxed),
            .free_fast_local = atomic_load_explicit(&g_tiny_heap_stats[cls].free_fast_local, memory_order_relaxed),
            .free_slow_fallback = atomic_load_explicit(&g_tiny_heap_stats[cls].free_slow_fallback, memory_order_relaxed),
            .alloc_prepare_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_prepare_fail, memory_order_relaxed),
            .alloc_fail = atomic_load_explicit(&g_tiny_heap_stats[cls].alloc_fail, memory_order_relaxed),
        };
        if (snap.alloc_fast_current == 0 && snap.alloc_slow_prepare == 0 &&
            snap.free_fast_local == 0 && snap.free_slow_fallback == 0 &&
            snap.alloc_prepare_fail == 0 && snap.alloc_fail == 0) {
            continue;
        }
        fprintf(stderr,
                "[HEAP_STATS cls=%d] alloc_fast_current=%llu alloc_slow_prepare=%llu free_fast_local=%llu free_slow_fallback=%llu alloc_prepare_fail=%llu alloc_fail=%llu\n",
                cls,
                (unsigned long long)snap.alloc_fast_current,
                (unsigned long long)snap.alloc_slow_prepare,
                (unsigned long long)snap.free_fast_local,
                (unsigned long long)snap.free_slow_fallback,
                (unsigned long long)snap.alloc_prepare_fail,
                (unsigned long long)snap.alloc_fail);
    }
    TinyC7PageStats ps = {
        .prepare_calls = atomic_load_explicit(&g_c7_page_stats.prepare_calls, memory_order_relaxed),
        .prepare_with_current_null = atomic_load_explicit(&g_c7_page_stats.prepare_with_current_null, memory_order_relaxed),
        .prepare_from_partial = atomic_load_explicit(&g_c7_page_stats.prepare_from_partial, memory_order_relaxed),
        .current_set_from_free = atomic_load_explicit(&g_c7_page_stats.current_set_from_free, memory_order_relaxed),
        .current_dropped_to_partial = atomic_load_explicit(&g_c7_page_stats.current_dropped_to_partial, memory_order_relaxed),
    };
    if (ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
        ps.current_set_from_free || ps.current_dropped_to_partial) {
        fprintf(stderr,
                "[C7_PAGE_STATS] prepare_calls=%llu prepare_with_current_null=%llu prepare_from_partial=%llu current_set_from_free=%llu current_dropped_to_partial=%llu\n",
                (unsigned long long)ps.prepare_calls,
                (unsigned long long)ps.prepare_with_current_null,
                (unsigned long long)ps.prepare_from_partial,
                (unsigned long long)ps.current_set_from_free,
                (unsigned long long)ps.current_dropped_to_partial);
        fflush(stderr);
    }
}

__attribute__((destructor))
static void tiny_front_class_stats_dump(void) {
    if (!tiny_front_class_stats_dump_enabled()) {
        return;
    }
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t a = atomic_load_explicit(&g_tiny_front_alloc_class[cls], memory_order_relaxed);
        uint64_t f = atomic_load_explicit(&g_tiny_front_free_class[cls], memory_order_relaxed);
        if (a == 0 && f == 0) {
            continue;
        }
        fprintf(stderr, "[FRONT_CLASS cls=%d] alloc=%llu free=%llu\n",
                cls, (unsigned long long)a, (unsigned long long)f);
    }
}

__attribute__((destructor))
static void tiny_c7_delta_debug_destructor(void) {
    if (tiny_c7_meta_light_enabled() && tiny_c7_delta_debug_enabled()) {
        tiny_c7_heap_debug_dump_deltas();
    }
    if (tiny_heap_meta_light_enabled_for_class(6) && tiny_c6_delta_debug_enabled()) {
        tiny_c6_heap_debug_dump_deltas();
    }
}

// =============================================================================
// TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper.
// NOTE: As of Phase34/35, v2 is slower than v1 even in C7-only runs, and shows
// a large regression on mixed workloads. It is intended only for runs where the
// experimental flag is explicitly turned ON; v1 remains the recommended default.
// =============================================================================
static inline int tiny_hotheap_v2_stats_enabled(void) {
    static int g = -1;
    if (__builtin_expect(g == -1, 0)) {
        const char* e = getenv("HAKMEM_TINY_HOTHEAP_V2_STATS");
        g = (e && *e && *e != '0') ? 1 : 0;
    }
    return g;
}
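
// Usage sketch (shell invocation is illustrative): run with
//   HAKMEM_TINY_HOTHEAP_V2_STATS=1 ./your_benchmark
// Any non-empty value other than "0" enables the v2 counters below; the
// destructor tiny_hotheap_v2_stats_dump() then prints them at process exit.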

static _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES] = {0};

typedef struct {
    _Atomic uint64_t prepare_calls;
    _Atomic uint64_t prepare_with_current_null;
    _Atomic uint64_t prepare_from_partial;
    _Atomic uint64_t free_made_current;
    _Atomic uint64_t page_retired;
} TinyHotHeapV2PageStats;

static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES] = {0};

static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
                                             uint8_t class_idx,
                                             tiny_hotheap_page_v2* page);

static inline uint8_t tiny_hotheap_v2_idx(uint8_t class_idx) {
    return (class_idx < TINY_HOTHEAP_MAX_CLASSES) ? class_idx : 0;
}

void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[tiny_hotheap_v2_idx(class_idx)],
                              1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[tiny_hotheap_v2_idx(class_idx)],
                              1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out) {
    if (!out) return;
    memset(out, 0, sizeof(*out));
    uint8_t ci = 7; // snapshot covers C7 only (v2 is a C7-centric wrapper)
    out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
    out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
    out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
    out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
    out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed);
    out->refill_with_current = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
    out->refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
    out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
    out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed);
    out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
    out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
    out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
    out->cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
    out->cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
    out->retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
    out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed);
    out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed);
    out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed);
    out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed);
    out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed);
    out->partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
    out->partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
    out->partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_acquire_page_node(tiny_hotheap_class_v2* hc) {
    if (!hc) return NULL;
    if (hc->storage_page.meta == NULL && hc->storage_page.freelist == NULL &&
        hc->storage_page.capacity == 0) {
        tiny_hotheap_v2_page_reset(&hc->storage_page);
        return &hc->storage_page;
    }
    tiny_hotheap_page_v2* node = (tiny_hotheap_page_v2*)calloc(1, sizeof(tiny_hotheap_page_v2));
    if (!node) {
        return NULL;
    }
    tiny_hotheap_v2_page_reset(node);
    return node;
}
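
// Note: the first page node per class reuses the embedded hc->storage_page
// (detected above by its all-zero state), so the common single-page case needs
// no calloc(); additional nodes are heap-allocated and freed on retire.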

static tiny_hotheap_page_v2* tiny_hotheap_v2_find_page(tiny_hotheap_class_v2* hc,
                                                       uint8_t class_idx,
                                                       void* p,
                                                       TinySlabMeta* meta) {
    if (!hc || !p) return NULL;
    const size_t stride = hc->stride ? hc->stride : tiny_stride_for_class(class_idx);
    tiny_hotheap_page_v2* candidates[3] = {hc->current_page, hc->partial_pages, hc->full_pages};
    for (int i = 0; i < 3; i++) {
        for (tiny_hotheap_page_v2* page = candidates[i]; page; page = page->next) {
            if (meta && page->meta && page->meta != meta) continue;
            if (!page->base || page->capacity == 0) continue;
            uint8_t* base = (uint8_t*)page->base;
            size_t span = stride * (size_t)page->capacity;
            if ((uint8_t*)p >= base && (uint8_t*)p < base + span) {
                return page;
            }
        }
    }
    return NULL;
}

static inline void tiny_hotheap_v2_partial_push(tiny_hotheap_class_v2* hc,
                                                tiny_hotheap_page_v2* page,
                                                uint8_t class_idx,
                                                int stats_on) {
    if (!hc || !page) return;
    page->next = hc->partial_pages;
    hc->partial_pages = page;
    if (hc->partial_count < UINT16_MAX) {
        hc->partial_count++;
    }
    if (stats_on) {
        uint8_t idx = tiny_hotheap_v2_idx(class_idx);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pushes[idx], 1, memory_order_relaxed);
        uint64_t cur = hc->partial_count;
        uint64_t old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        while (cur > old &&
               !atomic_compare_exchange_weak_explicit(&g_tiny_hotheap_v2_partial_peak[idx],
                                                      &old,
                                                      cur,
                                                      memory_order_relaxed,
                                                      memory_order_relaxed)) {
            old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        }
    }
}
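
// The CAS loop above is the standard lock-free "atomic max" idiom: retry until
// either the stored peak is already >= our observed count or we succeed in
// publishing the new maximum. memory_order_relaxed suffices because the peak
// is a debug statistic, not a synchronization point.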

static inline void tiny_hotheap_v2_maybe_trim_partial(tiny_hotheap_ctx_v2* ctx,
                                                      tiny_hotheap_class_v2* hc,
                                                      uint8_t class_idx,
                                                      int stats_on) {
    if (!ctx || !hc) return;
    uint16_t limit = hc->max_partial_pages;
    if (limit == 0) {
        return;
    }
    while (hc->partial_count > limit && hc->partial_pages) {
        tiny_hotheap_page_v2* victim = hc->partial_pages;
        hc->partial_pages = victim->next;
        if (hc->partial_count > 0) {
            hc->partial_count--;
        }
        victim->next = NULL;
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[tiny_hotheap_v2_idx(class_idx)],
                                      1,
                                      memory_order_relaxed);
        }
        tiny_hotheap_v2_page_retire_slow(ctx, class_idx, victim);
    }
}

static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
                                                  uint8_t class_idx,
                                                  uint16_t stride) {
    if (!page || stride == 0) {
        return;
    }
    if (page->used >= page->capacity) {
        page->freelist = NULL;
        return;
    }
    void* head = NULL;
    size_t start = page->capacity;
    while (start > page->used) {
        start--;
        uint8_t* block = (uint8_t*)page->base + (start * (size_t)stride);
        tiny_next_write(class_idx, block, head);
        head = block;
    }
    page->freelist = head;
}
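
// The loop above walks backwards from capacity-1 down to `used`, so the
// resulting singly linked freelist hands out blocks in ascending address
// order, which keeps the first allocations cache-friendly.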

static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* target) {
    if (!hc || !target) return;
    if (hc->current_page == target) {
        hc->current_page = NULL;
    }
    tiny_hotheap_page_v2** lists[2] = {&hc->partial_pages, &hc->full_pages};
    for (int i = 0; i < 2; i++) {
        tiny_hotheap_page_v2** head = lists[i];
        tiny_hotheap_page_v2* prev = NULL;
        tiny_hotheap_page_v2* cur = *head;
        while (cur) {
            if (cur == target) {
                if (prev) {
                    prev->next = cur->next;
                } else {
                    *head = cur->next;
                }
                cur->next = NULL;
                if (i == 0 && hc->partial_count > 0) {
                    hc->partial_count--;
                }
                break;
            }
            prev = cur;
            cur = cur->next;
        }
    }
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx) {
    if (!ctx || class_idx >= TINY_HOTHEAP_MAX_CLASSES) {
        return NULL;
    }
    int stats_on = tiny_hotheap_v2_stats_enabled();
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_refill[class_idx], 1, memory_order_relaxed);
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
    }
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    if (hc->current_page) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_current[class_idx],
                                  1,
                                  memory_order_relaxed);
    }
    if (hc->partial_pages) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_partial[class_idx],
                                  1,
                                  memory_order_relaxed);
    }

    // Lease one page from the cold interface (v1 TinyHeap).
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* ipage = cold.refill_page ? cold.refill_page(cold_ctx, class_idx) : NULL;
    if (!ipage || !ipage->base || ipage->capacity == 0 || ipage->meta == NULL) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_refill_fail[class_idx], 1, memory_order_relaxed);
        return NULL;
    }

    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }

    tiny_hotheap_page_v2* page = tiny_hotheap_v2_acquire_page_node(hc);
    if (!page) {
        return NULL;
    }

    page->lease_page = ipage;
    page->meta = ipage->meta;
    page->ss = ipage->ss;
    page->base = ipage->base;
    page->capacity = ipage->capacity;
    page->slab_idx = ipage->slab_idx;
    page->freelist = NULL;
    page->used = 0;

    const uint16_t stride = hc->stride ? hc->stride : (uint16_t)tiny_stride_for_class(class_idx);
    tiny_hotheap_v2_build_freelist(page, class_idx, stride);

    tiny_hotheap_page_v2* old_cur = hc->current_page;
    hc->current_page = page;
    page->next = NULL;
    if (old_cur && old_cur != page) {
        tiny_hotheap_v2_partial_push(hc, old_cur, class_idx, stats_on);
    }
    tiny_hotheap_v2_maybe_trim_partial(ctx, hc, class_idx, stats_on);
    if (!hc->current_page || !hc->current_page->freelist || hc->current_page->capacity == 0 ||
        hc->current_page->used > hc->current_page->capacity) {
        fprintf(stderr, "[HOTHEAP_V2_REFILL_ASSERT] current_page missing freelist (page=%p freelist=%p cap=%u used=%u)\n",
                (void*)hc->current_page,
                hc->current_page ? hc->current_page->freelist : NULL,
                hc->current_page ? (unsigned)hc->current_page->capacity : 0u,
                hc->current_page ? (unsigned)hc->current_page->used : 0u);
        return NULL;
    }
    return hc->current_page;
}
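
// Lease lifecycle sketch: refill borrows a tiny_heap_page_t from the v1 cold
// interface, wraps it in a tiny_hotheap_page_v2, and serves allocations from
// the wrapper; retire (below) writes used/freelist back into the leased page
// and its TinySlabMeta before handing it back via cold.retire_page().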

static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
                                             uint8_t class_idx,
                                             tiny_hotheap_page_v2* page) {
    if (!ctx || !page) return;
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    tiny_hotheap_v2_unlink_page(hc, page);
    if (page->lease_page) {
        page->lease_page->used = page->used;
        page->lease_page->free_list = page->freelist;
        if (page->lease_page->meta) {
            atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
            atomic_store_explicit(&page->lease_page->meta->used, page->used, memory_order_relaxed);
        }
    }
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    if (cold.retire_page) {
        cold.retire_page(cold_ctx, class_idx, page->lease_page);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_retire_calls[idx], 1, memory_order_relaxed);
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_retire_calls_v2[idx], 1, memory_order_relaxed);
    }
    if (page != &hc->storage_page) {
        free(page);
    } else {
        tiny_hotheap_v2_page_reset(page);
    }
    if (!hc->current_page && hc->partial_pages) {
        hc->current_page = hc->partial_pages;
        hc->partial_pages = hc->partial_pages->next;
        if (hc->partial_count > 0) {
            hc->partial_count--; // keep the soft count in sync when promoting from partial
        }
        if (hc->current_page) {
            hc->current_page->next = NULL;
        }
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].page_retired, 1, memory_order_relaxed);
    }
}

static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_class_v2* hc,
                                            tiny_hotheap_page_v2* page,
                                            uint8_t class_idx,
                                            TinyHeapClassStats* stats,
                                            int stats_on) {
    if (!hc || !page || !page->base || page->capacity == 0) {
        return NULL;
    }
    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }
    const uint16_t stride = hc->stride;
    void* block = NULL;
    if (page->freelist) {
        block = page->freelist;
        void* next = tiny_next_read(class_idx, block);
        page->freelist = next;
    } else if (page->used < page->capacity) {
        block = (void*)((uint8_t*)page->base + ((size_t)page->used * stride));
    } else {
        return NULL;
    }
    page->used++;
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
    }
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fast[tiny_hotheap_v2_idx(class_idx)],
                                  1,
                                  memory_order_relaxed);
    }
    return tiny_region_id_write_header(block, class_idx);
}
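
// Pop order above: freelist first (recycled blocks), then a bump allocation
// from the never-used tail while used < capacity. Returning through
// tiny_region_id_write_header() stamps the class header, so callers receive
// the user pointer, not the raw block base.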

__attribute__((destructor))
static void tiny_hotheap_v2_stats_dump(void) {
    if (!tiny_hotheap_v2_stats_enabled()) {
        return;
    }
    for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) {
        uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
        uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
        uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
        uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
        uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
        uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
        uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
        uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
        uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
        uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
        uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
        uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
        uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
        uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
        uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
        uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);

        TinyHotHeapV2PageStats ps = {
            .prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed),
            .prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed),
            .prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed),
            .free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed),
            .page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed),
        };

        if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
              ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
              ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) {
            continue;
        }

        tiny_route_kind_t route_kind = tiny_route_for_class(ci);
        fprintf(stderr,
                "[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n",
                (unsigned)ci,
                (int)route_kind,
                (unsigned long long)route_hits,
                (unsigned long long)alloc_calls,
                (unsigned long long)alloc_fast,
                (unsigned long long)alloc_lease,
                (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed),
                (unsigned long long)refill_with_cur,
                (unsigned long long)refill_with_partial,
                (unsigned long long)alloc_fb,
                (unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed),
                (unsigned long long)cold_refill_fail,
                (unsigned long long)cold_retire_calls,
                (unsigned long long)retire_calls_v2,
                (unsigned long long)free_calls,
                (unsigned long long)free_fast,
                (unsigned long long)free_fb,
                (unsigned long long)ps.prepare_calls,
                (unsigned long long)ps.prepare_with_current_null,
                (unsigned long long)ps.prepare_from_partial,
                (unsigned long long)ps.free_made_current,
                (unsigned long long)ps.page_retired,
                (unsigned long long)partial_pushes,
                (unsigned long long)partial_pops,
                (unsigned long long)partial_peak);
    }
}

tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
    tiny_hotheap_ctx_v2* ctx = g_tiny_hotheap_ctx_v2;
    if (__builtin_expect(ctx == NULL, 0)) {
        ctx = (tiny_hotheap_ctx_v2*)calloc(1, sizeof(tiny_hotheap_ctx_v2));
        if (__builtin_expect(ctx == NULL, 0)) {
            fprintf(stderr, "[TinyHotHeapV2] TLS alloc failed (OOM)\n");
            abort();
        }
        g_tiny_hotheap_ctx_v2 = ctx;
        for (int i = 0; i < TINY_HOTHEAP_MAX_CLASSES; i++) {
            tiny_hotheap_v2_page_reset(&ctx->cls[i].storage_page);
            ctx->cls[i].stride = (uint16_t)tiny_stride_for_class(i);
            ctx->cls[i].max_partial_pages = (i == 7 || i == 6) ? 2 : 0; // C6/C7 keep one or two partial pages
            ctx->cls[i].partial_count = 0;
        }
    }
    return ctx;
}
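
// The context is calloc'd once per thread on first use; no thread-exit
// destructor is registered here, so it lives for the thread's lifetime.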

void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
    int stats_on = tiny_hotheap_v2_stats_enabled();
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_route_hits[idx], 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_calls[idx], 1, memory_order_relaxed);
    }
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return NULL; // only C6/C7 are wired up for now
    }

    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);

    // If current_page looks corrupted, drop it and fall through to the slow path.
    if (v2page && (!v2page->base || v2page->capacity == 0)) {
        vhcls->current_page = NULL;
        v2page = NULL;
    }

    // Hot path: current_page -> partial -> refill
    void* user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }

    // Move an exhausted current_page to the full list if needed.
    if (vhcls && v2page && v2page->used >= v2page->capacity && vhcls->current_page == v2page) {
        vhcls->current_page = NULL;
        v2page->next = vhcls->full_pages;
        vhcls->full_pages = v2page;
    }

    while (vhcls && vhcls->partial_pages) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_calls, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_from_partial, 1, memory_order_relaxed);
            if (vhcls->current_page == NULL) {
                atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_with_current_null, 1, memory_order_relaxed);
            }
        }
        v2page = vhcls->partial_pages;
        vhcls->partial_pages = vhcls->partial_pages->next;
        if (vhcls->partial_count > 0) {
            vhcls->partial_count--;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[idx], 1, memory_order_relaxed);
        }
        v2page->next = NULL;
        vhcls->current_page = v2page;
        user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
        if (user) {
            return user;
        }
        if (v2page->used >= v2page->capacity) {
            v2page->next = vhcls->full_pages;
            vhcls->full_pages = v2page;
            vhcls->current_page = NULL;
        }
    }

    // Lease a page from v1 (C7 SAFE) and wrap it
    tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, class_idx);
    if (!leased) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[idx], 1, memory_order_relaxed);
        }
        size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
        if (class_idx == 7) {
            return tiny_c7_alloc_fast(size); // safety fallback to v1
        }
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
    }
    vhcls->current_page = leased;
    v2page = leased;
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_lease[idx], 1, memory_order_relaxed);
    }

    user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }

    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
    }
    size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
    if (class_idx == 7) {
        return tiny_c7_alloc_fast(size);
    }
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
}

void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return;
    }
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    int stats_on = tiny_hotheap_v2_stats_enabled();
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_calls[idx], 1, memory_order_relaxed);
    }
    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;

    tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, class_idx, p, meta_ptr);
    if (page && page->base && page->capacity > 0) {
        tiny_next_write(class_idx, p, page->freelist);
        page->freelist = p;
        if (page->used > 0) {
            page->used--;
        }
        if (vhcls && vhcls->current_page != page) {
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = vhcls->current_page;
            vhcls->current_page = page;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].free_made_current, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
        }
        if (page->used == 0) {
            // Keep the now-empty page on the partial list for reuse; retire it
            // once the partial list exceeds its limit.
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = NULL;
            if (vhcls && vhcls->current_page == NULL) {
                vhcls->current_page = page;
            } else if (vhcls) {
                tiny_hotheap_v2_partial_push(vhcls, page, class_idx, stats_on);
                tiny_hotheap_v2_maybe_trim_partial(v2ctx, vhcls, class_idx, stats_on);
            }
        }
        return;
    }

    // Fallback: mimic the v1 free path
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[idx], 1, memory_order_relaxed);
    }
    SuperSlab* ss = hak_super_lookup(p);
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int slab_idx = slab_index_for(ss, p);
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
            if (class_idx == 7) {
                tiny_c7_free_fast_with_meta(ss, slab_idx, p);
            } else {
                tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
                tiny_heap_free_class_fast_with_meta(cold_ctx, class_idx, ss, slab_idx, p);
            }
            return;
        }
    }
    if (class_idx == 7) {
        tiny_c7_free_fast(p);
    } else {
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        tiny_heap_free_class_fast(cold_ctx, class_idx, p);
    }
}

#if !HAKMEM_BUILD_RELEASE
// Helper to dump the last TLS SLL push from core/hakmem.c (SEGV handler).
// Must be visible to other TUs (declare extern in hakmem_tiny.h or similar if
// needed; the SEGV handler lives in core/hakmem.c, which can dlsym or weak-link it).
__attribute__((noinline))
void tiny_debug_dump_last_push(int cls) {
    hak_base_ptr_t p = s_tls_sll_last_push[cls];
    void* raw = HAK_BASE_TO_RAW(p);
    fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
    if (raw && (uintptr_t)raw > 4096) {
        unsigned long* vals = (unsigned long*)raw;
        fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
    }
}
#endif

// Forward declarations for static helpers used before definition
struct TinySlab; // forward
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
static void release_slab(struct TinySlab* slab);
static TinySlab* allocate_new_slab(int class_idx);
static void tiny_tls_cache_drain(int class_idx);
static void tiny_apply_mem_diet(void);

// Phase 6.23: SuperSlab allocation forward declarations
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);
// Forward decl: used by tiny_spec_pop_path before its definition
#if HAKMEM_TINY_P0_BATCH_REFILL
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
#else
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
// Only a single implementation exists there; declare here for callers.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
int sll_refill_small_from_ss(int class_idx, int max_take);
#else
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
#endif
#endif
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
static void* __attribute__((cold, noinline)) tiny_slow_alloc_fast(int class_idx);
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
static void tiny_remote_drain_locked(struct TinySlab* slab);
// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
/* moved below TinyTLSSlab definition */

// Step 3d: Forced inlining for readability + performance (306M target)
__attribute__((always_inline))
static inline void* hak_tiny_alloc_wrapper(int class_idx);

// Helpers for SuperSlab active block accounting (atomic, saturating dec)
// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
#include "hakmem_tiny_ss_active_box.inc"

// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)

// Front refill count global config (declare before init.inc uses them)
extern int g_refill_count_global;
extern int g_refill_count_hot;
extern int g_refill_count_mid;
extern int g_refill_count_class[TINY_NUM_CLASSES];

// Step 3d: Forced inlining for slow path (maintain monolithic performance)
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#endif

// ---------------------------------------------------------------------------
// Box: adopt_gate_try (implementation moved from header for robust linkage)
// ---------------------------------------------------------------------------
#include "box/adopt_gate_box.h"
#include "box/super_reg_box.h"
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
extern unsigned long long g_adopt_gate_calls[];
extern unsigned long long g_adopt_gate_success[];
extern unsigned long long g_reg_scan_attempts[];
extern unsigned long long g_reg_scan_hits[];
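
// adopt_gate_try() strategy: first try the fast refill path; if that misses,
// scan up to tiny_reg_scan_max() registered SuperSlabs of this class, using
// nonempty_mask/freelist_mask as a bitmap of slabs worth adopting, and bind
// the first slab that can be safely acquired by this thread.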
SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
    g_adopt_gate_calls[class_idx]++;
    ROUTE_MARK(13);
    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
    if (ss) { g_adopt_gate_success[class_idx]++; return ss; }
    g_reg_scan_attempts[class_idx]++;
    int reg_size = g_super_reg_class_size[class_idx];
    int reg_cap = super_reg_effective_per_class();
    if (reg_cap > 0 && reg_size > reg_cap) {
        reg_size = reg_cap;
    }
    int scan_limit = tiny_reg_scan_max();
    if (scan_limit > reg_size) scan_limit = reg_size;
    uint32_t self_tid = tiny_self_u32();
    // Local helper (mirrors adopt_bind_if_safe) to avoid including the alloc
    // inline header here. Uses the GCC nested-function extension.
    auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (!slab_is_valid(&h)) return 0;
        slab_drain_remote_full(&h);
        if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
            tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
            slab_release(&h);
            return 1;
        }
        slab_release(&h);
        return 0;
    }

    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* cand = super_reg_by_class_at(class_idx, i);
        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
        uint32_t mask = cand->nonempty_mask;
        // Fallback to atomic freelist_mask for cross-thread visibility
        if (mask == 0) {
            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
        }
        if (mask == 0) continue; // No visible freelists in this SS
        int cap = ss_slabs_capacity(cand);
        while (mask) {
            int sidx = __builtin_ctz(mask);
            mask &= (mask - 1); // clear the lowest set bit
            if (sidx >= cap) continue;
            if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
                g_adopt_gate_success[class_idx]++;
                g_reg_scan_hits[class_idx]++;
                ROUTE_MARK(14); ROUTE_COMMIT(class_idx, 0x07);
                return cand;
            }
        }
    }
    return NULL;
}

// ============================================================================
// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
// ============================================================================
#include "hakmem_tiny_globals_box.inc"

#include "hakmem_tiny_publish_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)
// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
static inline hak_base_ptr_t fastcache_pop(int class_idx);
static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);

// ============================================================================
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
// 88 lines (lines 407-494)

// ============================================================================
// Legacy Slow Allocation Path - ARCHIVED
// ============================================================================
// Note: tiny_slow_alloc_fast() and the related legacy slow path implementation
// have been moved to archive/hakmem_tiny_legacy_slow_box.inc and are no
// longer compiled. The current slow path uses the Box-refactored
// hak_tiny_alloc_slow().

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)

// ============================================================================
// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
// ============================================================================
#include "hakmem_tiny_tls_state_box.inc"

#include "hakmem_tiny_intel.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
// ============================================================================
// EXTRACTED: static int get_rss_kb_self(void) {
// EXTRACTED:     FILE* f = fopen("/proc/self/status", "r");
// EXTRACTED:     if (!f) return 0;
// EXTRACTED:     char buf[256];
// EXTRACTED:     int kb = 0;
// EXTRACTED:     while (fgets(buf, sizeof(buf), f)) {
// EXTRACTED:         if (strncmp(buf, "VmRSS:", 6) == 0) {
// EXTRACTED:             char* p = buf;
// EXTRACTED:             while (*p && (*p < '0' || *p > '9')) {
// EXTRACTED:                 p++;
// EXTRACTED:             }
// EXTRACTED:             kb = atoi(p);
// EXTRACTED:             break;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     fclose(f);
// EXTRACTED:     return kb;
// EXTRACTED: }

// Option: on a miss, allocate just one block and return it immediately instead
// of bulk-refilling the magazine.
// Env: HAKMEM_TINY_REFILL_ONE_ON_MISS=1 enables it (default: 0)
int g_refill_one_on_miss = 0;

// Frontend fill target per class (adaptive)
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];

// Adaptive CAS: Active thread counter (for single-threaded optimization)
// Incremented on thread init, decremented on thread shutdown
_Atomic uint32_t g_hakmem_active_threads = 0;

// Per-thread registration flag (TLS variable)
static __thread int g_thread_registered = 0;

// Adaptive CAS: Register current thread (called on first allocation)
// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
__attribute__((always_inline))
inline void hakmem_thread_register(void) {
    if (__builtin_expect(g_thread_registered == 0, 0)) {
        g_thread_registered = 1;
        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}
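
// Example (hypothetical fast-path check, illustrative only): a caller can
// treat the process as effectively single-threaded while the counter is <= 1
// and skip CAS-based paths.
//   if (atomic_load_explicit(&g_hakmem_active_threads, memory_order_relaxed) <= 1) {
//       /* single-threaded: plain loads/stores are safe here */
//   }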

// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};

// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_batch_for_class(int class_idx) {
    int ov = g_ultra_batch_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 64;   // 8B
        case 1: return 96;   // 16B
        case 2: return 96;   // 32B
        case 3: return 224;  // 64B
        case 4: return 96;   // 128B
        case 5: return 64;   // 256B
        case 6: return 64;   // 512B
        default: return 32;  // 1024B and others
    }
}

// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_sll_cap_for_class(int class_idx) {
    int ov = g_ultra_sll_cap_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 256;  // 8B
        case 1: return 384;  // 16B
        case 2: return 384;  // 32B
        case 3: return 768;  // 64B
        case 4: return 256;  // 128B
        default: return 128; // others
    }
}
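
// Worked example: class 3 (64B) defaults to batch=224 and SLL cap=768, so one
// refill moves up to 224 blocks (14 KiB) and the TLS SLL can hold 768 blocks
// (48 KiB) before spilling. Both values can be overridden per class via the
// g_ultra_*_override arrays above.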

enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };

// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
#include "hakmem_tiny_eventq_box.inc"

// Background refill workers and intelligence engine
#include "hakmem_tiny_background.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)

// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
    if (!g_use_superslab) return NULL;
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    if (!meta) return NULL;
    // Try linear (bump) allocation first when the freelist is empty
    if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        // Use the per-slab class_idx to get the stride
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tls->slab_base + ((size_t)meta->used * block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        // Track active blocks in the SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        return block;
    }
    // Do not pop the freelist here (keep magazine/SLL handling consistent)
    return NULL;
}

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)

// Phase 2C-3: TLS operations module (included after helper function definitions)
#include "hakmem_tiny_tls_ops.h"

// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_refill_from_tls_slab() - 101 lines
// Hot path refill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_list_spill_excess() - 97 lines
// Hot path spill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)

// SLL capacity policy: for hot tiny classes (0..3), allow a larger SLL up to
// multiplier * mag_cap; for classes >= 4, keep the current conservative half
// (to limit footprint).
// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
#include "hakmem_tiny_sll_cap_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)

// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)

#include "hakmem_tiny_remote.inc"

// ============================================================================
// Internal Helpers
// ============================================================================

// Step 2: Slab Registry Operations

// Hash function for slab_base (64KB aligned)
// ============================================================================
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
// ============================================================================
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
// EXTRACTED:     return (slab_base >> 16) & SLAB_REGISTRY_MASK;
// EXTRACTED: }

// Register slab in hash table (returns 1 on success, 0 on failure)
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:     // Linear probing (max 8 attempts)
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot found
// EXTRACTED:             entry->slab_base = slab_base;
// EXTRACTED:             atomic_store_explicit(&entry->owner, owner, memory_order_release);
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return 1;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:
// EXTRACTED:     // Registry full (collision limit exceeded)
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:     return 0;
// EXTRACTED: }

// Unregister slab from hash table
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
// EXTRACTED:     pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:     int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:     // Linear probing search
// EXTRACTED:     for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:         int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:         SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == slab_base) {
// EXTRACTED:             // Found - clear entry (atomic store prevents TOCTOU race)
// EXTRACTED:             atomic_store_explicit(&entry->owner, NULL, memory_order_release);
// EXTRACTED:             entry->slab_base = 0;
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED:
// EXTRACTED:         if (entry->slab_base == 0) {
// EXTRACTED:             // Empty slot - not found
// EXTRACTED:             pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:             return;
// EXTRACTED:         }
// EXTRACTED:     }
// EXTRACTED:     pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: }
|
||
|
||
// Lookup slab by base address (O(1) average)
|
||
|
||
// ============================================================================
|
||
// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
|
||
// ============================================================================
|
||
#include "hakmem_tiny_slab_lookup_box.inc"


// Function: move_to_full_list() - 20 lines (lines 1104-1123)
// Move slab to full list

// Function: move_to_free_list() - 20 lines (lines 1126-1145)
// Move slab to free list

// ============================================================================
// Public API
// ============================================================================

// ============================================================================
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
// ============================================================================
#include "hakmem_tiny_init.inc"

// ============================================================================
// 3-Layer Architecture (2025-11-01 Simplification)
// ============================================================================
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
#include "hakmem_tiny_bump.inc.h"

// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
#include "hakmem_tiny_smallmag.inc.h"

// ============================================================================
// Phase 6 Fast Path Option (Metadata Header)
// ============================================================================
// Phase 6-1.6: Metadata Header (recommended)
// - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
// - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
// - Memory: ~6-12% overhead (8 bytes/allocation)
// - Method: Store pool_type + size_class in 8-byte header
// - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
// - Eliminates: Registry lookups, mid_lookup, owner checks
// ============================================================================
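
// A minimal sketch of the 8-byte header scheme described above (hypothetical
// names; the shipped layout is gated behind -DHAKMEM_TINY_PHASE6_METADATA=1):
#if 0
typedef struct {
    uint32_t pool_type;   // Tiny / Mid / L25 / Whale
    uint32_t size_class;  // class index within that pool
} hak_meta_hdr_t;         // exactly 8 bytes, prepended to every block

static inline void* meta_attach_sketch(void* raw, uint32_t pool, uint32_t cls) {
    hak_meta_hdr_t* h = (hak_meta_hdr_t*)raw;
    h->pool_type  = pool;
    h->size_class = cls;
    return (void*)(h + 1);            // user pointer starts after the header
}

static inline hak_meta_hdr_t* meta_of_sketch(void* user) {
    // Free path: one pointer subtraction replaces registry/owner lookups
    return (hak_meta_hdr_t*)user - 1;
}
#endif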

// Forward declarations for Phase 6 alloc/free functions


// ============================================================================
// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
// ============================================================================
#include "hakmem_tiny_phase6_wrappers_box.inc"


// Layer 1-3: Main allocation function (simplified)
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#define HAKMEM_TINY_USE_NEW_3LAYER 0 // default OFF (legacy path)
#endif
#if HAKMEM_TINY_USE_NEW_3LAYER
#include "hakmem_tiny_alloc_new.inc"
#else
// Old 6-7 layer architecture (backup)
#include "hakmem_tiny_alloc.inc"
#endif
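// Build example (assuming the usual CFLAGS plumbing): compile with
//   -DHAKMEM_TINY_USE_NEW_3LAYER=1
// to select the simplified 3-layer allocator; omit it (or set it to 0) to
// keep the legacy multi-layer path above.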

#include "hakmem_tiny_slow.inc"

// Free path implementations
#include "hakmem_tiny_free.inc"

// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
// Allows the runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
int sll_refill_small_from_ss(int class_idx, int max_take);
__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    return sll_refill_small_from_ss(class_idx, max_take);
}
#endif
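
// Illustration of the weak-symbol fallback above: a build that ships a real
// batch refill just defines the strong symbol, and the linker discards the
// weak default (sketch only; my_batch_refill_impl is a hypothetical helper):
#if 0
int sll_refill_batch_from_ss(int class_idx, int max_take)
{
    // Strong definition: overrides the weak fallback at link time
    return my_batch_refill_impl(class_idx, max_take);
}
#endif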

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
// Public trim and cleanup operation for lifecycle management

// Forward decl for internal registry lookup used by ultra safety validation
static TinySlab* registry_lookup(uintptr_t slab_base);

// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)

// Validate a candidate SLL head pointer before trusting it: the pointer must
// lie inside a registered slab that serves the expected size class.
static inline int ultra_validate_sll_head(int class_idx, void* head) {
    // Round down to the slab boundary to find the candidate slab base
    uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
    TinySlab* owner = registry_lookup(base);
    if (!owner) return 0; // not a registered slab
    // Reject pointers outside the slab's address range
    uintptr_t start = (uintptr_t)owner->base;
    if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
    // Finally, the slab must belong to the expected size class
    return (owner->class_idx == class_idx);
}
// Optional: wrapper TLS guard (detect wrapper re-entry via a TLS counter)
#ifndef HAKMEM_WRAPPER_TLS_GUARD
#define HAKMEM_WRAPPER_TLS_GUARD 0
#endif
#if HAKMEM_WRAPPER_TLS_GUARD
extern __thread int g_tls_in_wrapper;
#endif

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
// Static function for draining TLS caches
//
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
// Static function for memory diet mode application
//
// Phase 2D-3: Lifecycle management functions (226 lines total)
#include "hakmem_tiny_lifecycle.inc"

// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
#include "hakmem_tiny_slab_mgmt.inc"

// Tiny Heap v2 stats dump (opt-in)
void tiny_heap_v2_print_stats(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;

    fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
        TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
        fprintf(stderr,
                "C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
                cls,
                mag->top,
                (unsigned long long)st->alloc_calls,
                (unsigned long long)st->mag_hits,
                (unsigned long long)st->refill_calls,
                (unsigned long long)st->refill_blocks,
                (unsigned long long)st->backend_oom);
    }
}

static void tiny_heap_v2_stats_atexit(void) __attribute__((destructor));
static void tiny_heap_v2_stats_atexit(void) {
    tiny_heap_v2_print_stats();
}
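
// Usage sketch (the env var name is inferred from the HAK_ENV_TINY_HEAP_V2_STATS
// cache macro above, so treat it as an assumption): run the process with that
// variable set to a truthy value, and the destructor prints the per-class
// magazine dump to stderr on exit.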

// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};
static void tiny_alloc_1024_diag_atexit(void) __attribute__((destructor));
static void tiny_alloc_1024_diag_atexit(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_ALLOC_1024_METRIC()) return;
    fprintf(stderr, "\n[ALLOC_GE1024] per-class counts (size>=1024)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t v = atomic_load_explicit(&g_tiny_alloc_ge1024[cls], memory_order_relaxed);
        if (v) {
            fprintf(stderr, " C%d=%llu", cls, (unsigned long long)v);
        }
    }
    fprintf(stderr, "\n");
}

// TLS SLL pointer diagnostics (optional)
extern _Atomic uint64_t g_tls_sll_invalid_head[TINY_NUM_CLASSES];
extern _Atomic uint64_t g_tls_sll_invalid_push[TINY_NUM_CLASSES];
static void tiny_tls_sll_diag_atexit(void) __attribute__((destructor));
static void tiny_tls_sll_diag_atexit(void) {
#if !HAKMEM_BUILD_RELEASE
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_SLL_DIAG()) return;
    fprintf(stderr, "\n[TLS_SLL_DIAG] invalid head/push counts per class\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        uint64_t ih = atomic_load_explicit(&g_tls_sll_invalid_head[cls], memory_order_relaxed);
        uint64_t ip = atomic_load_explicit(&g_tls_sll_invalid_push[cls], memory_order_relaxed);
        if (ih || ip) {
            fprintf(stderr, " C%d: invalid_head=%llu invalid_push=%llu\n",
                    cls, (unsigned long long)ih, (unsigned long long)ip);
        }
    }
#endif
}


// ============================================================================
// Performance Measurement: TLS SLL Statistics Print Function
// ============================================================================
void tls_sll_print_measurements(void) {
    // Check once whether measurement is enabled (getenv result is cached)
    static int g_measure = -1;
    if (g_measure == -1) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        g_measure = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!g_measure) {
        return; // Measurement disabled
    }

    uint64_t pushes = atomic_load_explicit(&g_tls_sll_push_count_global, memory_order_relaxed);
    uint64_t pops = atomic_load_explicit(&g_tls_sll_pop_count_global, memory_order_relaxed);
    uint64_t pop_empty = atomic_load_explicit(&g_tls_sll_pop_empty_count_global, memory_order_relaxed);

    uint64_t total_pop_attempts = pops + pop_empty;
    if (total_pop_attempts == 0 && pushes == 0) {
        fprintf(stderr, "\n========================================\n");
        fprintf(stderr, "TLS SLL Statistics\n");
        fprintf(stderr, "========================================\n");
        fprintf(stderr, "No operations recorded\n");
        fprintf(stderr, "========================================\n\n");
        return;
    }

    double hit_rate = total_pop_attempts > 0 ? (100.0 * pops) / total_pop_attempts : 0.0;
    double empty_rate = total_pop_attempts > 0 ? (100.0 * pop_empty) / total_pop_attempts : 0.0;

    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "TLS SLL Statistics\n");
    fprintf(stderr, "========================================\n");
    fprintf(stderr, "Total Pushes: %llu\n", (unsigned long long)pushes);
    fprintf(stderr, "Total Pops: %llu\n", (unsigned long long)pops);
    fprintf(stderr, "Pop Empty Count: %llu (%.1f%% of pop attempts)\n",
            (unsigned long long)pop_empty, empty_rate);
    fprintf(stderr, "Hit Rate: %.1f%%\n", hit_rate);
    fprintf(stderr, "========================================\n\n");
}
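
// Usage: run with HAKMEM_MEASURE_UNIFIED_CACHE=1 (any non-empty value not
// starting with '0') and call tls_sll_print_measurements() at a point of
// interest; the first call caches the getenv() result, so toggling the
// variable mid-run has no effect. Hit Rate = pops / (pops + pop_empty).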

// ============================================================================
// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
// ============================================================================
#include "hakmem_tiny_ace_guard_box.inc"