#include "hakmem_tiny.h" #include "hakmem_tiny_config.h" // Centralized configuration #include "hakmem_phase7_config.h" // Phase 7: Task 3 constants (PREWARM_COUNT, etc.) #include "hakmem_tiny_superslab.h" #include "box/ss_slab_meta_box.h" // Phase 3d-A: SlabMeta Box boundary // Phase 6.22: SuperSlab allocator #include "hakmem_super_registry.h" // Phase 8.2: SuperSlab registry for memory profiling #include "hakmem_internal.h" #include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD) #include "hakmem_tiny_magazine.h" #include "hakmem_tiny_integrity.h" // PRIORITY 1-4: Corruption detection #include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write #include "box/ptr_conversion_box.h" // Box API: pointer conversion #include "hakmem_env_cache.h" // Priority-2: ENV cache #include "box/tiny_cold_iface_v1.h" // Cold boundary wrapper for TinyHotHeap v2 // Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition) #include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine #include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG) // Phase 2B modules #include "tiny_api.h" // Consolidated: stats_api, query_api, rss_api, registry_api #include "tiny_tls.h" #include "tiny_debug.h" #include "hakmem_debug_master.h" // For unified debug level control #include "tiny_mmap_gate.h" #include "tiny_debug_ring.h" #include "tiny_route.h" #include "front/tiny_heap_v2.h" #include "box/tiny_front_stats_box.h" #include "box/tiny_front_v3_env_box.h" #include "box/ss_os_acquire_box.h" #include "tiny_tls_guard.h" #include "tiny_ready.h" #include "box/c7_meta_used_counter_box.h" #include "box/tiny_c7_hotbox.h" #include "box/tiny_heap_box.h" #include "box/tiny_hotheap_v2_box.h" #include "box/tiny_route_env_box.h" #include "box/super_reg_box.h" #include "tiny_region_id.h" #include "tiny_debug_api.h" #include "tiny_destructors.h" #include "hakmem_tiny_tls_list.h" #include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue #include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue #include "tiny_adaptive_sizing.h" // Phase 2b: Adaptive TLS cache sizing // NOTE: hakmem_tiny_tls_ops.h included later (after type definitions) #include "tiny_system.h" // Consolidated: stdio, stdlib, string, etc. 
#include "hakmem_prof.h" #include "hakmem_trace.h" // Optional USDT (perf) tracepoints extern uint64_t g_bytes_allocated; // from hakmem_tiny_superslab.c // Tiny allocator configuration, debug counters, and return helpers #include "hakmem_tiny_config_box.inc" // ============================================================================ // Debug: TLS SLL last push tracking (for core/box/tls_sll_box.h) // ============================================================================ __thread hak_base_ptr_t s_tls_sll_last_push[TINY_NUM_CLASSES] = {0}; __thread tiny_heap_ctx_t g_tiny_heap_ctx; __thread int g_tiny_heap_ctx_init = 0; __thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2 = NULL; TinyHeapClassStats g_tiny_heap_stats[TINY_NUM_CLASSES] = {0}; TinyC7PageStats g_c7_page_stats = {0}; tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0}; int g_tiny_route_snapshot_done = 0; _Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0}; _Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0}; TinyFrontV3Snapshot g_tiny_front_v3_snapshot = {0}; int g_tiny_front_v3_snapshot_ready = 0; static TinyFrontV3SizeClassEntry g_tiny_front_v3_lut[TINY_MAX_SIZE + 1] = {0}; static int g_tiny_front_v3_lut_ready = 0; // Forward decls (to keep deps light in this TU) int unified_cache_enabled(void); void tiny_front_v3_snapshot_init(void) { if (g_tiny_front_v3_snapshot_ready) { return; } TinyFrontV3Snapshot snap = { .unified_cache_on = unified_cache_enabled(), .tiny_guard_on = tiny_guard_is_enabled(), .header_mode = (uint8_t)tiny_header_mode(), .header_v3_enabled = tiny_header_v3_enabled(), .header_v3_skip_c7 = tiny_header_v3_skip_c7(), .c7_ultra_enabled = tiny_c7_ultra_enabled_env(), .c7_ultra_header_light = tiny_c7_ultra_header_light_enabled_env(), }; g_tiny_front_v3_snapshot = snap; g_tiny_front_v3_snapshot_ready = 1; } void tiny_front_v3_size_class_lut_init(void) { if (g_tiny_front_v3_lut_ready) { return; } tiny_route_snapshot_init(); size_t max_size = tiny_get_max_size(); if (max_size > TINY_MAX_SIZE) { max_size = TINY_MAX_SIZE; } for (size_t sz = 0; sz <= TINY_MAX_SIZE; sz++) { TinyFrontV3SizeClassEntry e = { .class_idx = TINY_FRONT_V3_INVALID_CLASS, .route_kind = (uint8_t)TINY_ROUTE_LEGACY, }; if (sz == 0 || sz > max_size) { g_tiny_front_v3_lut[sz] = e; continue; } int cls = hak_tiny_size_to_class((int)sz); if (cls >= 0 && cls < TINY_NUM_CLASSES) { e.class_idx = (uint8_t)cls; e.route_kind = (uint8_t)tiny_route_for_class((uint8_t)cls); } g_tiny_front_v3_lut[sz] = e; } g_tiny_front_v3_lut_ready = 1; } const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size) { if (__builtin_expect(!g_tiny_front_v3_lut_ready, 0)) { tiny_front_v3_size_class_lut_init(); } if (size == 0 || size > TINY_MAX_SIZE) { return NULL; } return &g_tiny_front_v3_lut[size]; } // ============================================================================= // TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper. 
// =============================================================================
// TinyHotHeap v2 (Phase30/31 wiring). Currently C7-only thin wrapper.
// NOTE: As of Phase34/35, v2 is slower than v1 even for C7-only workloads and
//       shows a large regression in mixed workloads. It is meant to be used
//       only when the experimental flag is explicitly turned ON; v1 remains
//       the recommended default.
// =============================================================================
_Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES] = {0};
_Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES] = {0};
TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES] = {0};

static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx,
                                             tiny_hotheap_page_v2* page);

static inline uint8_t tiny_hotheap_v2_idx(uint8_t class_idx) {
    return (class_idx < TINY_HOTHEAP_MAX_CLASSES) ? class_idx : 0;
}

void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[tiny_hotheap_v2_idx(class_idx)], 1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx) {
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[tiny_hotheap_v2_idx(class_idx)], 1,
                              memory_order_relaxed);
}

void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out) {
    if (!out) return;
    memset(out, 0, sizeof(*out));
    uint8_t ci = 7;
    out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
    out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
    out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
    out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
    out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed);
    out->refill_with_current = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
    out->refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
    out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
    out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed);
    out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
    out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
    out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
    out->cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
    out->cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
    out->retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
    out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed);
    out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed);
    out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed);
    out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed);
    out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed);
    out->partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
    out->partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
    out->partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_acquire_page_node(tiny_hotheap_class_v2* hc) {
    if (!hc) return NULL;
    if (hc->storage_page.meta == NULL && hc->storage_page.freelist == NULL &&
        hc->storage_page.capacity == 0) {
        tiny_hotheap_v2_page_reset(&hc->storage_page);
        return &hc->storage_page;
    }
    tiny_hotheap_page_v2* node = (tiny_hotheap_page_v2*)calloc(1, sizeof(tiny_hotheap_page_v2));
    if (!node) {
        return NULL;
    }
    tiny_hotheap_v2_page_reset(node);
    return node;
}
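// ----------------------------------------------------------------------------
// Illustrative only (not compiled): reading the C7 counters via the snapshot
// API above, e.g. from a debug hook or an atexit() handler. Sketch only; the
// dump_hotheap_v2_counters name is hypothetical.
// ----------------------------------------------------------------------------
#if 0
static void dump_hotheap_v2_counters(void) {
    tiny_hotheap_v2_stats_snapshot_t snap;
    tiny_hotheap_v2_debug_snapshot(&snap);   // snapshot is C7-only (ci = 7)
    fprintf(stderr, "[HotHeapV2] alloc=%llu fast=%llu refill=%llu fb_v1=%llu\n",
            (unsigned long long)snap.alloc_calls,
            (unsigned long long)snap.alloc_fast,
            (unsigned long long)snap.alloc_refill,
            (unsigned long long)snap.alloc_fallback_v1);
}
#endif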
static tiny_hotheap_page_v2* tiny_hotheap_v2_find_page(tiny_hotheap_class_v2* hc, uint8_t class_idx,
                                                       void* p, TinySlabMeta* meta) {
    if (!hc || !p) return NULL;
    const size_t stride = hc->stride ? hc->stride : tiny_stride_for_class(class_idx);
    const size_t max_span = stride * (size_t)(hc->current_page ? hc->current_page->capacity : 0);
    tiny_hotheap_page_v2* candidates[3] = {hc->current_page, hc->partial_pages, hc->full_pages};
    for (int i = 0; i < 3; i++) {
        for (tiny_hotheap_page_v2* page = candidates[i]; page; page = page->next) {
            if (meta && page->meta && page->meta != meta) continue;
            if (!page->base || page->capacity == 0) continue;
            uint8_t* base = (uint8_t*)page->base;
            size_t span = stride * (size_t)page->capacity;
            if ((uint8_t*)p >= base && (uint8_t*)p < base + span) {
                (void)max_span; // silence unused warning in case stride==0
                return page;
            }
        }
    }
    return NULL;
}

static inline void tiny_hotheap_v2_partial_push(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* page,
                                                uint8_t class_idx, int stats_on) {
    if (!hc || !page) return;
    page->next = hc->partial_pages;
    hc->partial_pages = page;
    if (hc->partial_count < UINT16_MAX) {
        hc->partial_count++;
    }
    if (stats_on) {
        uint8_t idx = tiny_hotheap_v2_idx(class_idx);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pushes[idx], 1, memory_order_relaxed);
        uint64_t cur = hc->partial_count;
        uint64_t old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        while (cur > old &&
               !atomic_compare_exchange_weak_explicit(&g_tiny_hotheap_v2_partial_peak[idx], &old, cur,
                                                      memory_order_relaxed, memory_order_relaxed)) {
            old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
        }
    }
}

static inline void tiny_hotheap_v2_maybe_trim_partial(tiny_hotheap_ctx_v2* ctx, tiny_hotheap_class_v2* hc,
                                                      uint8_t class_idx, int stats_on) {
    if (!ctx || !hc) return;
    uint16_t limit = hc->max_partial_pages;
    if (limit == 0) {
        return;
    }
    while (hc->partial_count > limit && hc->partial_pages) {
        tiny_hotheap_page_v2* victim = hc->partial_pages;
        hc->partial_pages = victim->next;
        if (hc->partial_count > 0) {
            hc->partial_count--;
        }
        victim->next = NULL;
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[tiny_hotheap_v2_idx(class_idx)], 1,
                                      memory_order_relaxed);
        }
        tiny_hotheap_v2_page_retire_slow(ctx, class_idx, victim);
    }
}

static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page, uint8_t class_idx,
                                                  uint16_t stride) {
    if (!page || stride == 0) {
        return;
    }
    if (page->used >= page->capacity) {
        page->freelist = NULL;
        return;
    }
    void* head = NULL;
    size_t start = page->capacity;
    while (start > page->used) {
        start--;
        uint8_t* block = (uint8_t*)page->base + (start * (size_t)stride);
        tiny_next_write(class_idx, block, head);
        head = block;
    }
    page->freelist = head;
}

static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* target) {
    if (!hc || !target) return;
    if (hc->current_page == target) {
        hc->current_page = NULL;
    }
    tiny_hotheap_page_v2** lists[2] = {&hc->partial_pages, &hc->full_pages};
    for (int i = 0; i < 2; i++) {
        tiny_hotheap_page_v2** head = lists[i];
        tiny_hotheap_page_v2* prev = NULL;
        tiny_hotheap_page_v2* cur = *head;
        while (cur) {
            if (cur == target) {
                if (prev) {
                    prev->next = cur->next;
                } else {
                    *head = cur->next;
                }
                cur->next = NULL;
                if (i == 0 && hc->partial_count > 0) {
                    hc->partial_count--;
                }
                break;
            }
            prev = cur;
            cur = cur->next;
        }
    }
}

static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx) {
    if (!ctx || class_idx >= TINY_HOTHEAP_MAX_CLASSES) {
        return NULL;
    }
    int stats_on = tiny_hotheap_v2_stats_enabled();
    atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_refill[class_idx], 1, memory_order_relaxed);
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
    }
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    if (hc) {
        if (hc->current_page) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_current[class_idx], 1, memory_order_relaxed);
        }
        if (hc->partial_pages) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_partial[class_idx], 1, memory_order_relaxed);
        }
    }
    // Borrow one page from the cold interface (v1 TinyHeap)
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    tiny_heap_page_t* ipage = cold.refill_page ? cold.refill_page(cold_ctx, class_idx) : NULL;
    if (!ipage || !ipage->base || ipage->capacity == 0 || ipage->meta == NULL) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_refill_fail[class_idx], 1, memory_order_relaxed);
        return NULL;
    }
    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }
    tiny_hotheap_page_v2* page = tiny_hotheap_v2_acquire_page_node(hc);
    if (!page) {
        return NULL;
    }
    page->lease_page = ipage;
    page->meta = ipage->meta;
    page->ss = ipage->ss;
    page->base = ipage->base;
    page->capacity = ipage->capacity;
    page->slab_idx = ipage->slab_idx;
    page->freelist = NULL;
    page->used = 0;
    const uint16_t stride = hc->stride ? hc->stride : (uint16_t)tiny_stride_for_class(class_idx);
    tiny_hotheap_v2_build_freelist(page, class_idx, stride);
    tiny_hotheap_page_v2* old_cur = hc->current_page;
    hc->current_page = page;
    page->next = NULL;
    if (old_cur && old_cur != page) {
        tiny_hotheap_v2_partial_push(hc, old_cur, class_idx, stats_on);
    }
    tiny_hotheap_v2_maybe_trim_partial(ctx, hc, class_idx, stats_on);
    if (!hc->current_page || !hc->current_page->freelist || hc->current_page->capacity == 0 ||
        hc->current_page->used > hc->current_page->capacity) {
        fprintf(stderr,
                "[HOTHEAP_V2_REFILL_ASSERT] current_page missing freelist (page=%p freelist=%p cap=%u used=%u)\n",
                (void*)hc->current_page,
                hc->current_page ? hc->current_page->freelist : NULL,
                hc->current_page ? (unsigned)hc->current_page->capacity : 0u,
                hc->current_page ? (unsigned)hc->current_page->used : 0u);
        return NULL;
    }
    return hc->current_page;
}
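// ----------------------------------------------------------------------------
// Illustrative only (not compiled): the freelist layout that
// tiny_hotheap_v2_build_freelist() produces for a freshly leased page. A
// worked example under assumed numbers (stride = 512, capacity = 4, used = 1);
// the list threads the untouched tail from the lowest remaining index:
//
//     head -> base + 1*512 -> base + 2*512 -> base + 3*512 -> NULL
//
// Blocks with index < used are considered live and are never linked.
// ----------------------------------------------------------------------------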
static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx,
                                             tiny_hotheap_page_v2* page) {
    if (!ctx || !page) return;
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
    tiny_hotheap_v2_unlink_page(hc, page);
    if (page->lease_page) {
        page->lease_page->used = page->used;
        page->lease_page->free_list = page->freelist;
        if (page->lease_page->meta) {
            atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
            atomic_store_explicit(&page->lease_page->meta->used, page->used, memory_order_relaxed);
        }
    }
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    if (cold.retire_page) {
        cold.retire_page(cold_ctx, class_idx, page->lease_page);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_retire_calls[idx], 1, memory_order_relaxed);
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_retire_calls_v2[idx], 1, memory_order_relaxed);
    }
    if (page != &hc->storage_page) {
        free(page);
    } else {
        tiny_hotheap_v2_page_reset(page);
    }
    if (!hc->current_page && hc->partial_pages) {
        hc->current_page = hc->partial_pages;
        hc->partial_pages = hc->partial_pages->next;
        if (hc->current_page) {
            hc->current_page->next = NULL;
        }
    }
    if (tiny_hotheap_v2_stats_enabled()) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].page_retired, 1, memory_order_relaxed);
    }
}

static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* page,
                                            uint8_t class_idx, TinyHeapClassStats* stats, int stats_on) {
    if (!hc || !page || !page->base || page->capacity == 0) {
        return NULL;
    }
    if (hc->stride == 0) {
        hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
    }
    const uint16_t stride = hc->stride;
    void* block = NULL;
    if (page->freelist) {
        block = page->freelist;
        void* next = tiny_next_read(class_idx, block);
        page->freelist = next;
    } else if (page->used < page->capacity) {
        block = (void*)((uint8_t*)page->base + ((size_t)page->used * stride));
    } else {
        return NULL;
    }
    page->used++;
    if (__builtin_expect(stats != NULL, 0)) {
        atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
    }
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fast[tiny_hotheap_v2_idx(class_idx)], 1,
                                  memory_order_relaxed);
    }
    return tiny_region_id_write_header(block, class_idx);
}

tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
    tiny_hotheap_ctx_v2* ctx = g_tiny_hotheap_ctx_v2;
    if (__builtin_expect(ctx == NULL, 0)) {
        ctx = (tiny_hotheap_ctx_v2*)calloc(1, sizeof(tiny_hotheap_ctx_v2));
        if (__builtin_expect(ctx == NULL, 0)) {
            fprintf(stderr, "[TinyHotHeapV2] TLS alloc failed (OOM)\n");
            abort();
        }
        g_tiny_hotheap_ctx_v2 = ctx;
        for (int i = 0; i < TINY_HOTHEAP_MAX_CLASSES; i++) {
            tiny_hotheap_v2_page_reset(&ctx->cls[i].storage_page);
            ctx->cls[i].stride = (uint16_t)tiny_stride_for_class(i);
            ctx->cls[i].max_partial_pages = (i == 7 || i == 6) ? 2 : 0; // C6/C7 keep 1-2 partial pages on hand
            ctx->cls[i].partial_count = 0;
        }
    }
    return ctx;
}

void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
    int stats_on = tiny_hotheap_v2_stats_enabled();
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_route_hits[idx], 1, memory_order_relaxed);
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_calls[idx], 1, memory_order_relaxed);
    }
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return NULL; // C6/C7 only for now
    }
    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
    TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
    // If current_page looks corrupted, drop it once and fall through to the slow path
    if (v2page && (!v2page->base || v2page->capacity == 0)) {
        vhcls->current_page = NULL;
        v2page = NULL;
    }
    // Hot path: current_page -> partial -> refill
    void* user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }
    // Move exhausted current_page to the full list if needed
    if (vhcls && v2page && v2page->used >= v2page->capacity && vhcls->current_page == v2page) {
        vhcls->current_page = NULL;
        v2page->next = vhcls->full_pages;
        vhcls->full_pages = v2page;
    }
    while (vhcls && vhcls->partial_pages) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_calls, 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_from_partial, 1, memory_order_relaxed);
            if (vhcls->current_page == NULL) {
                atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_with_current_null, 1,
                                          memory_order_relaxed);
            }
        }
        v2page = vhcls->partial_pages;
        vhcls->partial_pages = vhcls->partial_pages->next;
        if (vhcls->partial_count > 0) {
            vhcls->partial_count--;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[idx], 1, memory_order_relaxed);
        }
        v2page->next = NULL;
        vhcls->current_page = v2page;
        user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
        if (user) {
            return user;
        }
        if (v2page->used >= v2page->capacity) {
            v2page->next = vhcls->full_pages;
            vhcls->full_pages = v2page;
            vhcls->current_page = NULL;
        }
    }
    // Lease a page from v1 (C7 SAFE) and wrap it
    tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, class_idx);
    if (!leased) {
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[idx], 1, memory_order_relaxed);
        }
        size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx))
                            : tiny_stride_for_class(class_idx);
        if (class_idx == 7) {
            return tiny_c7_alloc_fast(size); // safety fallback to v1
        }
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
    }
    vhcls->current_page = leased;
    v2page = leased;
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_lease[idx], 1, memory_order_relaxed);
    }
    user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
    if (user) {
        return user;
    }
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
    }
    size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx))
                        : tiny_stride_for_class(class_idx);
    if (class_idx == 7) {
        return tiny_c7_alloc_fast(size);
    }
    tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
    return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
}
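// ----------------------------------------------------------------------------
// Illustrative only (not compiled): the intended v2 call pairing from the
// router. Sketch only; in-tree callers usually resolve `meta` via the
// SuperSlab lookup before calling tiny_hotheap_v2_free(), and NULL is an
// accepted "unknown meta" hint (find_page then matches by address range).
// ----------------------------------------------------------------------------
#if 0
static void hotheap_v2_smoke_check(void) {
    void* p = tiny_hotheap_v2_alloc(7);      // C7 only; returns NULL for other classes
    if (p) {
        tiny_hotheap_v2_free(7, p, NULL);    // meta hint optional
    }
}
#endif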
void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
    if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
        return;
    }
    uint8_t idx = tiny_hotheap_v2_idx(class_idx);
    int stats_on = tiny_hotheap_v2_stats_enabled();
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_calls[idx], 1, memory_order_relaxed);
    }
    tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
    tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
    TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;
    tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, class_idx, p, meta_ptr);
    if (page && page->base && page->capacity > 0) {
        tiny_next_write(class_idx, p, page->freelist);
        page->freelist = p;
        if (page->used > 0) {
            page->used--;
        }
        if (vhcls && vhcls->current_page != page) {
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = vhcls->current_page;
            vhcls->current_page = page;
        }
        if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].free_made_current, 1, memory_order_relaxed);
        }
        if (page->used == 0) {
            // Keep empty pages on the partial list for reuse; retire them once the limit is exceeded
            tiny_hotheap_v2_unlink_page(vhcls, page);
            page->next = NULL;
            if (vhcls && vhcls->current_page == NULL) {
                vhcls->current_page = page;
            } else if (vhcls) {
                tiny_hotheap_v2_partial_push(vhcls, page, class_idx, stats_on);
                tiny_hotheap_v2_maybe_trim_partial(v2ctx, vhcls, class_idx, stats_on);
            }
        } else if (stats_on) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
        }
        if (stats_on && page->used == 0) {
            atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
        }
        return;
    }
    // Fallback: mimic v1 free path
    if (stats_on) {
        atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[idx], 1, memory_order_relaxed);
    }
    SuperSlab* ss = hak_super_lookup(p);
    if (ss && ss->magic == SUPERSLAB_MAGIC) {
        int slab_idx = slab_index_for(ss, p);
        if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
            if (class_idx == 7) {
                tiny_c7_free_fast_with_meta(ss, slab_idx, p);
            } else {
                tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
                tiny_heap_free_class_fast_with_meta(cold_ctx, class_idx, ss, slab_idx, p);
            }
            return;
        }
    }
    if (class_idx == 7) {
        tiny_c7_free_fast(p);
    } else {
        tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
        tiny_heap_free_class_fast(cold_ctx, class_idx, p);
    }
}

#if !HAKMEM_BUILD_RELEASE
// Helper to dump last push from core/hakmem.c (SEGV handler)
// Must be visible to other TUs (extern in hakmem_tiny.h or similar if needed,
// but SEGV handler is in core/hakmem.c which can dlsym or weak link it)
__attribute__((noinline)) void tiny_debug_dump_last_push(int cls) {
    hak_base_ptr_t p = s_tls_sll_last_push[cls];
    void* raw = HAK_BASE_TO_RAW(p);
    fprintf(stderr, "[DEBUG] s_tls_sll_last_push[%d] = %p\n", cls, raw);
    if (raw && (uintptr_t)raw > 4096) {
        unsigned long* vals = (unsigned long*)raw;
        fprintf(stderr, "[DEBUG] Memory at %p: %016lx %016lx\n", raw, vals[0], vals[1]);
    }
}
#endif

// Forward declarations for static helpers used before definition
struct TinySlab; // forward
static void move_to_free_list(int class_idx, struct TinySlab* target_slab);
static void move_to_full_list(int class_idx, struct TinySlab* target_slab);
static void release_slab(struct TinySlab* slab);
static TinySlab* allocate_new_slab(int class_idx);
static void tiny_tls_cache_drain(int class_idx);
static void tiny_apply_mem_diet(void);

// Phase 6.23: SuperSlab allocation forward declaration
static inline void* hak_tiny_alloc_superslab(int class_idx);
static inline void* superslab_tls_bump_fast(int class_idx);
SuperSlab* superslab_refill(int class_idx);
static inline void* superslab_alloc_from_slab(SuperSlab* ss, int slab_idx);
static inline uint32_t sll_cap_for_class(int class_idx, uint32_t mag_cap);

// Forward decl: used by tiny_spec_pop_path before its definition
#if HAKMEM_TINY_P0_BATCH_REFILL
// P0 enabled: sll_refill_batch_from_ss is defined in hakmem_tiny_refill_p0.inc.h
static inline int sll_refill_batch_from_ss(int class_idx, int max_take);
#else
// Phase 12: sll_refill_small_from_ss is defined in hakmem_tiny_refill.inc.h
// Only a single implementation exists there; declare here for callers.
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
int sll_refill_small_from_ss(int class_idx, int max_take);
#else
static inline int sll_refill_small_from_ss(int class_idx, int max_take);
#endif
#endif
static inline void hak_tiny_free_superslab(void* ptr, SuperSlab* ss);
static inline void tiny_remote_drain_owner(struct TinySlab* slab);
static void tiny_remote_drain_locked(struct TinySlab* slab);

// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
/* moved below TinyTLSSlab definition */

// Step 3d: Forced inlining for readability + performance (306M target)
__attribute__((always_inline)) static inline void* hak_tiny_alloc_wrapper(int class_idx);

// Helpers for SuperSlab active block accounting (atomic, saturating dec)
// SuperSlab Active Counter Helpers - EXTRACTED to hakmem_tiny_ss_active_box.inc
#include "hakmem_tiny_ss_active_box.inc"
// EXTRACTED: ss_active_dec_one() moved to hakmem_tiny_superslab.h (Phase 2C-2)

// Front refill count global config (declare before init.inc uses them)
extern int g_refill_count_global;
extern int g_refill_count_hot;
extern int g_refill_count_mid;
extern int g_refill_count_class[TINY_NUM_CLASSES];

// Step 3d: Forced inlining for slow path (maintain monolithic performance)
// Phase 6-1.7: Export for box refactor (Box 5 needs access from hakmem.c)
#ifdef HAKMEM_TINY_PHASE6_BOX_REFACTOR
void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#else
static void* __attribute__((cold, noinline)) hak_tiny_alloc_slow(size_t size, int class_idx);
#endif

// ---------------------------------------------------------------------------
// Box: adopt_gate_try (implementation moved from header for robust linkage)
// ---------------------------------------------------------------------------
#include "box/adopt_gate_box.h"
#include "box/super_reg_box.h"
extern int g_super_reg_class_size[TINY_NUM_CLASSES];
extern unsigned long long g_adopt_gate_calls[];
extern unsigned long long g_adopt_gate_success[];
extern unsigned long long g_reg_scan_attempts[];
extern unsigned long long g_reg_scan_hits[];

SuperSlab* adopt_gate_try(int class_idx, TinyTLSSlab* tls) {
    g_adopt_gate_calls[class_idx]++;
    ROUTE_MARK(13);
    SuperSlab* ss = tiny_refill_try_fast(class_idx, tls);
    if (ss) {
        g_adopt_gate_success[class_idx]++;
        return ss;
    }
    g_reg_scan_attempts[class_idx]++;
    int reg_size = g_super_reg_class_size[class_idx];
    int reg_cap = super_reg_effective_per_class();
    if (reg_cap > 0 && reg_size > reg_cap) {
        reg_size = reg_cap;
    }
    int scan_limit = tiny_reg_scan_max();
    if (scan_limit > reg_size) scan_limit = reg_size;
    // Local helper (mirror adopt_bind_if_safe) to avoid including alloc inline here
    auto int adopt_bind_if_safe_local(TinyTLSSlab* tls_l, SuperSlab* ss, int slab_idx, int class_idx_l) {
        (void)class_idx_l;
        uint32_t self_tid = tiny_self_u32();
        SlabHandle h = slab_try_acquire(ss, slab_idx, self_tid);
        if (!slab_is_valid(&h)) return 0;
        slab_drain_remote_full(&h);
        if (__builtin_expect(slab_is_safe_to_bind(&h), 1)) {
            tiny_tls_bind_slab(tls_l, h.ss, h.slab_idx);
            slab_release(&h);
            return 1;
        }
        slab_release(&h);
        return 0;
    }
    for (int i = 0; i < scan_limit; i++) {
        SuperSlab* cand = super_reg_by_class_at(class_idx, i);
        if (!(cand && cand->magic == SUPERSLAB_MAGIC)) continue;
        // Fast path: use nonempty_mask / freelist_mask to locate candidates in O(1)
        uint32_t mask = cand->nonempty_mask;
        // Fallback to atomic freelist_mask for cross-thread visibility
        if (mask == 0) {
            mask = atomic_load_explicit(&cand->freelist_mask, memory_order_acquire);
        }
        if (mask == 0) continue; // No visible freelists in this SS
        int cap = ss_slabs_capacity(cand);
        while (mask) {
            int sidx = __builtin_ctz(mask);
            mask &= (mask - 1);
            if (sidx >= cap) continue;
            if (adopt_bind_if_safe_local(tls, cand, sidx, class_idx)) {
                g_adopt_gate_success[class_idx]++;
                g_reg_scan_hits[class_idx]++;
                ROUTE_MARK(14);
                ROUTE_COMMIT(class_idx, 0x07);
                return cand;
            }
        }
    }
    return NULL;
}

// ============================================================================
// Global State - EXTRACTED to hakmem_tiny_globals_box.inc
// ============================================================================
#include "hakmem_tiny_globals_box.inc"
#include "hakmem_tiny_publish_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_fast_pop(), tiny_fast_push() - 28 lines (lines 377-404)

// Forward declarations for functions defined in hakmem_tiny_fastcache.inc.h
static inline hak_base_ptr_t tiny_fast_pop(int class_idx);
static inline int tiny_fast_push(int class_idx, hak_base_ptr_t ptr);
static inline hak_base_ptr_t fastcache_pop(int class_idx);
static inline int fastcache_push(int class_idx, hak_base_ptr_t ptr);

// ============================================================================
// EXTRACTED TO hakmem_tiny_hot_pop.inc.h (Phase 2D-1)
// ============================================================================
// Functions: tiny_hot_pop_class0(), tiny_hot_pop_class1(), tiny_hot_pop_class2(), tiny_hot_pop_class3()
// 88 lines (lines 407-494)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: tiny_fast_refill_and_take() - 39 lines (lines 584-622)

// ============================================================================
// TLS/Frontend State & Configuration - EXTRACTED to hakmem_tiny_tls_state_box.inc
// ============================================================================
#include "hakmem_tiny_tls_state_box.inc"
#include "hakmem_tiny_intel.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_rss.c (Phase 2B-2)
// ============================================================================
// EXTRACTED: static int get_rss_kb_self(void) {
// EXTRACTED:   FILE* f = fopen("/proc/self/status", "r");
// EXTRACTED:   if (!f) return 0;
// EXTRACTED:   char buf[256];
// EXTRACTED:   int kb = 0;
// EXTRACTED:   while (fgets(buf, sizeof(buf), f)) {
// EXTRACTED:     if (strncmp(buf, "VmRSS:", 6) == 0) {
// EXTRACTED:       char* p = buf;
// EXTRACTED:       while (*p && (*p < '0' || *p > '9')) {
// EXTRACTED:         p++;
// EXTRACTED:       }
// EXTRACTED:       kb = atoi(p);
// EXTRACTED:       break;
// EXTRACTED:     }
// EXTRACTED:   }
// EXTRACTED:   fclose(f);
// EXTRACTED:   return kb;
// EXTRACTED: }

// Option: on a miss, allocate just one block and return it immediately instead of
// bulk-refilling the magazine.
// Env: enable with HAKMEM_TINY_REFILL_ONE_ON_MISS=1 (default: 0)
int g_refill_one_on_miss = 0;

// Frontend fill target per class (adaptive)
// NOTE: Non-static because used in hakmem_tiny_refill.inc.h
_Atomic uint32_t g_frontend_fill_target[TINY_NUM_CLASSES];

// Adaptive CAS: Active thread counter (for single-threaded optimization)
// Incremented on thread init, decremented on thread shutdown
_Atomic uint32_t g_hakmem_active_threads = 0;

// Per-thread registration flag (TLS variable)
static __thread int g_thread_registered = 0;

// Adaptive CAS: Register current thread (called on first allocation)
// NOTE: Non-static for cross-TU visibility (called from hak_alloc_api.inc.h)
__attribute__((always_inline)) inline void hakmem_thread_register(void) {
    if (__builtin_expect(g_thread_registered == 0, 0)) {
        g_thread_registered = 1;
        atomic_fetch_add_explicit(&g_hakmem_active_threads, 1, memory_order_relaxed);
    }
}

// SLL capacity override array (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static int g_ultra_batch_override[TINY_NUM_CLASSES] = {0};
static int g_ultra_sll_cap_override[TINY_NUM_CLASSES] = {0};

// Helper function for batch size (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_batch_for_class(int class_idx) {
    int ov = g_ultra_batch_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 64;   // 8B
        case 1: return 96;   // 16B
        case 2: return 96;   // 32B
        case 3: return 224;  // 64B
        case 4: return 96;   // 128B
        case 5: return 64;   // 256B
        case 6: return 64;   // 512B
        default: return 32;  // 1024B and others
    }
}

// Helper function for SLL capacity (moved from deleted hakmem_tiny_ultra_batch_box.inc)
static inline int ultra_sll_cap_for_class(int class_idx) {
    int ov = g_ultra_sll_cap_override[class_idx];
    if (ov > 0) return ov;
    switch (class_idx) {
        case 0: return 256;  // 8B
        case 1: return 384;  // 16B
        case 2: return 384;  // 32B
        case 3: return 768;  // 64B
        case 4: return 256;  // 128B
        default: return 128; // others
    }
}

enum { HAK_TIER_SLL=1, HAK_TIER_MAG=2, HAK_TIER_SLAB=3, HAK_TIER_SUPER=4, HAK_TIER_FRONT=5 };

// Event Queue & Telemetry Helpers - EXTRACTED to hakmem_tiny_eventq_box.inc
#include "hakmem_tiny_eventq_box.inc"
// Background refill workers and intelligence engine
#include "hakmem_tiny_background.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_fastcache.inc.h (Phase 2D-1)
// ============================================================================
// Functions: fastcache_pop(), fastcache_push(), quick_pop() - 25 lines (lines 873-896)

// Ultra-fast try-only variant: attempt a direct SuperSlab bump/freelist pop
// without any refill or slow-path work. Returns NULL on miss.
static inline void* hak_tiny_alloc_superslab_try_fast(int class_idx) {
    if (!g_use_superslab) return NULL;
    TinyTLSSlab* tls = &g_tls_slabs[class_idx];
    TinySlabMeta* meta = tls->meta;
    if (!meta) return NULL;
    // Try linear (bump) allocation first when freelist is empty
    if (meta->freelist == NULL && meta->used < meta->capacity && tls->slab_base) {
        // Use per-slab class_idx to get stride
        size_t block_size = tiny_stride_for_class(meta->class_idx);
        void* block = tls->slab_base + ((size_t)meta->used * block_size);
        meta->used++;
        c7_meta_used_note(meta->class_idx, C7_META_USED_SRC_FRONT);
        // Track active blocks in SuperSlab for conservative reclamation
        ss_active_inc(tls->ss);
        return block;
    }
    // Do not pop freelist here (keep magazine/SLL handling consistent)
    return NULL;
}

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Functions: quick_refill_from_sll(), quick_refill_from_mag() - 31 lines (lines 918-949)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: sll_refill_small_from_ss() - 45 lines (lines 952-996)

// Phase 2C-3: TLS operations module (included after helper function definitions)
#include "hakmem_tiny_tls_ops.h"

// New TLS list refill: owner-only bulk take from TLS-cached SuperSlab slab
// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_refill_from_tls_slab() - 101 lines
// Hot path refill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_tls_ops.h (Phase 2C-3)
// ============================================================================
// Function: tls_list_spill_excess() - 97 lines
// Hot path spill operation, moved to inline function in header

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: superslab_tls_bump_fast() - 45 lines (lines 1016-1060)

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: frontend_refill_fc() - 44 lines (lines 1063-1106)

// SLL capacity policy: for hot tiny classes (0..3), allow larger SLL up to multiplier * mag_cap;
// for >=4 keep current conservative half (to limit footprint).
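// ----------------------------------------------------------------------------
// Illustrative only (not compiled): the shape of the SLL capacity policy
// described above. The real policy lives in hakmem_tiny_sll_cap_box.inc; the
// 4x multiplier here is an assumption for the sketch, not the shipped value.
// ----------------------------------------------------------------------------
#if 0
static inline uint32_t sll_cap_for_class_sketch(int class_idx, uint32_t mag_cap) {
    if (class_idx <= 3) {
        return mag_cap * 4u;  // hot tiny classes: allow a larger SLL
    }
    return mag_cap / 2u;      // classes >= 4: conservative half to limit footprint
}
#endif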
// SLL Capacity Policy - EXTRACTED to hakmem_tiny_sll_cap_box.inc
#include "hakmem_tiny_sll_cap_box.inc"

// ============================================================================
// EXTRACTED TO hakmem_tiny_refill.inc.h (Phase 2D-1)
// ============================================================================
// Function: bulk_mag_to_sll_if_room() - 22 lines (lines 1133-1154)

// Ultra-Mode Batch Configuration - REMOVED (dead code cleanup 2025-11-27)
#include "hakmem_tiny_remote.inc"

// ============================================================================
// Internal Helpers
// ============================================================================
// Step 2: Slab Registry Operations
// Hash function for slab_base (64KB aligned)
// ============================================================================
// EXTRACTED TO hakmem_tiny_registry.c (Phase 2B-3)
// ============================================================================
// EXTRACTED: static inline int registry_hash(uintptr_t slab_base) {
// EXTRACTED:   return (slab_base >> 16) & SLAB_REGISTRY_MASK;
// EXTRACTED: }
// Register slab in hash table (returns 1 on success, 0 on failure)
// EXTRACTED: static int registry_register(uintptr_t slab_base, TinySlab* owner) {
// EXTRACTED:   pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:   int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:   // Linear probing (max 8 attempts)
// EXTRACTED:   for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:     int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:     SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:     if (entry->slab_base == 0) {
// EXTRACTED:       // Empty slot found
// EXTRACTED:       entry->slab_base = slab_base;
// EXTRACTED:       atomic_store_explicit(&entry->owner, owner, memory_order_release);
// EXTRACTED:       pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:       return 1;
// EXTRACTED:     }
// EXTRACTED:   }
// EXTRACTED:
// EXTRACTED:   // Registry full (collision limit exceeded)
// EXTRACTED:   pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:   return 0;
// EXTRACTED: }
// Unregister slab from hash table
// EXTRACTED: static void registry_unregister(uintptr_t slab_base) {
// EXTRACTED:   pthread_mutex_lock(&g_tiny_registry_lock);
// EXTRACTED:   int hash = registry_hash(slab_base);
// EXTRACTED:
// EXTRACTED:   // Linear probing search
// EXTRACTED:   for (int i = 0; i < SLAB_REGISTRY_MAX_PROBE; i++) {
// EXTRACTED:     int idx = (hash + i) & SLAB_REGISTRY_MASK;
// EXTRACTED:     SlabRegistryEntry* entry = &g_slab_registry[idx];
// EXTRACTED:
// EXTRACTED:     if (entry->slab_base == slab_base) {
// EXTRACTED:       // Found - clear entry (atomic store prevents TOCTOU race)
// EXTRACTED:       atomic_store_explicit(&entry->owner, NULL, memory_order_release);
// EXTRACTED:       entry->slab_base = 0;
// EXTRACTED:       pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:       return;
// EXTRACTED:     }
// EXTRACTED:
// EXTRACTED:     if (entry->slab_base == 0) {
// EXTRACTED:       // Empty slot - not found
// EXTRACTED:       pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED:       return;
// EXTRACTED:     }
// EXTRACTED:   }
// EXTRACTED:   pthread_mutex_unlock(&g_tiny_registry_lock);
// EXTRACTED: }
// Lookup slab by base address (O(1) average)
// ============================================================================
// Registry Lookup & Owner Slab Discovery - EXTRACTED to hakmem_tiny_slab_lookup_box.inc
// ============================================================================
#include "hakmem_tiny_slab_lookup_box.inc"
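// ----------------------------------------------------------------------------
// Illustrative only (not compiled): how the extracted registry hash above
// indexes a 64KB-aligned slab base. A worked example under the extracted
// definitions; the table and constants live in hakmem_tiny_registry.c and the
// slab lookup box.
//
//   slab_base = 64KB-aligned address of the slab
//   hash      = (slab_base >> 16) & SLAB_REGISTRY_MASK
//   lookup    = linear probe at hash, hash+1, ... up to SLAB_REGISTRY_MAX_PROBE,
//               stopping early at the first empty slot (slab_base == 0)
// ----------------------------------------------------------------------------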
// Function: move_to_full_list() - 20 lines (lines 1104-1123)
// Move slab to full list
// Function: move_to_free_list() - 20 lines (lines 1126-1145)
// Move slab to free list

// ============================================================================
// Public API
// ============================================================================

// ============================================================================
// Phase 2D-2: Initialization function (extracted to hakmem_tiny_init.inc)
// ============================================================================
#include "hakmem_tiny_init.inc"

// ============================================================================
// 3-Layer Architecture (2025-11-01 Simplification)
// ============================================================================
// Layer 1: TLS Bump Allocator (ultra-fast, 2-3 instructions/op)
#include "hakmem_tiny_bump.inc.h"
// Layer 2: TLS Small Magazine (fast, 5-10 instructions/op)
#include "hakmem_tiny_smallmag.inc.h"

// ============================================================================
// Phase 6 Fast Path Option (Metadata Header)
// ============================================================================
// Phase 6-1.6: Metadata Header (recommended)
//   - Enable: -DHAKMEM_TINY_PHASE6_METADATA=1
//   - Speed: 450-480 M ops/sec (expected, Phase 6-1 level)
//   - Memory: ~6-12% overhead (8 bytes/allocation)
//   - Method: Store pool_type + size_class in 8-byte header
//   - Benefit: Extends to ALL pools (Tiny/Mid/L25/Whale)
//   - Eliminates: Registry lookups, mid_lookup, owner checks

// ============================================================================
// Forward declarations for Phase 6 alloc/free functions
// ============================================================================
// Phase 6 Wrapper Functions - EXTRACTED to hakmem_tiny_phase6_wrappers_box.inc
// ============================================================================
#include "hakmem_tiny_phase6_wrappers_box.inc"

// Layer 1-3: Main allocation function (simplified)
// Build-time configurable via: -DHAKMEM_TINY_USE_NEW_3LAYER=1
#ifndef HAKMEM_TINY_USE_NEW_3LAYER
#define HAKMEM_TINY_USE_NEW_3LAYER 0 // default OFF (legacy path)
#endif

#if HAKMEM_TINY_USE_NEW_3LAYER
#include "hakmem_tiny_alloc_new.inc"
#else
// Old 6-7 layer architecture (backup)
#include "hakmem_tiny_alloc.inc"
#endif
#include "hakmem_tiny_slow.inc"

// Free path implementations
#include "hakmem_tiny_free.inc"

// ---- Phase 1: Provide default batch-refill symbol (fallback to small refill)
// Allows runtime gate HAKMEM_TINY_REFILL_BATCH=1 without requiring a rebuild.
#ifndef HAKMEM_TINY_P0_BATCH_REFILL
int sll_refill_small_from_ss(int class_idx, int max_take);
__attribute__((weak)) int sll_refill_batch_from_ss(int class_idx, int max_take) {
    return sll_refill_small_from_ss(class_idx, max_take);
}
#endif

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: hak_tiny_trim() - 116 lines (lines 1164-1279)
// Public trim and cleanup operation for lifecycle management

// Forward decl for internal registry lookup used by ultra safety validation
static TinySlab* registry_lookup(uintptr_t slab_base);

// ultra_sll_cap_for_class moved earlier in file (before hakmem_tiny_free.inc)
static inline int ultra_validate_sll_head(int class_idx, void* head) {
    uintptr_t base = ((uintptr_t)head) & ~(TINY_SLAB_SIZE - 1);
    TinySlab* owner = registry_lookup(base);
    if (!owner) return 0;
    uintptr_t start = (uintptr_t)owner->base;
    if ((uintptr_t)head < start || (uintptr_t)head >= start + TINY_SLAB_SIZE) return 0;
    return (owner->class_idx == class_idx);
}

// Optional: wrapper TLS guard (detects wrapper re-entry with a TLS counter)
#ifndef HAKMEM_WRAPPER_TLS_GUARD
#define HAKMEM_WRAPPER_TLS_GUARD 0
#endif
#if HAKMEM_WRAPPER_TLS_GUARD
extern __thread int g_tls_in_wrapper;
#endif

// ============================================================================
// EXTRACTED TO hakmem_tiny_lifecycle.inc (Phase 2D-3)
// ============================================================================
// Function: tiny_tls_cache_drain() - 90 lines (lines 1314-1403)
// Static function for draining TLS caches
//
// Function: tiny_apply_mem_diet() - 20 lines (lines 1405-1424)
// Static function for memory diet mode application
//
// Phase 2D-3: Lifecycle management functions (226 lines total)
#include "hakmem_tiny_lifecycle.inc"

// Phase 2D-4 (FINAL): Slab management functions (142 lines total)
#include "hakmem_tiny_slab_mgmt.inc"

// Size→class routing for >=1024B (env: HAKMEM_TINY_ALLOC_1024_METRIC)
_Atomic uint64_t g_tiny_alloc_ge1024[TINY_NUM_CLASSES] = {0};

// Tiny Heap v2 stats dump (opt-in)
void tiny_heap_v2_print_stats(void) {
    // Priority-2: Use cached ENV
    if (!HAK_ENV_TINY_HEAP_V2_STATS()) return;
    fprintf(stderr, "\n[HeapV2] TLS magazine stats (per class, thread-local)\n");
    for (int cls = 0; cls < TINY_NUM_CLASSES; cls++) {
        TinyHeapV2Mag* mag = &g_tiny_heap_v2_mag[cls];
        TinyHeapV2Stats* st = &g_tiny_heap_v2_stats[cls];
        fprintf(stderr,
                "C%d: top=%d alloc_calls=%llu mag_hits=%llu refill_calls=%llu refill_blocks=%llu backend_oom=%llu\n",
                cls, mag->top,
                (unsigned long long)st->alloc_calls,
                (unsigned long long)st->mag_hits,
                (unsigned long long)st->refill_calls,
                (unsigned long long)st->refill_blocks,
                (unsigned long long)st->backend_oom);
    }
}

// ============================================================================
// Performance Measurement: TLS SLL Statistics Print Function
// ============================================================================
void tls_sll_print_measurements(void) {
    // Check if measurement is enabled
    static int g_measure = -1;
    if (g_measure == -1) {
        const char* e = getenv("HAKMEM_MEASURE_UNIFIED_CACHE");
        g_measure = (e && *e && *e != '0') ? 1 : 0;
    }
    if (!g_measure) {
        return; // Measurement disabled
    }
    uint64_t pushes = atomic_load_explicit(&g_tls_sll_push_count_global, memory_order_relaxed);
    uint64_t pops = atomic_load_explicit(&g_tls_sll_pop_count_global, memory_order_relaxed);
    uint64_t pop_empty = atomic_load_explicit(&g_tls_sll_pop_empty_count_global, memory_order_relaxed);
    uint64_t total_pop_attempts = pops + pop_empty;
    if (total_pop_attempts == 0 && pushes == 0) {
        fprintf(stderr, "\n========================================\n");
        fprintf(stderr, "TLS SLL Statistics\n");
        fprintf(stderr, "========================================\n");
        fprintf(stderr, "No operations recorded\n");
        fprintf(stderr, "========================================\n\n");
        return;
    }
    double hit_rate = total_pop_attempts > 0 ? (100.0 * pops) / total_pop_attempts : 0.0;
    double empty_rate = total_pop_attempts > 0 ? (100.0 * pop_empty) / total_pop_attempts : 0.0;
    fprintf(stderr, "\n========================================\n");
    fprintf(stderr, "TLS SLL Statistics\n");
    fprintf(stderr, "========================================\n");
    fprintf(stderr, "Total Pushes: %llu\n", (unsigned long long)pushes);
    fprintf(stderr, "Total Pops: %llu\n", (unsigned long long)pops);
    fprintf(stderr, "Pop Empty Count: %llu (%.1f%% of pops)\n", (unsigned long long)pop_empty, empty_rate);
    fprintf(stderr, "Hit Rate: %.1f%%\n", hit_rate);
    fprintf(stderr, "========================================\n\n");
}

// ============================================================================
// ACE Learning Layer & Tiny Guard - EXTRACTED to hakmem_tiny_ace_guard_box.inc
// ============================================================================
#include "hakmem_tiny_ace_guard_box.inc"
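// ----------------------------------------------------------------------------
// Illustrative only (not compiled): wiring the opt-in stats dumps above into a
// process-exit hook. Sketch only; the destructor name is hypothetical and the
// in-tree teardown path goes through tiny_destructors.h.
// ----------------------------------------------------------------------------
#if 0
__attribute__((destructor))
static void hakmem_tiny_dump_stats_at_exit(void) {
    tls_sll_print_measurements();   // gated by HAKMEM_MEASURE_UNIFIED_CACHE
    tiny_heap_v2_print_stats();     // gated by HAK_ENV_TINY_HEAP_V2_STATS()
}
#endif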