Guard madvise ENOMEM and stabilize pool/tiny front v3

Moe Charm (CI)
2025-12-09 21:50:15 +09:00
parent e274d5f6a9
commit a905e0ffdd
45 changed files with 3154 additions and 242 deletions

View File

@ -4,6 +4,7 @@
#include "pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_MID)
#include "box/pool_hotbox_v2_box.h"
#include "box/tiny_heap_env_box.h" // TinyHeap profile (C7_SAFE では flatten を無効化)
// Pool v2 is experimental. Default OFF (use legacy v1 path).
static inline int hak_pool_v2_enabled(void) {
@ -40,6 +41,12 @@ static inline int hak_pool_v2_tls_fast_enabled(void) {
static inline int hak_pool_v1_flatten_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
// Under the C7_SAFE / C7_ULTRA_BENCH profiles, force OFF on the safe side
int mode = tiny_heap_profile_mode();
if (mode == TINY_HEAP_PROFILE_C7_SAFE || mode == TINY_HEAP_PROFILE_C7_ULTRA_BENCH) {
g = 0;
return g;
}
const char* e = getenv("HAKMEM_POOL_V1_FLATTEN_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}

View File

@ -0,0 +1,86 @@
// pool_hotbox_v2_box.h — Experimental PoolHotBox v2 (hot path scaffold)
#ifndef POOL_HOTBOX_V2_BOX_H
#define POOL_HOTBOX_V2_BOX_H
#include <stdint.h>
#include <stdlib.h>
#include <stdatomic.h>
#include "hakmem_pool.h" // for POOL_NUM_CLASSES and size helpers
// ENV gates (bench/experiment use only):
// HAKMEM_POOL_V2_ENABLED : overall ON/OFF (default OFF)
// HAKMEM_POOL_V2_CLASSES : bitmask, bit i=1 → route class i to HotBox v2
// HAKMEM_POOL_V2_STATS : stats dump ON/OFF
typedef struct PoolHotBoxV2Stats {
_Atomic uint64_t alloc_calls;
_Atomic uint64_t alloc_fast;
_Atomic uint64_t alloc_refill;
_Atomic uint64_t alloc_refill_fail;
_Atomic uint64_t alloc_fallback_v1;
_Atomic uint64_t free_calls;
_Atomic uint64_t free_fast;
_Atomic uint64_t free_fallback_v1;
_Atomic uint64_t page_of_fail_header_missing;
_Atomic uint64_t page_of_fail_out_of_range;
_Atomic uint64_t page_of_fail_misaligned;
_Atomic uint64_t page_of_fail_unknown;
} PoolHotBoxV2Stats;
// Simple page/class structs for future HotBox v2 implementation.
typedef struct pool_page_v2 {
void* freelist;
uint32_t used;
uint32_t capacity;
uint32_t block_size;
uint32_t class_idx;
void* base;
void* slab_ref;
struct pool_page_v2* next;
} pool_page_v2;
typedef struct pool_class_v2 {
pool_page_v2* current;
pool_page_v2* partial;
uint16_t max_partial_pages;
uint16_t partial_count;
uint32_t block_size;
} pool_class_v2;
typedef struct pool_ctx_v2 {
pool_class_v2 cls[POOL_NUM_CLASSES];
} pool_ctx_v2;
typedef struct PoolColdIface {
void* (*refill_page)(void* cold_ctx,
uint32_t class_idx,
uint32_t* out_block_size,
uint32_t* out_capacity,
void** out_slab_ref);
void (*retire_page)(void* cold_ctx,
uint32_t class_idx,
void* slab_ref,
void* base);
} PoolColdIface;
// ENV helpers
int pool_hotbox_v2_class_enabled(int class_idx);
int pool_hotbox_v2_stats_enabled(void);
// TLS/context helpers
pool_ctx_v2* pool_v2_tls_get(void);
// Hot path (currently stubbed to always fall back to v1; structure only)
void* pool_hotbox_v2_alloc(uint32_t class_idx, size_t size, uintptr_t site_id);
int pool_hotbox_v2_free(uint32_t class_idx, void* raw_block);
// Stats helpers
void pool_hotbox_v2_record_free_call(uint32_t class_idx);
void pool_hotbox_v2_record_alloc_fallback(uint32_t class_idx);
void pool_hotbox_v2_record_free_fallback(uint32_t class_idx);
// Stats export (destructor in hakmem_pool.c)
extern PoolHotBoxV2Stats g_pool_hotbox_v2_stats[POOL_NUM_CLASSES];
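// A minimal caller-side sketch (not the actual front wiring): gate a class into HotBox v2 and let
// the caller fall back to the legacy v1 path when v2 declines. Returning NULL here stands in for
// "continue on v1"; the real v1 entry point is assumed to live in hakmem_pool.c.
static inline void* pool_hotbox_v2_front_alloc_sketch(int class_idx, size_t size, uintptr_t site_id) {
    if (pool_hotbox_v2_class_enabled(class_idx)) {
        void* p = pool_hotbox_v2_alloc((uint32_t)class_idx, size, site_id);
        if (p) return p;                                            // served by HotBox v2
        pool_hotbox_v2_record_alloc_fallback((uint32_t)class_idx);  // count the v2 → v1 fallback
    }
    return NULL; // caller continues on the legacy v1 allocation path
}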
#endif // POOL_HOTBOX_V2_BOX_H

View File

@ -0,0 +1,33 @@
// pool_hotbox_v2_header_box.h
// Small helpers for embedding/reading the v2 pool page pointer in the page header.
#pragma once
#include <stdint.h>
// Mask a pointer down to the page base (POOL_PAGE_SIZE is a power of two).
static inline void* pool_hotbox_v2_page_base(void* ptr, size_t page_size) {
return (void*)((uintptr_t)ptr & ~((uintptr_t)page_size - 1));
}
// Store the PoolHotBox v2 page pointer into the page header.
// Caller must ensure base is page_size aligned and non-NULL.
static inline void pool_hotbox_v2_header_store(void* page_base, void* page_ptr) {
if (!page_base) return;
void** hdr = (void**)page_base;
*hdr = page_ptr;
}
// Clear the page header pointer (used on retire to avoid stale lookups).
static inline void pool_hotbox_v2_header_clear(void* page_base) {
if (!page_base) return;
void** hdr = (void**)page_base;
*hdr = NULL;
}
// Load the page pointer from the page header (may return NULL).
static inline void* pool_hotbox_v2_header_load(void* page_base) {
if (!page_base) return NULL;
void** hdr = (void**)page_base;
return *hdr;
}
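// A minimal lifecycle sketch, assuming page_size is the same POOL_PAGE_SIZE granularity used by the
// HotBox v2 refill path: store the owning pool_page_v2* at page init, recover it from any interior
// pointer on free(), and clear it on retire so stale lookups return NULL.
static inline void pool_hotbox_v2_header_lifecycle_example(void* page_base, void* page_ptr,
                                                           void* user_block, size_t page_size) {
    pool_hotbox_v2_header_store(page_base, page_ptr);                 // at page init
    void* owner = pool_hotbox_v2_header_load(
        pool_hotbox_v2_page_base(user_block, page_size));             // at free(); NULL means "not a v2 page"
    (void)owner;
    pool_hotbox_v2_header_clear(page_base);                           // at retire
}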

View File

@ -41,6 +41,22 @@ static void mid_desc_register(void* page, int class_idx, uint64_t owner_tid) {
void* canonical_page = (void*)((uintptr_t)page & ~((uintptr_t)POOL_PAGE_SIZE - 1));
uint32_t h = mid_desc_hash(canonical_page);
pthread_mutex_lock(&g_mid_desc_mu[h]);
// Check if descriptor already exists
MidPageDesc* existing = g_mid_desc_head[h];
while (existing) {
if (existing->page == canonical_page) {
// Descriptor already exists, update owner_tid if needed
if (existing->owner_tid == 0 && owner_tid != 0) {
existing->owner_tid = owner_tid;
}
pthread_mutex_unlock(&g_mid_desc_mu[h]);
return;
}
existing = existing->next;
}
// Descriptor doesn't exist, create new one
MidPageDesc* d = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
if (d) {
d->page = canonical_page;
@ -76,7 +92,16 @@ static void mid_desc_adopt(void* addr, int class_idx, uint64_t owner_tid) {
if (d->owner_tid == 0) d->owner_tid = owner_tid;
} else {
MidPageDesc* nd = (MidPageDesc*)hkm_libc_malloc(sizeof(MidPageDesc)); // P0 Fix: Use libc malloc
if (nd) { nd->page = page; nd->class_idx = (uint8_t)class_idx; nd->owner_tid = owner_tid; nd->next = g_mid_desc_head[h]; g_mid_desc_head[h] = nd; }
if (nd) {
nd->page = page;
nd->class_idx = (uint8_t)class_idx;
nd->owner_tid = owner_tid;
nd->next = g_mid_desc_head[h];
atomic_store(&nd->in_use, 0);
nd->blocks_per_page = 0;
atomic_store(&nd->pending_dn, 0);
g_mid_desc_head[h] = nd;
}
}
pthread_mutex_unlock(&g_mid_desc_mu[h]);
}

View File

@ -0,0 +1,80 @@
// smallobject_cold_iface_v1.h - Cold interface wrapper for SmallObject HotBox v3
// Role:
//  - Consolidate the boundary between the SmallObject Hot Box (v3) and the existing v1 Tiny Cold layer in one place.
//  - Phase A: wrap only the C7 refill/retire onto the v1 TinyHeap.
#pragma once
#include <stdint.h>
#include <stdlib.h>
#include "tiny_heap_box.h"
#include "smallobject_hotbox_v3_box.h"
#include "../hakmem_tiny.h" // TINY_SLAB_SIZE for slab base mask
struct so_page_v3;
typedef struct SmallObjectColdIface {
struct so_page_v3* (*refill_page)(void* cold_ctx, uint32_t class_idx);
void (*retire_page)(void* cold_ctx, uint32_t class_idx, struct so_page_v3* page);
} SmallObjectColdIface;
static inline struct so_page_v3* smallobject_cold_refill_page_v1(void* cold_ctx, uint32_t class_idx) {
if (class_idx != 7 && class_idx != 6) {
return NULL; // Phase A-2: only C7/C6 are supported
}
tiny_heap_ctx_t* ctx = cold_ctx ? (tiny_heap_ctx_t*)cold_ctx : tiny_heap_ctx_for_thread();
if (!ctx) return NULL;
tiny_heap_page_t* lease = tiny_heap_prepare_page(ctx, (int)class_idx);
if (!lease) return NULL;
so_page_v3* page = (so_page_v3*)calloc(1, sizeof(so_page_v3));
if (!page) return NULL;
page->lease_page = lease;
page->meta = lease->meta;
page->ss = lease->ss;
page->slab_idx = lease->slab_idx;
page->base = lease->base;
page->capacity = lease->capacity;
page->block_size = (uint32_t)tiny_stride_for_class((int)class_idx);
page->class_idx = class_idx;
page->slab_ref = lease;
return page;
}
static inline void smallobject_cold_retire_page_v1(void* cold_ctx, uint32_t class_idx, struct so_page_v3* page) {
if (!page || (class_idx != 7 && class_idx != 6)) {
if (page) {
free(page);
}
return;
}
tiny_heap_ctx_t* ctx = cold_ctx ? (tiny_heap_ctx_t*)cold_ctx : tiny_heap_ctx_for_thread();
if (!ctx) {
free(page);
return;
}
tiny_heap_page_t* lease = page->lease_page;
if (!lease) {
free(page);
return;
}
lease->base = (uint8_t*)page->base;
lease->capacity = (uint16_t)page->capacity;
lease->used = (uint16_t)page->used;
lease->meta = page->meta;
lease->ss = page->ss;
lease->slab_idx = page->slab_idx;
lease->free_list = page->freelist;
tiny_heap_page_becomes_empty(ctx, (int)class_idx, lease);
free(page);
}
static inline SmallObjectColdIface smallobject_cold_iface_v1(void) {
SmallObjectColdIface iface = {
.refill_page = smallobject_cold_refill_page_v1,
.retire_page = smallobject_cold_retire_page_v1,
};
return iface;
}
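// A minimal consumer sketch (hot-box side), assuming the per-thread v1 tiny_heap_ctx_t as cold_ctx:
// refill one C7 page through the boundary, carve from it, and retire it once it becomes empty again.
static inline void smallobject_cold_iface_v1_example(void) {
    SmallObjectColdIface cold = smallobject_cold_iface_v1();
    tiny_heap_ctx_t* ctx = tiny_heap_ctx_for_thread();
    struct so_page_v3* page = cold.refill_page(ctx, 7);   // Phase A-2: C7 (or C6) only
    if (!page) return;                                     // cold layer declined → stay on v1
    // ... hot box carves blocks from page->base up to page->capacity ...
    cold.retire_page(ctx, 7, page);                        // hand the lease back to v1
}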

View File

@ -0,0 +1,74 @@
// smallobject_hotbox_v3_box.h - SmallObject HotHeap v3 (C7-first skeleton)
//
// Phase A/B: define the types, TLS, and stats, and provide the hooks the front can call.
// The body is still a v1 fallback (so_alloc returns NULL).
#pragma once
#include <stdint.h>
#include <stddef.h>
#include <stdatomic.h>
#include "tiny_geometry_box.h"
#include "smallobject_hotbox_v3_env_box.h"
#include "tiny_region_id.h"
#ifndef SMALLOBJECT_NUM_CLASSES
#define SMALLOBJECT_NUM_CLASSES TINY_NUM_CLASSES
#endif
struct tiny_heap_page_t;
struct TinySlabMeta;
struct SuperSlab;
typedef struct so_page_v3 {
void* freelist;
uint32_t used;
uint32_t capacity;
uint32_t block_size;
uint32_t class_idx;
uint32_t flags;
void* base; // start of the user region after carve
void* slab_base; // 64KiB slab base (the page_of header is written here)
struct TinySlabMeta* meta;
struct SuperSlab* ss;
uint16_t slab_idx;
struct tiny_heap_page_t* lease_page;
void* slab_ref; // kept as a generic token; currently same as lease_page for v1
struct so_page_v3* next;
} so_page_v3;
typedef struct so_class_v3 {
so_page_v3* current;
so_page_v3* partial;
uint16_t max_partial_pages;
uint16_t partial_count;
uint32_t block_size;
} so_class_v3;
typedef struct so_ctx_v3 {
so_class_v3 cls[SMALLOBJECT_NUM_CLASSES];
} so_ctx_v3;
typedef struct so_stats_class_v3 {
_Atomic uint64_t route_hits;
_Atomic uint64_t alloc_calls;
_Atomic uint64_t alloc_refill;
_Atomic uint64_t alloc_fallback_v1;
_Atomic uint64_t free_calls;
_Atomic uint64_t free_fallback_v1;
} so_stats_class_v3;
// Stats helpers (defined in core/smallobject_hotbox_v3.c)
int so_v3_stats_enabled(void);
void so_v3_record_route_hit(uint8_t ci);
void so_v3_record_alloc_call(uint8_t ci);
void so_v3_record_alloc_refill(uint8_t ci);
void so_v3_record_alloc_fallback(uint8_t ci);
void so_v3_record_free_call(uint8_t ci);
void so_v3_record_free_fallback(uint8_t ci);
// TLS accessor (core/smallobject_hotbox_v3.c)
so_ctx_v3* so_tls_get(void);
// Hot path API (Phase B: stub → always fallback to v1)
void* so_alloc(uint32_t class_idx);
void so_free(uint32_t class_idx, void* ptr);
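// A minimal front-side sketch of the Phase B contract: so_alloc may return NULL, and the caller then
// records the fallback and continues on v1. Returning NULL below stands in for the v1 C7 path.
static inline void* so_front_alloc_c7_sketch(void) {
    if (!small_heap_v3_c7_enabled()) return NULL;   // v3 not routed for C7 → caller stays on v1
    void* p = so_alloc(7);
    if (p != NULL) return p;
    so_v3_record_alloc_fallback(7);                 // same pattern as malloc_tiny_fast in this commit
    return NULL;                                    // caller falls back to the v1 C7 path
}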

View File

@ -0,0 +1,47 @@
// smallobject_hotbox_v3_env_box.h - ENV gate for SmallObject HotHeap v3
// Role:
//  - Read HAKMEM_SMALL_HEAP_V3_ENABLED / HAKMEM_SMALL_HEAP_V3_CLASSES in one place.
//  - Default is C7-only ON (class mask 0x80); v3 is disabled only when the ENV explicitly sets 0.
#pragma once
#include <stdint.h>
#include <stdlib.h>
#include "../hakmem_tiny_config.h"
static inline int small_heap_v3_enabled(void) {
static int g_enable = -1;
if (__builtin_expect(g_enable == -1, 0)) {
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_ENABLED");
if (e && *e) {
g_enable = (*e != '0') ? 1 : 0;
} else {
// Default is ON (enabled when the ENV is unset)
g_enable = 1;
}
}
return g_enable;
}
static inline int small_heap_v3_class_enabled(uint8_t class_idx) {
static int g_parsed = 0;
static unsigned g_mask = 0;
if (__builtin_expect(!g_parsed, 0)) {
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_CLASSES");
if (e && *e) {
unsigned v = (unsigned)strtoul(e, NULL, 0);
g_mask = v & 0xFFu;
} else {
// Default: only C7 is routed to v3
g_mask = 0x80u;
}
g_parsed = 1;
}
if (!small_heap_v3_enabled()) return 0;
if (class_idx >= TINY_NUM_CLASSES) return 0;
return (g_mask & (1u << class_idx)) != 0;
}
static inline int small_heap_v3_c7_enabled(void) {
return small_heap_v3_class_enabled(7);
}
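// A small worked example of the mask semantics, assuming the defaults above: with no ENV set the
// effective mask is 0x80 (C7 only), HAKMEM_SMALL_HEAP_V3_CLASSES=0xC0 would add C6, and
// HAKMEM_SMALL_HEAP_V3_ENABLED=0 forces the mask to 0.
static inline unsigned small_heap_v3_effective_mask_example(void) {
    unsigned m = 0;
    for (uint8_t ci = 0; ci < TINY_NUM_CLASSES; ci++) {
        if (small_heap_v3_class_enabled(ci)) m |= (1u << ci);
    }
    return m;
}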

View File

@ -360,7 +360,7 @@ void superslab_free(SuperSlab* ss) {
}
if (lazy_zero_enabled) {
#ifdef MADV_DONTNEED
(void)madvise((void*)ss, ss_size, MADV_DONTNEED);
(void)ss_os_madvise_guarded((void*)ss, ss_size, MADV_DONTNEED, "ss_lru_madvise");
ss_os_stats_record_madvise();
#endif
}

View File

@ -1,6 +1,7 @@
// ss_os_acquire_box.c - SuperSlab OS Memory Acquisition Box Implementation
#include "ss_os_acquire_box.h"
#include "../hakmem_build_flags.h"
#include "../hakmem_env_cache.h"
#include <sys/mman.h>
#include <sys/resource.h>
#include <errno.h>
@ -15,8 +16,11 @@ extern _Atomic uint64_t g_final_fallback_mmap_count;
extern _Atomic uint64_t g_ss_os_alloc_calls;
extern _Atomic uint64_t g_ss_os_free_calls;
extern _Atomic uint64_t g_ss_os_madvise_calls;
extern _Atomic uint64_t g_ss_os_madvise_fail_enomem;
extern _Atomic uint64_t g_ss_os_madvise_fail_other;
extern _Atomic uint64_t g_ss_os_huge_alloc_calls;
extern _Atomic uint64_t g_ss_os_huge_fail_calls;
extern _Atomic bool g_ss_madvise_disabled;
// ============================================================================
// OOM Diagnostics
@ -240,9 +244,12 @@ void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int p
// See: EXPLICIT_PREFAULT_IMPLEMENTATION_REPORT_20251205.md
#ifdef MADV_POPULATE_WRITE
if (populate) {
int ret = madvise(ptr, ss_size, MADV_POPULATE_WRITE);
ss_os_stats_record_madvise();
int ret = ss_os_madvise_guarded(ptr, ss_size, MADV_POPULATE_WRITE, "ss_os_acquire_populate");
if (ret != 0) {
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[SS_OS] madvise(MADV_POPULATE_WRITE) EINVAL (strict mode). Aborting.\n");
abort();
}
// Fallback for kernels that define MADV_POPULATE_WRITE but where the call still fails:
// Use explicit page-by-page touching with writes
volatile char* p = (volatile char*)ptr;
@ -273,10 +280,14 @@ static void ss_os_stats_destructor(void) {
return;
}
fprintf(stderr,
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu madvise_enomem=%llu madvise_other=%llu madvise_disabled=%d "
"mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
(unsigned long long)atomic_load_explicit(&g_ss_os_alloc_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_free_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_enomem, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_other, memory_order_relaxed),
atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed) ? 1 : 0,
(unsigned long long)atomic_load_explicit(&g_ss_mmap_count, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_final_fallback_mmap_count, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_huge_alloc_calls, memory_order_relaxed),

View File

@ -18,7 +18,11 @@
#include <stdint.h>
#include <stddef.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
// ============================================================================
// Global Counters (for debugging/diagnostics)
@ -29,8 +33,11 @@ extern _Atomic uint64_t g_final_fallback_mmap_count;
extern _Atomic uint64_t g_ss_os_alloc_calls;
extern _Atomic uint64_t g_ss_os_free_calls;
extern _Atomic uint64_t g_ss_os_madvise_calls;
extern _Atomic uint64_t g_ss_os_madvise_fail_enomem;
extern _Atomic uint64_t g_ss_os_madvise_fail_other;
extern _Atomic uint64_t g_ss_os_huge_alloc_calls;
extern _Atomic uint64_t g_ss_os_huge_fail_calls;
extern _Atomic bool g_ss_madvise_disabled;
static inline int ss_os_stats_enabled(void) {
static int g_ss_os_stats_enabled = -1;
@ -62,6 +69,52 @@ static inline void ss_os_stats_record_madvise(void) {
atomic_fetch_add_explicit(&g_ss_os_madvise_calls, 1, memory_order_relaxed);
}
// ============================================================================
// madvise guard (shared by Superslab hot/cold paths)
// ============================================================================
//
static inline int ss_os_madvise_guarded(void* ptr, size_t len, int advice, const char* where) {
(void)where;
if (!ptr || len == 0) {
return 0;
}
if (atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed)) {
return 0;
}
int ret = madvise(ptr, len, advice);
ss_os_stats_record_madvise();
if (ret == 0) {
return 0;
}
int e = errno;
if (e == ENOMEM) {
atomic_fetch_add_explicit(&g_ss_os_madvise_fail_enomem, 1, memory_order_relaxed);
atomic_store_explicit(&g_ss_madvise_disabled, true, memory_order_relaxed);
#if !HAKMEM_BUILD_RELEASE
static _Atomic bool g_ss_madvise_enomem_logged = false;
bool already = atomic_exchange_explicit(&g_ss_madvise_enomem_logged, true, memory_order_relaxed);
if (!already) {
fprintf(stderr,
"[SS_OS_MADVISE] madvise(advice=%d, ptr=%p, len=%zu) failed with ENOMEM "
"(vm.max_map_count reached?). Disabling further madvise calls.\n",
advice, ptr, len);
}
#endif
return 0; // soft fail, do not propagate ENOMEM
}
atomic_fetch_add_explicit(&g_ss_os_madvise_fail_other, 1, memory_order_relaxed);
if (e == EINVAL) {
errno = e;
return -1; // let caller decide (strict mode)
}
errno = e;
return 0;
}
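// A minimal caller-side sketch of the guard's contract: ENOMEM is swallowed (and further madvise
// disabled), other errors also return 0, and only EINVAL comes back as -1 so strict-mode callers
// (HAK_ENV_SS_MADVISE_STRICT(), from hakmem_env_cache.h) can choose to abort.
static inline void ss_os_madvise_dontneed_example(void* ptr, size_t len) {
    int rc = ss_os_madvise_guarded(ptr, len, MADV_DONTNEED, "example_site");
    if (rc != 0 && errno == EINVAL) {
        // Strict-mode policy would abort here; relaxed callers can simply continue.
        fprintf(stderr, "[EXAMPLE] madvise(MADV_DONTNEED) EINVAL at example_site\n");
        abort();
    }
}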
// ============================================================================
// HugePage Experiment (research-only)
// ============================================================================

View File

@ -0,0 +1,37 @@
// tiny_cold_iface_v1.h
// Common boundary (v1 wrapper) through which other Hot Boxes such as TinyHotHeap v2 talk to Superslab/Tier/Stats.
// Precondition: tiny_heap_page_t / tiny_heap_ctx_t are already defined in tiny_heap_box.h.
#pragma once
#include "tiny_heap_box.h"
typedef struct TinyColdIface {
tiny_heap_page_t* (*refill_page)(void* cold_ctx, uint32_t class_idx);
void (*retire_page)(void* cold_ctx, uint32_t class_idx, tiny_heap_page_t* page);
} TinyColdIface;
// Forward declarations for the v1 cold helpers (defined in tiny_heap_box.h)
tiny_heap_page_t* tiny_heap_prepare_page(tiny_heap_ctx_t* ctx, int class_idx);
void tiny_heap_page_becomes_empty(tiny_heap_ctx_t* ctx, int class_idx, tiny_heap_page_t* page);
static inline tiny_heap_page_t* tiny_cold_refill_page_v1(void* cold_ctx, uint32_t class_idx) {
if (!cold_ctx) {
return NULL;
}
return tiny_heap_prepare_page((tiny_heap_ctx_t*)cold_ctx, (int)class_idx);
}
static inline void tiny_cold_retire_page_v1(void* cold_ctx, uint32_t class_idx, tiny_heap_page_t* page) {
if (!cold_ctx || !page) {
return;
}
tiny_heap_page_becomes_empty((tiny_heap_ctx_t*)cold_ctx, (int)class_idx, page);
}
static inline TinyColdIface tiny_cold_iface_v1(void) {
TinyColdIface iface = {
.refill_page = tiny_cold_refill_page_v1,
.retire_page = tiny_cold_retire_page_v1,
};
return iface;
}
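// A minimal consumer sketch, assuming the per-thread tiny_heap_ctx_t is passed as cold_ctx: a hot
// box refills one page across the boundary and retires it once it drains back to empty.
static inline void tiny_cold_iface_v1_example(tiny_heap_ctx_t* ctx, uint32_t class_idx) {
    TinyColdIface cold = tiny_cold_iface_v1();
    tiny_heap_page_t* page = cold.refill_page(ctx, class_idx);
    if (!page) return;                         // cold layer could not supply a page
    // ... hot box carves blocks from the leased page ...
    cold.retire_page(ctx, class_idx, page);    // hand the empty page back to v1
}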

View File

@ -0,0 +1,101 @@
// tiny_front_v3_env_box.h - Tiny Front v3 ENV gate & snapshot (guard/UC/header)
#pragma once
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
typedef struct TinyFrontV3Snapshot {
bool unified_cache_on;
bool tiny_guard_on;
uint8_t header_mode; // cached value of tiny_header_mode()
bool header_v3_enabled; // ENV: HAKMEM_TINY_HEADER_V3_ENABLED
bool header_v3_skip_c7; // ENV: HAKMEM_TINY_HEADER_V3_SKIP_C7
} TinyFrontV3Snapshot;
// Size→class/route entry for the Tiny front v3 LUT (route_kind is expected to be a tiny_route_kind_t)
typedef struct TinyFrontV3SizeClassEntry {
uint8_t class_idx;
uint8_t route_kind;
} TinyFrontV3SizeClassEntry;
#define TINY_FRONT_V3_INVALID_CLASS ((uint8_t)0xFF)
extern TinyFrontV3Snapshot g_tiny_front_v3_snapshot;
extern int g_tiny_front_v3_snapshot_ready;
// ENV gate: default OFF
static inline bool tiny_front_v3_enabled(void) {
static int g_enable = -1;
if (__builtin_expect(g_enable == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_ENABLED");
g_enable = (e && *e && *e != '0') ? 1 : 0;
}
return g_enable != 0;
}
// Optional: size→class LUT gate (default OFF, for A/B)
static inline bool tiny_front_v3_lut_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_LUT_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g != 0;
}
// Optional: route fast path (Tiny LUT→1 switch). Default OFF for easy rollback.
static inline bool tiny_front_v3_route_fast_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_ROUTE_FAST_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g != 0;
}
// Optional stats gate
static inline bool tiny_front_v3_stats_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_FRONT_V3_STATS");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g != 0;
}
// Header v3 experimental gate (default OFF)
static inline bool tiny_header_v3_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_HEADER_V3_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g != 0;
}
// Skip header write for C7 v3 allocs (bench/experiment, default OFF)
static inline bool tiny_header_v3_skip_c7(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_TINY_HEADER_V3_SKIP_C7");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g != 0;
}
// Snapshot initializer (implemented in hakmem_tiny.c)
void tiny_front_v3_snapshot_init(void);
// LUT initializer / lookup (implemented in hakmem_tiny.c)
void tiny_front_v3_size_class_lut_init(void);
const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size);
// Get cached snapshot (lazy init)
static inline const TinyFrontV3Snapshot* tiny_front_v3_snapshot_get(void) {
if (__builtin_expect(!g_tiny_front_v3_snapshot_ready, 0)) {
tiny_front_v3_snapshot_init();
}
return &g_tiny_front_v3_snapshot;
}
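// A minimal sketch of the intended lookup, mirroring malloc_tiny_fast later in this commit: the LUT
// is consulted only when both the v3 gate and the LUT gate are on, and its route_kind is trusted
// only when the route-fast gate is also enabled.
static inline int tiny_front_v3_lookup_class_example(size_t size) {
    if (!tiny_front_v3_enabled() || !tiny_front_v3_lut_enabled()) return -1;
    const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(size);
    if (!e || e->class_idx == TINY_FRONT_V3_INVALID_CLASS) return -1;
    return (int)e->class_idx;
}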

View File

@ -36,7 +36,8 @@ typedef struct tiny_hotheap_class_v2 {
tiny_hotheap_page_v2* partial_pages;
tiny_hotheap_page_v2* full_pages;
uint16_t stride;
uint16_t _pad;
uint16_t max_partial_pages; // upper bound on retained empty pages (1-2 expected for the C7-only case)
uint16_t partial_count; // number of partial pages currently held
tiny_hotheap_page_v2 storage_page; // holds just one C7-only page for now (Phase36: reuse when it becomes free)
} tiny_hotheap_class_v2;
@ -51,8 +52,8 @@ extern __thread tiny_hotheap_ctx_v2* g_tiny_hotheap_ctx_v2;
tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void);
void* tiny_hotheap_v2_alloc(uint8_t class_idx);
void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta);
void tiny_hotheap_v2_record_route_fallback(void);
void tiny_hotheap_v2_record_free_fallback(void);
void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx);
void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx);
typedef struct tiny_hotheap_v2_stats_snapshot {
uint64_t route_hits;
@ -65,11 +66,19 @@ typedef struct tiny_hotheap_v2_stats_snapshot {
uint64_t free_calls;
uint64_t free_fast;
uint64_t free_fallback_v1;
uint64_t cold_refill_fail;
uint64_t cold_retire_calls;
uint64_t retire_calls_v2;
uint64_t prepare_calls;
uint64_t prepare_with_current_null;
uint64_t prepare_from_partial;
uint64_t free_made_current;
uint64_t page_retired;
uint64_t partial_pushes;
uint64_t partial_pops;
uint64_t partial_peak;
uint64_t refill_with_current;
uint64_t refill_with_partial;
} tiny_hotheap_v2_stats_snapshot_t;
void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out);

View File

@ -9,10 +9,13 @@
#include "../hakmem_tiny_config.h"
#include "tiny_heap_env_box.h"
#include "smallobject_hotbox_v3_env_box.h"
typedef enum {
TINY_ROUTE_LEGACY = 0,
TINY_ROUTE_HEAP = 1, // TinyHeap v1
TINY_ROUTE_HOTHEAP_V2 = 2, // TinyHotHeap v2
TINY_ROUTE_HEAP = 1, // TinyHeap v1
TINY_ROUTE_HOTHEAP_V2 = 2, // TinyHotHeap v2
TINY_ROUTE_SMALL_HEAP_V3 = 3, // SmallObject HotHeap v3 (C7-first, research box)
} tiny_route_kind_t;
extern tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES];
@ -20,7 +23,9 @@ extern int g_tiny_route_snapshot_done;
static inline void tiny_route_snapshot_init(void) {
for (int i = 0; i < TINY_NUM_CLASSES; i++) {
if (tiny_hotheap_v2_class_enabled((uint8_t)i)) {
if (small_heap_v3_class_enabled((uint8_t)i)) {
g_tiny_route_class[i] = TINY_ROUTE_SMALL_HEAP_V3;
} else if (tiny_hotheap_v2_class_enabled((uint8_t)i)) {
g_tiny_route_class[i] = TINY_ROUTE_HOTHEAP_V2;
} else if (tiny_heap_box_enabled() && tiny_heap_class_route_enabled(i)) {
g_tiny_route_class[i] = TINY_ROUTE_HEAP;
@ -42,7 +47,7 @@ static inline tiny_route_kind_t tiny_route_for_class(uint8_t ci) {
}
static inline int tiny_route_is_heap_kind(tiny_route_kind_t route) {
return route == TINY_ROUTE_HEAP || route == TINY_ROUTE_HOTHEAP_V2;
return route == TINY_ROUTE_HEAP || route == TINY_ROUTE_HOTHEAP_V2 || route == TINY_ROUTE_SMALL_HEAP_V3;
}
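// A small illustration of the routing priority under this commit's defaults (v3 class mask 0x80,
// v2/TinyHeap gates assumed OFF): C7 lands on SmallObject HotHeap v3, classes 0-6 stay legacy.
static inline int tiny_route_example_c7_is_v3(void) {
    tiny_route_snapshot_init();
    return tiny_route_for_class(7) == TINY_ROUTE_SMALL_HEAP_V3;
}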
// Does the C7 front use TinyHeap? (decided via the route snapshot)

View File

@ -40,6 +40,8 @@
#include "../box/tiny_c7_hotbox.h" // Optional: C7 専用ホットボックス
#include "../box/tiny_heap_box.h" // TinyHeap 汎用 Box
#include "../box/tiny_hotheap_v2_box.h" // TinyHotHeap v2 (Phase31 A/B)
#include "../box/smallobject_hotbox_v3_box.h" // SmallObject HotHeap v3 skeleton
#include "../box/tiny_front_v3_env_box.h" // Tiny front v3 snapshot gate
#include "../box/tiny_heap_env_box.h" // ENV gate for TinyHeap front (A/B)
#include "../box/tiny_route_env_box.h" // Route snapshot (Heap vs Legacy)
#include "../box/tiny_front_stats_box.h" // Front class distribution counters
@ -102,24 +104,58 @@ static inline int front_gate_unified_enabled(void) {
//
__attribute__((always_inline))
static inline void* malloc_tiny_fast(size_t size) {
// Determine size → class_idx just once
int class_idx = hak_tiny_size_to_class(size);
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
return NULL;
const int front_v3_on = tiny_front_v3_enabled();
const TinyFrontV3Snapshot* front_snap =
__builtin_expect(front_v3_on, 0) ? tiny_front_v3_snapshot_get() : NULL;
const bool route_fast_on = front_v3_on && tiny_front_v3_lut_enabled() &&
tiny_front_v3_route_fast_enabled();
int class_idx = -1;
tiny_route_kind_t route = TINY_ROUTE_LEGACY;
bool route_trusted = false;
if (front_v3_on && tiny_front_v3_lut_enabled()) {
const TinyFrontV3SizeClassEntry* e = tiny_front_v3_lut_lookup(size);
if (e && e->class_idx != TINY_FRONT_V3_INVALID_CLASS) {
class_idx = (int)e->class_idx;
route = (tiny_route_kind_t)e->route_kind;
route_trusted = route_fast_on;
}
}
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
class_idx = hak_tiny_size_to_class(size);
if (__builtin_expect(class_idx < 0 || class_idx >= TINY_NUM_CLASSES, 0)) {
return NULL;
}
route = tiny_route_for_class((uint8_t)class_idx);
route_trusted = false;
} else if (!route_trusted &&
route != TINY_ROUTE_LEGACY && route != TINY_ROUTE_HEAP &&
route != TINY_ROUTE_HOTHEAP_V2 && route != TINY_ROUTE_SMALL_HEAP_V3) {
route = tiny_route_for_class((uint8_t)class_idx);
}
tiny_front_alloc_stat_inc(class_idx);
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
switch (route) {
case TINY_ROUTE_HOTHEAP_V2: {
if (class_idx == 7) {
void* v2p = tiny_hotheap_v2_alloc(7);
if (TINY_HOT_LIKELY(v2p != NULL)) {
return v2p;
}
tiny_hotheap_v2_record_route_fallback();
case TINY_ROUTE_SMALL_HEAP_V3: {
void* v3p = so_alloc((uint32_t)class_idx);
if (TINY_HOT_LIKELY(v3p != NULL)) {
return v3p;
}
so_v3_record_alloc_fallback((uint8_t)class_idx);
// fallthrough to v2/v1
__attribute__((fallthrough));
}
case TINY_ROUTE_HOTHEAP_V2: {
void* v2p = tiny_hotheap_v2_alloc((uint8_t)class_idx);
if (TINY_HOT_LIKELY(v2p != NULL)) {
return v2p;
}
tiny_hotheap_v2_record_route_fallback((uint8_t)class_idx);
// fallthrough to TinyHeap v1
__attribute__((fallthrough));
}
case TINY_ROUTE_HEAP: {
void* heap_ptr = NULL;
@ -139,7 +175,10 @@ static inline void* malloc_tiny_fast(size_t size) {
}
// Legacy Tiny front
void* ptr = tiny_hot_alloc_fast(class_idx);
void* ptr = NULL;
if (!front_snap || front_snap->unified_cache_on) {
ptr = tiny_hot_alloc_fast(class_idx);
}
if (TINY_HOT_LIKELY(ptr != NULL)) {
return ptr;
}
@ -192,6 +231,8 @@ static inline int free_tiny_fast(void* ptr) {
tiny_front_free_stat_inc(class_idx);
tiny_route_kind_t route = tiny_route_for_class((uint8_t)class_idx);
const int use_tiny_heap = tiny_route_is_heap_kind(route);
const TinyFrontV3Snapshot* front_snap =
__builtin_expect(tiny_front_v3_enabled(), 0) ? tiny_front_v3_snapshot_get() : NULL;
// TWO-SPEED: SuperSlab registration check is DEBUG-ONLY to keep HOT PATH fast.
// In Release builds, we trust header magic (0xA0) as sufficient validation.
@ -255,6 +296,9 @@ static inline int free_tiny_fast(void* ptr) {
// Same-thread + TinyHeap route → route-based free
if (__builtin_expect(use_tiny_heap, 0)) {
switch (route) {
case TINY_ROUTE_SMALL_HEAP_V3:
so_free((uint32_t)class_idx, base);
return 1;
case TINY_ROUTE_HOTHEAP_V2:
tiny_hotheap_v2_free((uint8_t)class_idx, base, meta);
return 1;
@ -276,7 +320,9 @@ static inline int free_tiny_fast(void* ptr) {
if (use_tiny_heap) {
// fallback: lookup failed but TinyHeap front is ON → use generic TinyHeap free
if (route == TINY_ROUTE_HOTHEAP_V2) {
tiny_hotheap_v2_record_free_fallback();
tiny_hotheap_v2_record_free_fallback((uint8_t)class_idx);
} else if (route == TINY_ROUTE_SMALL_HEAP_V3) {
so_v3_record_free_fallback((uint8_t)class_idx);
}
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), class_idx, ptr);
return 1;
@ -300,7 +346,10 @@ static inline int free_tiny_fast(void* ptr) {
}
#endif
int pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base));
int pushed = 0;
if (!front_snap || front_snap->unified_cache_on) {
pushed = unified_cache_push(class_idx, HAK_BASE_FROM_RAW(base));
}
if (__builtin_expect(pushed, 1)) {
return 1; // Success
}

View File

@ -11,6 +11,7 @@
#include "hakmem_sys.h" // Phase 6.11.1: Syscall wrappers with timing
#include "hakmem_whale.h" // Phase 6.11.1: Whale fast-path cache
#include "hakmem_env_cache.h" // Priority-2: ENV cache
#include "box/ss_os_acquire_box.h" // madvise guard
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@ -117,12 +118,17 @@ void hak_batch_flush(void) {
size_t size = snap.sizes[i];
// Step 1: MADV_FREE to release physical pages (fast, low TLB cost)
int ret = madvise(ptr, size, MADV_FREE);
int ret = ss_os_madvise_guarded(ptr, size, MADV_FREE, "batch_free");
if (ret != 0) {
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[Batch] madvise(MADV_FREE) EINVAL (STRICT). Aborting.\n");
abort();
}
// Fallback to MADV_DONTNEED if MADV_FREE not supported
ret = madvise(ptr, size, MADV_DONTNEED);
if (ret != 0) {
fprintf(stderr, "[Batch] Warning: madvise failed for block %p (size %zu)\n", ptr, size);
ret = ss_os_madvise_guarded(ptr, size, MADV_DONTNEED, "batch_dontneed");
if (ret != 0 && HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[Batch] madvise(MADV_DONTNEED) EINVAL (STRICT). Aborting.\n");
abort();
}
}

View File

@ -91,6 +91,9 @@ typedef struct {
// ===== Cold Path: Batch (1 variable) =====
int batch_bg; // HAKMEM_BATCH_BG (default: 0)
// ===== Cold Path: Superslab Madvise (1 variable) =====
int ss_madvise_strict; // HAKMEM_SS_MADVISE_STRICT (default: 1)
} HakEnvCache;
// Global cache instance (initialized once at startup)
@ -289,10 +292,17 @@ static inline void hakmem_env_cache_init(void) {
g_hak_env_cache.batch_bg = (e && atoi(e) != 0) ? 1 : 0; // default: 0 (OFF)
}
// ===== Cold Path: Superslab Madvise =====
{
const char* e = getenv("HAKMEM_SS_MADVISE_STRICT");
// Default: 1 (STRICT), set HAKMEM_SS_MADVISE_STRICT=0 to relax
g_hak_env_cache.ss_madvise_strict = (e && *e && *e == '0') ? 0 : 1;
}
#if !HAKMEM_BUILD_RELEASE
// Debug: Print cache summary (stderr only)
if (!g_hak_env_cache.quiet) {
fprintf(stderr, "[ENV_CACHE_INIT] Parsed %d ENV variables at startup\n", 49);
fprintf(stderr, "[ENV_CACHE_INIT] Parsed %d ENV variables at startup\n", 50);
fprintf(stderr, "[ENV_CACHE_INIT] Hot path syscalls eliminated: ~2000/sec → 0/sec\n");
fflush(stderr);
}
@ -361,4 +371,7 @@ static inline void hakmem_env_cache_init(void) {
// Cold path: Batch
#define HAK_ENV_BATCH_BG() (g_hak_env_cache.batch_bg)
// Cold path: Superslab Madvise
#define HAK_ENV_SS_MADVISE_STRICT() (g_hak_env_cache.ss_madvise_strict)
#endif // HAKMEM_ENV_CACHE_H

View File

@ -49,6 +49,7 @@
#include "hakmem_l25_pool.h"
#include "hakmem_config.h"
#include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC
#include "box/ss_os_acquire_box.h"
#include "hakmem_syscall.h" // Phase 6.X P0 Fix: Box 3 syscall layer (bypasses LD_PRELOAD)
#include "box/pagefault_telemetry_box.h" // Box PageFaultTelemetry (PF_BUCKET_L25)
#include "page_arena.h" // Phase 24: PageArena integration for L25
@ -560,7 +561,7 @@ void hak_l25_pool_free_fast(void* user_ptr, uintptr_t site_id) {
// Optional: demand-zero for larger classes
if (g_l25_pool.demand_zero && class_idx >= 3) {
madvise((char*)raw, HEADER_SIZE + g_class_sizes[class_idx], MADV_DONTNEED);
(void)ss_os_madvise_guarded((char*)raw, HEADER_SIZE + g_class_sizes[class_idx], MADV_DONTNEED, "l25_pool_dontneed_class");
}
// Same-thread hint: prefer per-block owner if header present (HDR_LIGHT>=1), else page owner
@ -1118,7 +1119,7 @@ void hak_l25_pool_free(void* ptr, size_t size, uintptr_t site_id) {
if (g_l25_pool.demand_zero) {
int class_idx_dz = hak_l25_pool_get_class_index(size);
if (class_idx_dz >= 3) {
madvise((char*)raw, HEADER_SIZE + size, MADV_DONTNEED);
(void)ss_os_madvise_guarded((char*)raw, HEADER_SIZE + size, MADV_DONTNEED, "l25_pool_dontneed_size");
}
}

View File

@ -46,7 +46,9 @@
#include "hakmem_pool.h"
#include "hakmem_config.h"
#include "hakmem_internal.h" // For AllocHeader and HAKMEM_MAGIC
#include "box/pool_hotbox_v2_header_box.h"
#include "hakmem_syscall.h" // Box 3 syscall layer (bypasses LD_PRELOAD)
#include "box/pool_hotbox_v2_box.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
@ -58,6 +60,11 @@
#include "hakmem_policy.h" // FrozenPolicy caps (Soft CAP gating)
#include "hakmem_debug.h"
#define POOL_HOTBOX_V2_HEADER_BYTES ((size_t)sizeof(void*))
// Use an over-sized mapping to guarantee POOL_PAGE_SIZE alignment for the
// v2 page base. This keeps page_of() O(1) without relying on mmap alignment.
#define POOL_HOTBOX_V2_MAP_LEN (POOL_PAGE_SIZE * 2)
// False sharing mitigation: padded mutex type (64B)
typedef struct { pthread_mutex_t m; char _pad[64 - (sizeof(pthread_mutex_t) % 64)]; } PaddedMutex;
@ -808,6 +815,513 @@ static int g_pool_min_bundle = 2; // env: HAKMEM_POOL_MIN_BUNDLE (default 2)
static int g_count_sample_exp = 10; // env: HAKMEM_POOL_COUNT_SAMPLE (0..16)
static __thread uint32_t t_pool_rng = 0x243f6a88u; // per-thread RNG for sampling
// ---------------------------------------------------------------------------
// PoolHotBox v2 scaffolding (research-only; defaults to v1)
// ---------------------------------------------------------------------------
PoolHotBoxV2Stats g_pool_hotbox_v2_stats[POOL_NUM_CLASSES];
static __thread pool_ctx_v2* g_pool_ctx_v2 = NULL;
// Forward decls for helpers used in HotBox v2.
static inline uint32_t pool_hotbox_v2_block_size(int ci);
static inline uint32_t pool_block_size_for_class(int ci);
static inline void mid_set_header(AllocHeader* hdr, size_t class_sz, uintptr_t site_id);
static inline void mid_page_inuse_inc(void* raw);
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref);
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base);
static int pool_hotbox_v2_global_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_ENABLED");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
static unsigned pool_hotbox_v2_class_mask(void) {
static int parsed = 0;
static unsigned mask = 0;
if (__builtin_expect(!parsed, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_CLASSES");
if (e && *e) {
mask = (unsigned)strtoul(e, NULL, 0);
} else {
mask = 0; // default: all OFF (opt-in only)
}
parsed = 1;
}
return mask;
}
int pool_hotbox_v2_class_enabled(int class_idx) {
if (!pool_hotbox_v2_global_enabled()) return 0;
if (class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
unsigned mask = pool_hotbox_v2_class_mask();
static int logged = 0;
if (__builtin_expect(!logged && pool_hotbox_v2_stats_enabled(), 0)) {
fprintf(stderr, "[POOL_V2_MASK] enabled=0x%x\n", mask);
logged = 1;
}
return (mask & (1u << class_idx)) != 0;
}
int pool_hotbox_v2_stats_enabled(void) {
static int g = -1;
if (__builtin_expect(g == -1, 0)) {
const char* e = getenv("HAKMEM_POOL_V2_STATS");
g = (e && *e && *e != '0') ? 1 : 0;
}
return g;
}
pool_ctx_v2* pool_v2_tls_get(void) {
pool_ctx_v2* ctx = g_pool_ctx_v2;
if (__builtin_expect(ctx == NULL, 0)) {
ctx = (pool_ctx_v2*)calloc(1, sizeof(pool_ctx_v2));
if (!ctx) abort();
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
uint32_t user_sz = pool_block_size_for_class(i);
ctx->cls[i].block_size = user_sz ? (user_sz + HEADER_SIZE) : 0;
ctx->cls[i].max_partial_pages = 2;
}
g_pool_ctx_v2 = ctx;
}
return ctx;
}
static inline uint32_t pool_hotbox_v2_block_size(int ci) {
switch (ci) {
case 0: return POOL_CLASS_2KB;
case 1: return POOL_CLASS_4KB;
case 2: return POOL_CLASS_8KB;
case 3: return POOL_CLASS_16KB;
case 4: return POOL_CLASS_32KB;
case 5: return POOL_CLASS_40KB;
case 6: return POOL_CLASS_52KB;
default: return 0;
}
}
static inline uint32_t pool_block_size_for_class(int ci) {
return pool_hotbox_v2_block_size(ci);
}
static inline void pool_hotbox_v2_record_alloc(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_calls, 1, memory_order_relaxed);
}
static inline void pool_hotbox_v2_record_alloc_refill(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill, 1, memory_order_relaxed);
}
static inline void pool_hotbox_v2_record_alloc_refill_fail(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_refill_fail, 1, memory_order_relaxed);
}
void pool_hotbox_v2_record_alloc_fallback(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fallback_v1, 1, memory_order_relaxed);
}
static inline void pool_hotbox_v2_record_free(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_calls, 1, memory_order_relaxed);
}
void pool_hotbox_v2_record_free_call(uint32_t ci) {
pool_hotbox_v2_record_free(ci);
}
void pool_hotbox_v2_record_free_fallback(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fallback_v1, 1, memory_order_relaxed);
}
enum pool_v2_pageof_fail {
POOL_V2_PAGEOF_NONE = 0,
POOL_V2_PAGEOF_OUT_OF_RANGE = 1,
POOL_V2_PAGEOF_MISALIGNED = 2,
POOL_V2_PAGEOF_HEADER_MISSING = 3,
POOL_V2_PAGEOF_UNKNOWN = 4,
};
static inline void pool_hotbox_v2_record_pageof_fail(uint32_t ci, int reason) {
if ((int)ci >= POOL_NUM_CLASSES) return;
switch (reason) {
case POOL_V2_PAGEOF_HEADER_MISSING:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_header_missing, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_OUT_OF_RANGE:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_out_of_range, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_MISALIGNED:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_misaligned, 1, memory_order_relaxed);
break;
case POOL_V2_PAGEOF_UNKNOWN:
default:
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].page_of_fail_unknown, 1, memory_order_relaxed);
break;
}
}
static pool_page_v2* pool_hotbox_v2_page_acquire(void) {
pool_page_v2* p = (pool_page_v2*)calloc(1, sizeof(pool_page_v2));
return p;
}
static void pool_hotbox_v2_page_release(pool_page_v2* p) {
free(p);
}
static void* pool_hotbox_v2_build_freelist(pool_page_v2* p) {
if (!p || !p->base || p->block_size == 0 || p->capacity == 0) return NULL;
uint8_t* base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
void* head = NULL;
for (uint32_t i = 0; i < p->capacity; i++) {
void* blk = base + ((size_t)i * p->block_size);
*(void**)blk = head;
head = blk;
}
return head;
}
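// Reference sketch of the capacity math used by the refill path below: the first
// POOL_HOTBOX_V2_HEADER_BYTES of the 64KiB page hold the reverse pool_page_v2* pointer, and blocks
// of (user size + HEADER_SIZE) bytes are carved from the remainder.
static inline uint32_t pool_hotbox_v2_capacity_example(uint32_t user_sz) {
    uint32_t bs = user_sz + (uint32_t)HEADER_SIZE;
    if (bs == 0 || POOL_PAGE_SIZE <= POOL_HOTBOX_V2_HEADER_BYTES) return 0;
    return (uint32_t)((POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) / bs);
}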
static PoolColdIface pool_cold_iface_v1(void);
static pool_page_v2* pool_hotbox_v2_page_of(pool_ctx_v2* ctx, uint32_t ci, void* ptr, int* out_reason) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
if (!ctx || ci >= POOL_NUM_CLASSES || !ptr) return NULL;
// Compute page base by mask (POOL_PAGE_SIZE is a power of two).
void* page_base = pool_hotbox_v2_page_base(ptr, POOL_PAGE_SIZE);
pool_page_v2* p = (pool_page_v2*)pool_hotbox_v2_header_load(page_base);
if (!p) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_HEADER_MISSING;
return NULL;
}
if (p->class_idx != ci || !p->base) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_UNKNOWN;
return NULL;
}
uint8_t* data_base = (uint8_t*)p->base + POOL_HOTBOX_V2_HEADER_BYTES;
size_t span = (size_t)p->block_size * (size_t)p->capacity;
uintptr_t off = (uintptr_t)((uint8_t*)ptr - data_base);
if (off >= span) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_OUT_OF_RANGE;
return NULL;
}
if (off % p->block_size != 0) {
if (out_reason) *out_reason = POOL_V2_PAGEOF_MISALIGNED;
return NULL;
}
if (out_reason) *out_reason = POOL_V2_PAGEOF_NONE;
return p;
}
static void pool_hotbox_v2_page_retire_slow(pool_ctx_v2* ctx, uint32_t ci, pool_page_v2* p) {
(void)ctx;
if (!p) return;
// Clear reverse header to avoid stale page_of hits.
pool_hotbox_v2_header_clear(p->base);
PoolColdIface cold = pool_cold_iface_v1();
if (cold.retire_page) {
void* cold_ctx = NULL;
cold.retire_page(cold_ctx, ci, p->slab_ref, p->base);
}
pool_hotbox_v2_page_release(p);
}
static void pool_hotbox_v2_push_partial(pool_class_v2* hc, pool_page_v2* p) {
if (!hc || !p) return;
p->next = hc->partial;
hc->partial = p;
if (hc->partial_count < UINT16_MAX) hc->partial_count++;
}
static pool_page_v2* pool_hotbox_v2_pop_partial(pool_class_v2* hc) {
if (!hc || !hc->partial) return NULL;
pool_page_v2* p = hc->partial;
hc->partial = p->next;
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return p;
}
static pool_page_v2* pool_hotbox_v2_take_usable_partial(pool_class_v2* hc) {
if (!hc) return NULL;
pool_page_v2* prev = NULL;
pool_page_v2* p = hc->partial;
while (p) {
if (p->freelist && p->used < p->capacity) {
if (prev) {
prev->next = p->next;
} else {
hc->partial = p->next;
}
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return p;
}
prev = p;
p = p->next;
}
return NULL;
}
static int pool_hotbox_v2_unlink_partial(pool_class_v2* hc, pool_page_v2* target) {
if (!hc || !target) return 0;
pool_page_v2* prev = NULL;
pool_page_v2* p = hc->partial;
while (p) {
if (p == target) {
if (prev) {
prev->next = p->next;
} else {
hc->partial = p->next;
}
p->next = NULL;
if (hc->partial_count > 0) hc->partial_count--;
return 1;
}
prev = p;
p = p->next;
}
return 0;
}
static void pool_hotbox_v2_record_alloc_fast(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].alloc_fast, 1, memory_order_relaxed);
}
static void pool_hotbox_v2_record_free_fast(uint32_t ci) {
if ((int)ci >= POOL_NUM_CLASSES) return;
atomic_fetch_add_explicit(&g_pool_hotbox_v2_stats[ci].free_fast, 1, memory_order_relaxed);
}
static inline void* pool_hotbox_v2_alloc_fast(pool_ctx_v2* ctx, uint32_t ci, uintptr_t site_id) {
pool_class_v2* hc = &ctx->cls[ci];
pool_page_v2* p = hc->current;
if (p && p->freelist && p->used < p->capacity) {
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
pool_hotbox_v2_record_alloc_fast(ci);
AllocHeader* hdr = (AllocHeader*)blk;
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
mid_set_header(hdr, class_sz, site_id);
mid_page_inuse_inc(blk);
return (char*)blk + HEADER_SIZE;
}
if (p) {
// Keep exhausted current reachable for free()
pool_hotbox_v2_push_partial(hc, p);
hc->current = NULL;
}
p = pool_hotbox_v2_take_usable_partial(hc);
if (p) {
hc->current = p;
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
pool_hotbox_v2_record_alloc_fast(ci);
AllocHeader* hdr = (AllocHeader*)blk;
size_t class_sz = pool_hotbox_v2_block_size((int)ci);
mid_set_header(hdr, class_sz, site_id);
mid_page_inuse_inc(blk);
return (char*)blk + HEADER_SIZE;
}
return NULL;
}
static void pool_hotbox_v2_page_init(pool_page_v2* p, uint32_t ci, void* base, uint32_t block_size, uint32_t capacity, void* slab_ref) {
if (!p) return;
// Adjust capacity if caller did not account for header reservation.
size_t avail = (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) ? (POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) : 0;
if (block_size > 0) {
uint32_t max_cap = (uint32_t)(avail / (size_t)block_size);
if (capacity == 0 || capacity > max_cap) capacity = max_cap;
}
p->freelist = NULL;
p->used = 0;
p->capacity = capacity;
p->block_size = block_size;
p->class_idx = ci;
p->base = base;
p->slab_ref = slab_ref;
p->next = NULL;
pool_hotbox_v2_header_store(p->base, p);
}
static PoolColdIface pool_cold_iface_v1(void) {
PoolColdIface iface = {pool_cold_refill_page_v1, pool_cold_retire_page_v1};
return iface;
}
static void* pool_cold_refill_page_v1(void* cold_ctx, uint32_t ci, uint32_t* out_block_size, uint32_t* out_capacity, void** out_slab_ref) {
(void)cold_ctx;
uint32_t user_sz = pool_hotbox_v2_block_size((int)ci);
if (user_sz == 0) return NULL;
uint32_t bs = user_sz + HEADER_SIZE;
if (bs == 0) return NULL;
uint32_t cap = 0;
if (POOL_PAGE_SIZE > POOL_HOTBOX_V2_HEADER_BYTES) {
cap = (uint32_t)((POOL_PAGE_SIZE - POOL_HOTBOX_V2_HEADER_BYTES) / bs);
}
if (cap == 0) return NULL;
// Over-allocate so we can align to POOL_PAGE_SIZE (64KiB) for O(1) page_of.
void* raw = mmap(NULL, POOL_HOTBOX_V2_MAP_LEN, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (raw == MAP_FAILED || !raw) {
return NULL;
}
uintptr_t aligned = ((uintptr_t)raw + (POOL_PAGE_SIZE - 1)) & ~((uintptr_t)POOL_PAGE_SIZE - 1);
void* base = (void*)aligned;
// Register page ownership for same-thread fast free consistency.
mid_desc_register(base, (int)ci, (uint64_t)(uintptr_t)pthread_self());
g_pool.refills[ci]++;
g_pool.total_pages_allocated++;
g_pool.pages_by_class[ci]++;
g_pool.total_bytes_allocated += POOL_HOTBOX_V2_MAP_LEN;
if (out_block_size) *out_block_size = bs;
if (out_capacity) *out_capacity = cap;
// slab_ref keeps the raw mapping pointer for unmap.
if (out_slab_ref) *out_slab_ref = raw;
return base;
}
static void pool_cold_retire_page_v1(void* cold_ctx, uint32_t ci, void* slab_ref, void* base) {
(void)cold_ctx;
(void)ci;
void* addr = slab_ref ? slab_ref : base;
if (!addr) return;
if (ci < POOL_NUM_CLASSES) {
if (g_pool.pages_by_class[ci] > 0) g_pool.pages_by_class[ci]--;
}
if (g_pool.total_pages_allocated > 0) g_pool.total_pages_allocated--;
if (g_pool.total_bytes_allocated >= POOL_HOTBOX_V2_MAP_LEN) g_pool.total_bytes_allocated -= POOL_HOTBOX_V2_MAP_LEN;
munmap(addr, POOL_HOTBOX_V2_MAP_LEN);
}
void* pool_hotbox_v2_alloc(uint32_t class_idx, size_t size, uintptr_t site_id) {
(void)size;
(void)site_id;
if ((int)class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return NULL;
pool_hotbox_v2_record_alloc(class_idx);
pool_ctx_v2* ctx = pool_v2_tls_get();
void* blk = pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
if (blk) return blk;
// slow: refill via Cold IF
PoolColdIface cold = pool_cold_iface_v1();
uint32_t bs = 0, cap = 0;
void* slab_ref = NULL;
void* base = cold.refill_page ? cold.refill_page(NULL, class_idx, &bs, &cap, &slab_ref) : NULL;
if (!base || !bs || !cap) {
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
return NULL;
}
pool_class_v2* hc = &ctx->cls[class_idx];
pool_page_v2* page = pool_hotbox_v2_page_acquire();
if (!page) {
if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
return NULL;
}
pool_hotbox_v2_page_init(page, class_idx, base, bs, cap, slab_ref);
page->freelist = pool_hotbox_v2_build_freelist(page);
if (!page->freelist) {
pool_hotbox_v2_record_alloc_refill_fail(class_idx);
if (cold.retire_page) cold.retire_page(NULL, class_idx, slab_ref, base);
pool_hotbox_v2_page_release(page);
return NULL;
}
hc->current = page;
pool_hotbox_v2_record_alloc_refill(class_idx);
return pool_hotbox_v2_alloc_fast(ctx, class_idx, site_id);
}
int pool_hotbox_v2_free(uint32_t class_idx, void* raw_block) {
if (!raw_block || (int)class_idx < 0 || class_idx >= POOL_NUM_CLASSES) return 0;
pool_hotbox_v2_record_free(class_idx);
pool_ctx_v2* ctx = pool_v2_tls_get();
int pageof_reason = POOL_V2_PAGEOF_UNKNOWN;
pool_page_v2* p = pool_hotbox_v2_page_of(ctx, class_idx, raw_block, &pageof_reason);
if (!p) {
pool_hotbox_v2_record_pageof_fail(class_idx, pageof_reason);
if (pool_hotbox_v2_stats_enabled()) {
static _Atomic uint32_t dbg = 0;
uint32_t n = atomic_fetch_add_explicit(&dbg, 1, memory_order_relaxed);
if (n < 4) {
pool_class_v2* hc = &ctx->cls[class_idx];
fprintf(stderr,
"[POOL_V2 page_of_fail] cls=%u ptr=%p reason=%d cur=%p cur_base=%p cur_cap=%u cur_bs=%u partial=%p\n",
class_idx, raw_block, pageof_reason,
(void*)hc->current,
hc->current ? hc->current->base : NULL,
hc->current ? hc->current->capacity : 0u,
hc->current ? hc->current->block_size : 0u,
(void*)hc->partial);
}
}
return 0; // let caller fall back to v1
}
*(void**)raw_block = p->freelist;
p->freelist = raw_block;
if (p->used > 0) p->used--;
pool_hotbox_v2_record_free_fast(class_idx);
pool_class_v2* hc = &ctx->cls[class_idx];
if (p->used == 0) {
pool_hotbox_v2_unlink_partial(hc, p);
if (hc->current == p) hc->current = NULL;
if (hc->partial_count < hc->max_partial_pages) {
pool_hotbox_v2_push_partial(hc, p);
} else {
pool_hotbox_v2_page_retire_slow(ctx, class_idx, p);
}
} else {
if (!hc->current) hc->current = p;
}
return 1;
}
__attribute__((destructor)) static void pool_hotbox_v2_dump_stats(void) {
if (!pool_hotbox_v2_stats_enabled()) return;
for (int i = 0; i < POOL_NUM_CLASSES; i++) {
uint64_t ac = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_calls, memory_order_relaxed);
uint64_t ar = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill, memory_order_relaxed);
uint64_t arf = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_refill_fail, memory_order_relaxed);
uint64_t afb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fallback_v1, memory_order_relaxed);
uint64_t fc = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_calls, memory_order_relaxed);
uint64_t ffb = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fallback_v1, memory_order_relaxed);
uint64_t af = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].alloc_fast, memory_order_relaxed);
uint64_t ff = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].free_fast, memory_order_relaxed);
uint64_t pf_hdr = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_header_missing, memory_order_relaxed);
uint64_t pf_range = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_out_of_range, memory_order_relaxed);
uint64_t pf_mis = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_misaligned, memory_order_relaxed);
uint64_t pf_unknown = atomic_load_explicit(&g_pool_hotbox_v2_stats[i].page_of_fail_unknown, memory_order_relaxed);
if (ac || afb || fc || ffb || ar || arf || af || ff || pf_hdr || pf_range || pf_mis || pf_unknown) {
fprintf(stderr, "[POOL_V2_STATS] cls=%d alloc_calls=%llu alloc_fast=%llu alloc_refill=%llu alloc_refill_fail=%llu alloc_fb_v1=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu pageof_hdr=%llu pageof_range=%llu pageof_misaligned=%llu pageof_unknown=%llu\n",
i, (unsigned long long)ac, (unsigned long long)af, (unsigned long long)ar,
(unsigned long long)arf, (unsigned long long)afb,
(unsigned long long)fc, (unsigned long long)ff, (unsigned long long)ffb,
(unsigned long long)pf_hdr, (unsigned long long)pf_range, (unsigned long long)pf_mis, (unsigned long long)pf_unknown);
}
}
}
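// A minimal free-side sketch of how a front might consult HotBox v2: a return of 0 means "not a v2
// block" (or class not enabled), and the caller then records the fallback and continues on the
// legacy v1 free path.
static inline int pool_hotbox_v2_front_free_sketch(int class_idx, void* raw_block) {
    if (!pool_hotbox_v2_class_enabled(class_idx)) return 0;
    if (pool_hotbox_v2_free((uint32_t)class_idx, raw_block)) return 1; // handled by v2
    pool_hotbox_v2_record_free_fallback((uint32_t)class_idx);          // page_of failed → v1
    return 0;                                                          // caller uses the v1 free path
}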
// Size class table (for O(1) lookup). Index 5/6 are Bridge classes for 32-64KB gap.
// 7 classes including Bridge classes (40KB, 52KB) to fill 32-64KB gap
static size_t g_class_sizes[POOL_NUM_CLASSES] = {
@ -893,10 +1407,9 @@ int hak_pool_get_shard_index(uintptr_t site_id) {
return (int)((uint32_t)x & (POOL_NUM_SHARDS - 1));
}
// TLS helpers
// TLS helpers (non-inline helpers for shard bookkeeping)
#include "box/pool_tls_core.inc.h"
// Refill/ACE (boxed)
#include "box/pool_refill.inc.h"

View File

@ -5,9 +5,12 @@
#include "hakmem_sys.h"
#include "hakmem_debug.h"
#include "hakmem_env_cache.h" // For HAK_ENV_SS_MADVISE_STRICT
#include "box/ss_os_acquire_box.h"
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h> // For errno values
// madvise constants (Linux)
#ifndef MADV_DONTNEED
@ -56,12 +59,16 @@ void hkm_sys_madvise_dontneed(void* ptr, size_t size) {
HKM_TIME_START(t0);
int ret = madvise(ptr, size, MADV_DONTNEED);
int ret = ss_os_madvise_guarded(ptr, size, MADV_DONTNEED, "hakmem_sys_dontneed");
HKM_TIME_END(HKM_CAT_SYSCALL_MADVISE, t0);
if (ret != 0) {
fprintf(stderr, "[HAKMEM SYS] madvise(DONTNEED, %p, %zu) failed\n", ptr, size);
fprintf(stderr, "[HAKMEM SYS] madvise(DONTNEED, %p, %zu) failed errno=%d\n", ptr, size, errno);
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[HAKMEM SYS] Critical: madvise(DONTNEED) failed with EINVAL in strict mode. Aborting.\n");
abort();
}
}
}
@ -70,11 +77,15 @@ void hkm_sys_madvise_willneed(void* ptr, size_t size) {
HKM_TIME_START(t0);
int ret = madvise(ptr, size, MADV_WILLNEED);
int ret = ss_os_madvise_guarded(ptr, size, MADV_WILLNEED, "hakmem_sys_willneed");
HKM_TIME_END(HKM_CAT_SYSCALL_MADVISE, t0);
if (ret != 0) {
fprintf(stderr, "[HAKMEM SYS] madvise(WILLNEED, %p, %zu) failed\n", ptr, size);
fprintf(stderr, "[HAKMEM SYS] madvise(WILLNEED, %p, %zu) failed errno=%d\n", ptr, size, errno);
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[HAKMEM SYS] Critical: madvise(WILLNEED) failed with EINVAL in strict mode. Aborting.\n");
abort();
}
}
}

View File

@ -11,6 +11,7 @@
#include "box/tiny_next_ptr_box.h" // Box API: next pointer read/write
#include "box/ptr_conversion_box.h" // Box API: pointer conversion
#include "hakmem_env_cache.h" // Priority-2: ENV cache
#include "box/tiny_cold_iface_v1.h" // Cold boundary wrapper for TinyHotHeap v2
// Phase 1 modules (must come AFTER hakmem_tiny.h for TinyPool definition)
#include "hakmem_tiny_batch_refill.h" // Phase 1: Batch refill/spill for mini-magazine
#include "hakmem_tiny_stats.h" // Phase 1: Batched statistics (replaces XOR RNG)
@ -24,6 +25,8 @@
#include "tiny_route.h"
#include "front/tiny_heap_v2.h"
#include "box/tiny_front_stats_box.h"
#include "box/tiny_front_v3_env_box.h"
#include "box/ss_os_acquire_box.h"
#include "tiny_tls_guard.h"
#include "tiny_ready.h"
#include "box/c7_meta_used_counter_box.h"
@ -32,6 +35,8 @@
#include "box/tiny_hotheap_v2_box.h"
#include "box/tiny_route_env_box.h"
#include "box/super_reg_box.h"
#include "tiny_region_id.h"
#include "tiny_debug_api.h"
#include "hakmem_tiny_tls_list.h"
#include "hakmem_tiny_remote_target.h" // Phase 2C-1: Remote target queue
#include "hakmem_tiny_bg_spill.h" // Phase 2C-2: Background spill queue
@ -59,6 +64,13 @@ tiny_route_kind_t g_tiny_route_class[TINY_NUM_CLASSES] = {0};
int g_tiny_route_snapshot_done = 0;
_Atomic uint64_t g_tiny_front_alloc_class[TINY_NUM_CLASSES] = {0};
_Atomic uint64_t g_tiny_front_free_class[TINY_NUM_CLASSES] = {0};
TinyFrontV3Snapshot g_tiny_front_v3_snapshot = {0};
int g_tiny_front_v3_snapshot_ready = 0;
static TinyFrontV3SizeClassEntry g_tiny_front_v3_lut[TINY_MAX_SIZE + 1] = {0};
static int g_tiny_front_v3_lut_ready = 0;
// Forward decls (to keep deps light in this TU)
int unified_cache_enabled(void);
static int tiny_heap_stats_dump_enabled(void) {
static int g = -1;
@ -70,6 +82,59 @@ static int tiny_heap_stats_dump_enabled(void) {
return g;
}
void tiny_front_v3_snapshot_init(void) {
if (g_tiny_front_v3_snapshot_ready) {
return;
}
TinyFrontV3Snapshot snap = {
.unified_cache_on = unified_cache_enabled(),
.tiny_guard_on = tiny_guard_is_enabled(),
.header_mode = (uint8_t)tiny_header_mode(),
.header_v3_enabled = tiny_header_v3_enabled(),
.header_v3_skip_c7 = tiny_header_v3_skip_c7(),
};
g_tiny_front_v3_snapshot = snap;
g_tiny_front_v3_snapshot_ready = 1;
}
void tiny_front_v3_size_class_lut_init(void) {
if (g_tiny_front_v3_lut_ready) {
return;
}
tiny_route_snapshot_init();
size_t max_size = tiny_get_max_size();
if (max_size > TINY_MAX_SIZE) {
max_size = TINY_MAX_SIZE;
}
for (size_t sz = 0; sz <= TINY_MAX_SIZE; sz++) {
TinyFrontV3SizeClassEntry e = {
.class_idx = TINY_FRONT_V3_INVALID_CLASS,
.route_kind = (uint8_t)TINY_ROUTE_LEGACY,
};
if (sz == 0 || sz > max_size) {
g_tiny_front_v3_lut[sz] = e;
continue;
}
int cls = hak_tiny_size_to_class((int)sz);
if (cls >= 0 && cls < TINY_NUM_CLASSES) {
e.class_idx = (uint8_t)cls;
e.route_kind = (uint8_t)tiny_route_for_class((uint8_t)cls);
}
g_tiny_front_v3_lut[sz] = e;
}
g_tiny_front_v3_lut_ready = 1;
}
const TinyFrontV3SizeClassEntry* tiny_front_v3_lut_lookup(size_t size) {
if (__builtin_expect(!g_tiny_front_v3_lut_ready, 0)) {
tiny_front_v3_size_class_lut_init();
}
if (size == 0 || size > TINY_MAX_SIZE) {
return NULL;
}
return &g_tiny_front_v3_lut[size];
}
__attribute__((destructor))
static void tiny_heap_stats_dump(void) {
if (!tiny_heap_stats_enabled() || !tiny_heap_stats_dump_enabled()) {
@ -159,16 +224,24 @@ static inline int tiny_hotheap_v2_stats_enabled(void) {
return g;
}
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_calls = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_route_hits = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fast = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_lease = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_fallback_v1 = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_refill = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_alloc_route_fb = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_calls = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fast = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_c7_free_fallback_v1 = 0;
static _Atomic uint64_t g_tiny_hotheap_v2_route_hits[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_lease[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_refill[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_current[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_refill_with_partial[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_alloc_route_fb[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fast[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_free_fallback_v1[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_refill_fail[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_cold_retire_calls[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_retire_calls_v2[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pushes[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_pops[TINY_HOTHEAP_MAX_CLASSES] = {0};
static _Atomic uint64_t g_tiny_hotheap_v2_partial_peak[TINY_HOTHEAP_MAX_CLASSES] = {0};
typedef struct {
_Atomic uint64_t prepare_calls;
@ -178,34 +251,54 @@ typedef struct {
_Atomic uint64_t page_retired;
} TinyHotHeapV2PageStats;
static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats = {0};
static TinyHotHeapV2PageStats g_tiny_hotheap_v2_page_stats[TINY_HOTHEAP_MAX_CLASSES] = {0};
static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
uint8_t class_idx,
tiny_hotheap_page_v2* page);
void tiny_hotheap_v2_record_route_fallback(void) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, 1, memory_order_relaxed);
static inline uint8_t tiny_hotheap_v2_idx(uint8_t class_idx) {
return (class_idx < TINY_HOTHEAP_MAX_CLASSES) ? class_idx : 0;
}
void tiny_hotheap_v2_record_free_fallback(void) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, 1, memory_order_relaxed);
void tiny_hotheap_v2_record_route_fallback(uint8_t class_idx) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[tiny_hotheap_v2_idx(class_idx)],
1,
memory_order_relaxed);
}
void tiny_hotheap_v2_record_free_fallback(uint8_t class_idx) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[tiny_hotheap_v2_idx(class_idx)],
1,
memory_order_relaxed);
}
void tiny_hotheap_v2_debug_snapshot(tiny_hotheap_v2_stats_snapshot_t* out) {
if (!out) return;
memset(out, 0, sizeof(*out));
out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_c7_route_hits, memory_order_relaxed);
out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, memory_order_relaxed);
out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, memory_order_relaxed);
out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, memory_order_relaxed);
out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, memory_order_relaxed);
out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, memory_order_relaxed);
out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, memory_order_relaxed);
out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_calls, memory_order_relaxed);
out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fast, memory_order_relaxed);
out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, memory_order_relaxed);
out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, memory_order_relaxed);
out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, memory_order_relaxed);
out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, memory_order_relaxed);
out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, memory_order_relaxed);
out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, memory_order_relaxed);
uint8_t ci = 7;
out->route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
out->alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
out->alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
out->alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
out->alloc_refill = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed);
out->refill_with_current = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
out->refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
out->alloc_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
out->alloc_route_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed);
out->free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
out->free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
out->free_fallback_v1 = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
out->cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
out->cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
out->retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
out->prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed);
out->prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed);
out->prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed);
out->free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed);
out->page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed);
out->partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
out->partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
out->partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
}
static tiny_hotheap_page_v2* tiny_hotheap_v2_acquire_page_node(tiny_hotheap_class_v2* hc) {
@ -246,6 +339,57 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_find_page(tiny_hotheap_class_v2* hc
return NULL;
}
static inline void tiny_hotheap_v2_partial_push(tiny_hotheap_class_v2* hc,
tiny_hotheap_page_v2* page,
uint8_t class_idx,
int stats_on) {
if (!hc || !page) return;
page->next = hc->partial_pages;
hc->partial_pages = page;
if (hc->partial_count < UINT16_MAX) {
hc->partial_count++;
}
if (stats_on) {
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pushes[idx], 1, memory_order_relaxed);
uint64_t cur = hc->partial_count;
uint64_t old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
while (cur > old &&
!atomic_compare_exchange_weak_explicit(&g_tiny_hotheap_v2_partial_peak[idx],
&old,
cur,
memory_order_relaxed,
memory_order_relaxed)) {
old = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[idx], memory_order_relaxed);
}
}
}
static inline void tiny_hotheap_v2_maybe_trim_partial(tiny_hotheap_ctx_v2* ctx,
tiny_hotheap_class_v2* hc,
uint8_t class_idx,
int stats_on) {
if (!ctx || !hc) return;
uint16_t limit = hc->max_partial_pages;
if (limit == 0) {
return;
}
while (hc->partial_count > limit && hc->partial_pages) {
tiny_hotheap_page_v2* victim = hc->partial_pages;
hc->partial_pages = victim->next;
if (hc->partial_count > 0) {
hc->partial_count--;
}
victim->next = NULL;
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[tiny_hotheap_v2_idx(class_idx)],
1,
memory_order_relaxed);
}
tiny_hotheap_v2_page_retire_slow(ctx, class_idx, victim);
}
}
static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
uint8_t class_idx,
uint16_t stride) {
@ -265,16 +409,6 @@ static inline void tiny_hotheap_v2_build_freelist(tiny_hotheap_page_v2* page,
head = block;
}
page->freelist = head;
if (page->lease_page) {
page->lease_page->free_list = head;
page->lease_page->used = page->used;
if (page->lease_page->meta) {
atomic_store_explicit(&page->lease_page->meta->freelist, head, memory_order_release);
if (page->lease_page->meta->carved < page->capacity) {
page->lease_page->meta->carved = page->capacity;
}
}
}
}
static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_page_v2* target) {
@ -295,6 +429,9 @@ static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_
*head = cur->next;
}
cur->next = NULL;
if (i == 0 && hc->partial_count > 0) {
hc->partial_count--;
}
break;
}
prev = cur;
@ -304,17 +441,35 @@ static void tiny_hotheap_v2_unlink_page(tiny_hotheap_class_v2* hc, tiny_hotheap_
}
static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ctx, uint8_t class_idx) {
if (!ctx || class_idx != 7) {
if (!ctx || class_idx >= TINY_HOTHEAP_MAX_CLASSES) {
return NULL;
}
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, 1, memory_order_relaxed);
TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);
int stats_on = tiny_hotheap_v2_stats_enabled();
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_refill[class_idx], 1, memory_order_relaxed);
TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
if (__builtin_expect(stats != NULL, 0)) {
atomic_fetch_add_explicit(&stats->alloc_slow_prepare, 1, memory_order_relaxed);
}
tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
TinyHeapPageLease lease = tiny_heap_c7_lease_page_for_v2();
if (!lease.page) {
if (hc) {
if (hc->current_page) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_current[class_idx],
1,
memory_order_relaxed);
}
if (hc->partial_pages) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_refill_with_partial[class_idx],
1,
memory_order_relaxed);
}
}
// Borrow a single page from the cold interface (v1 TinyHeap)
TinyColdIface cold = tiny_cold_iface_v1();
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
tiny_heap_page_t* ipage = cold.refill_page ? cold.refill_page(cold_ctx, class_idx) : NULL;
if (!ipage || !ipage->base || ipage->capacity == 0 || ipage->meta == NULL) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_refill_fail[class_idx], 1, memory_order_relaxed);
return NULL;
}
@ -327,33 +482,25 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ct
return NULL;
}
page->lease_page = lease.page;
page->meta = lease.meta;
page->ss = lease.ss;
page->base = lease.base;
page->capacity = lease.capacity;
page->slab_idx = lease.slab_idx;
page->freelist = lease.freelist;
page->used = lease.page->used;
if (page->lease_page) {
page->lease_page->capacity = page->capacity;
page->lease_page->free_list = page->freelist;
page->lease_page->base = (uint8_t*)page->base;
}
page->lease_page = ipage;
page->meta = ipage->meta;
page->ss = ipage->ss;
page->base = ipage->base;
page->capacity = ipage->capacity;
page->slab_idx = ipage->slab_idx;
page->freelist = NULL;
page->used = 0;
const uint16_t stride = hc->stride ? hc->stride : (uint16_t)tiny_stride_for_class(class_idx);
if (page->freelist == NULL && page->base && page->capacity > page->used) {
tiny_hotheap_v2_build_freelist(page, class_idx, stride);
} else if (page->lease_page && page->lease_page->meta) {
atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
}
tiny_hotheap_v2_build_freelist(page, class_idx, stride);
tiny_hotheap_page_v2* old_cur = hc->current_page;
hc->current_page = page;
page->next = NULL;
if (old_cur && old_cur != page) {
old_cur->next = hc->partial_pages;
hc->partial_pages = old_cur;
tiny_hotheap_v2_partial_push(hc, old_cur, class_idx, stats_on);
}
tiny_hotheap_v2_maybe_trim_partial(ctx, hc, class_idx, stats_on);
if (!hc->current_page || !hc->current_page->freelist || hc->current_page->capacity == 0 ||
hc->current_page->used > hc->current_page->capacity) {
fprintf(stderr, "[HOTHEAP_V2_REFILL_ASSERT] current_page missing freelist (page=%p freelist=%p cap=%u used=%u)\n",
@ -361,7 +508,7 @@ static tiny_hotheap_page_v2* tiny_hotheap_v2_refill_slow(tiny_hotheap_ctx_v2* ct
hc->current_page ? hc->current_page->freelist : NULL,
hc->current_page ? (unsigned)hc->current_page->capacity : 0u,
hc->current_page ? (unsigned)hc->current_page->used : 0u);
abort();
return NULL;
}
return hc->current_page;
}
@ -370,17 +517,26 @@ static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
uint8_t class_idx,
tiny_hotheap_page_v2* page) {
if (!ctx || !page) return;
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
tiny_hotheap_class_v2* hc = &ctx->cls[class_idx];
tiny_hotheap_v2_unlink_page(hc, page);
TinyHeapPageLease lease = tiny_heap_page_lease_nil();
lease.page = page->lease_page;
lease.meta = page->meta;
lease.ss = page->ss;
lease.base = page->base;
lease.capacity = page->capacity;
lease.slab_idx = page->slab_idx;
lease.freelist = page->freelist;
tiny_heap_c7_return_page_from_v2(&lease);
if (page->lease_page) {
page->lease_page->used = page->used;
page->lease_page->free_list = page->freelist;
if (page->lease_page->meta) {
atomic_store_explicit(&page->lease_page->meta->freelist, page->freelist, memory_order_release);
atomic_store_explicit(&page->lease_page->meta->used, page->used, memory_order_relaxed);
}
}
TinyColdIface cold = tiny_cold_iface_v1();
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
if (cold.retire_page) {
cold.retire_page(cold_ctx, class_idx, page->lease_page);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_cold_retire_calls[idx], 1, memory_order_relaxed);
}
if (tiny_hotheap_v2_stats_enabled()) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_retire_calls_v2[idx], 1, memory_order_relaxed);
}
if (page != &hc->storage_page) {
free(page);
} else {
@ -394,38 +550,42 @@ static void tiny_hotheap_v2_page_retire_slow(tiny_hotheap_ctx_v2* ctx,
}
}
if (tiny_hotheap_v2_stats_enabled()) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].page_retired, 1, memory_order_relaxed);
}
}
static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_page_v2* candidate,
tiny_heap_class_t* v1hcls,
static inline void* tiny_hotheap_v2_try_pop(tiny_hotheap_class_v2* hc,
tiny_hotheap_page_v2* page,
uint8_t class_idx,
TinyHeapClassStats* stats,
int stats_on) {
if (!candidate || !candidate->lease_page || !v1hcls) {
if (!hc || !page || !page->base || page->capacity == 0) {
return NULL;
}
tiny_heap_page_t* ipage = candidate->lease_page;
v1hcls->current_page = ipage; // keep v1 hot page pinned to avoid mark_full churn
if (!(ipage->free_list || ipage->used < ipage->capacity)) {
if (hc->stride == 0) {
hc->stride = (uint16_t)tiny_stride_for_class(class_idx);
}
const uint16_t stride = hc->stride;
void* block = NULL;
if (page->freelist) {
block = page->freelist;
void* next = tiny_next_read(class_idx, block);
page->freelist = next;
} else if (page->used < page->capacity) {
block = (void*)((uint8_t*)page->base + ((size_t)page->used * stride));
} else {
return NULL;
}
void* user = tiny_heap_page_pop(v1hcls, 7, ipage);
if (!user) {
return NULL;
}
if (ipage->used >= ipage->capacity && ipage->free_list == NULL) {
tiny_heap_page_mark_full(v1hcls, ipage);
}
page->used++;
if (__builtin_expect(stats != NULL, 0)) {
atomic_fetch_add_explicit(&stats->alloc_fast_current, 1, memory_order_relaxed);
}
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fast[tiny_hotheap_v2_idx(class_idx)],
1,
memory_order_relaxed);
}
candidate->freelist = ipage->free_list;
candidate->used = ipage->used;
return tiny_region_id_write_header(user, 7);
return tiny_region_id_write_header(block, class_idx);
}
__attribute__((destructor))
@ -433,35 +593,55 @@ static void tiny_hotheap_v2_stats_dump(void) {
if (!tiny_hotheap_v2_stats_enabled()) {
return;
}
uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, memory_order_relaxed);
uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_c7_route_hits, memory_order_relaxed);
uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fast, memory_order_relaxed);
uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, memory_order_relaxed);
uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, memory_order_relaxed);
uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_calls, memory_order_relaxed);
uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fast, memory_order_relaxed);
uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, memory_order_relaxed);
for (uint8_t ci = 0; ci < TINY_HOTHEAP_MAX_CLASSES; ci++) {
uint64_t alloc_calls = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_calls[ci], memory_order_relaxed);
uint64_t route_hits = atomic_load_explicit(&g_tiny_hotheap_v2_route_hits[ci], memory_order_relaxed);
uint64_t alloc_fast = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fast[ci], memory_order_relaxed);
uint64_t alloc_lease = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_lease[ci], memory_order_relaxed);
uint64_t alloc_fb = atomic_load_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[ci], memory_order_relaxed);
uint64_t free_calls = atomic_load_explicit(&g_tiny_hotheap_v2_free_calls[ci], memory_order_relaxed);
uint64_t free_fast = atomic_load_explicit(&g_tiny_hotheap_v2_free_fast[ci], memory_order_relaxed);
uint64_t free_fb = atomic_load_explicit(&g_tiny_hotheap_v2_free_fallback_v1[ci], memory_order_relaxed);
uint64_t cold_refill_fail = atomic_load_explicit(&g_tiny_hotheap_v2_cold_refill_fail[ci], memory_order_relaxed);
uint64_t cold_retire_calls = atomic_load_explicit(&g_tiny_hotheap_v2_cold_retire_calls[ci], memory_order_relaxed);
uint64_t retire_calls_v2 = atomic_load_explicit(&g_tiny_hotheap_v2_retire_calls_v2[ci], memory_order_relaxed);
uint64_t partial_pushes = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pushes[ci], memory_order_relaxed);
uint64_t partial_pops = atomic_load_explicit(&g_tiny_hotheap_v2_partial_pops[ci], memory_order_relaxed);
uint64_t partial_peak = atomic_load_explicit(&g_tiny_hotheap_v2_partial_peak[ci], memory_order_relaxed);
uint64_t refill_with_cur = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_current[ci], memory_order_relaxed);
uint64_t refill_with_partial = atomic_load_explicit(&g_tiny_hotheap_v2_refill_with_partial[ci], memory_order_relaxed);
TinyHotHeapV2PageStats ps = {
.prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, memory_order_relaxed),
.prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, memory_order_relaxed),
.prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, memory_order_relaxed),
.free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, memory_order_relaxed),
.page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats.page_retired, memory_order_relaxed),
};
TinyHotHeapV2PageStats ps = {
.prepare_calls = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_calls, memory_order_relaxed),
.prepare_with_current_null = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_with_current_null, memory_order_relaxed),
.prepare_from_partial = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].prepare_from_partial, memory_order_relaxed),
.free_made_current = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].free_made_current, memory_order_relaxed),
.page_retired = atomic_load_explicit(&g_tiny_hotheap_v2_page_stats[ci].page_retired, memory_order_relaxed),
};
if (alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
ps.free_made_current || ps.page_retired) {
if (!(alloc_calls || alloc_fast || alloc_lease || alloc_fb || free_calls || free_fast || free_fb ||
ps.prepare_calls || ps.prepare_with_current_null || ps.prepare_from_partial ||
ps.free_made_current || ps.page_retired || retire_calls_v2 || partial_pushes || partial_pops || partial_peak)) {
continue;
}
tiny_route_kind_t route_kind = tiny_route_for_class(ci);
fprintf(stderr,
"[HOTHEAP_V2_C7_STATS] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu alloc_fb_v1=%llu alloc_route_fb=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu\n",
"[HOTHEAP_V2_STATS cls=%u route=%d] route_hits=%llu alloc_calls=%llu alloc_fast=%llu alloc_lease=%llu alloc_refill=%llu refill_cur=%llu refill_partial=%llu alloc_fb_v1=%llu alloc_route_fb=%llu cold_refill_fail=%llu cold_retire_calls=%llu retire_v2=%llu free_calls=%llu free_fast=%llu free_fb_v1=%llu prep_calls=%llu prep_null=%llu prep_from_partial=%llu free_made_current=%llu page_retired=%llu partial_push=%llu partial_pop=%llu partial_peak=%llu\n",
(unsigned)ci,
(int)route_kind,
(unsigned long long)route_hits,
(unsigned long long)alloc_calls,
(unsigned long long)alloc_fast,
(unsigned long long)alloc_lease,
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_refill, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_refill[ci], memory_order_relaxed),
(unsigned long long)refill_with_cur,
(unsigned long long)refill_with_partial,
(unsigned long long)alloc_fb,
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_tiny_hotheap_v2_alloc_route_fb[ci], memory_order_relaxed),
(unsigned long long)cold_refill_fail,
(unsigned long long)cold_retire_calls,
(unsigned long long)retire_calls_v2,
(unsigned long long)free_calls,
(unsigned long long)free_fast,
(unsigned long long)free_fb,
@ -469,7 +649,10 @@ static void tiny_hotheap_v2_stats_dump(void) {
(unsigned long long)ps.prepare_with_current_null,
(unsigned long long)ps.prepare_from_partial,
(unsigned long long)ps.free_made_current,
(unsigned long long)ps.page_retired);
(unsigned long long)ps.page_retired,
(unsigned long long)partial_pushes,
(unsigned long long)partial_pops,
(unsigned long long)partial_peak);
}
}
tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
@ -484,6 +667,8 @@ tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
for (int i = 0; i < TINY_HOTHEAP_MAX_CLASSES; i++) {
tiny_hotheap_v2_page_reset(&ctx->cls[i].storage_page);
ctx->cls[i].stride = (uint16_t)tiny_stride_for_class(i);
ctx->cls[i].max_partial_pages = (i == 7 || i == 6) ? 2 : 0; // C6/C7 keep one or two pages on hand
ctx->cls[i].partial_count = 0;
}
}
return ctx;
@ -491,143 +676,174 @@ tiny_hotheap_ctx_v2* tiny_hotheap_v2_tls_get(void) {
void* tiny_hotheap_v2_alloc(uint8_t class_idx) {
int stats_on = tiny_hotheap_v2_stats_enabled();
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_route_hits, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_calls, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_route_hits[idx], 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_calls[idx], 1, memory_order_relaxed);
}
if (__builtin_expect(class_idx != 7, 0)) {
return NULL; // C7 only for now
if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
return NULL; // C6/C7 only for now
}
tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
tiny_hotheap_page_v2* v2page = vhcls ? vhcls->current_page : NULL;
tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
TinyHeapClassStats* stats = tiny_heap_stats_for_class(7);
TinyHeapClassStats* stats = tiny_heap_stats_for_class(class_idx);
// If current_page looks corrupted, drop it once and fall through to the slow path
if (v2page && (!v2page->base || v2page->capacity == 0)) {
vhcls->current_page = NULL;
v2page = NULL;
}
// Hot path: current_page → partial → refill
void* user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
void* user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
if (user) {
return user;
}
// move exhausted current_page to full list if needed
if (vhcls && v2page && v2page->used >= v2page->capacity && vhcls->current_page == v2page) {
vhcls->current_page = NULL;
v2page->next = vhcls->full_pages;
vhcls->full_pages = v2page;
}
while (vhcls && vhcls->partial_pages) {
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_calls, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_from_partial, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_calls, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_from_partial, 1, memory_order_relaxed);
if (vhcls->current_page == NULL) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.prepare_with_current_null, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].prepare_with_current_null, 1, memory_order_relaxed);
}
}
v2page = vhcls->partial_pages;
vhcls->partial_pages = vhcls->partial_pages->next;
if (vhcls->partial_count > 0) {
vhcls->partial_count--;
}
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_partial_pops[idx], 1, memory_order_relaxed);
}
v2page->next = NULL;
vhcls->current_page = v2page;
user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
if (user) {
return user;
}
if (v2page->used >= v2page->capacity) {
v2page->next = vhcls->full_pages;
vhcls->full_pages = v2page;
vhcls->current_page = NULL;
}
}
// Lease a page from v1 (C7 SAFE) and wrap it
tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, 7);
if (!leased || !v1hcls) {
tiny_hotheap_page_v2* leased = tiny_hotheap_v2_refill_slow(v2ctx, class_idx);
if (!leased) {
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_route_fb, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_route_fb[idx], 1, memory_order_relaxed);
}
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
return tiny_c7_alloc_fast(size); // safety fallback to v1
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
if (class_idx == 7) {
return tiny_c7_alloc_fast(size); // safety fallback to v1
}
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
}
vhcls->current_page = leased;
v2page = leased;
if (v1hcls && v2page && v2page->lease_page) {
v1hcls->current_page = v2page->lease_page;
}
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_lease, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_lease[idx], 1, memory_order_relaxed);
}
user = tiny_hotheap_v2_try_pop(v2page, v1hcls, stats, stats_on);
user = tiny_hotheap_v2_try_pop(vhcls, v2page, class_idx, stats, stats_on);
if (user) {
return user;
}
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_alloc_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_alloc_fallback_v1[idx], 1, memory_order_relaxed);
}
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(7)) : tiny_stride_for_class(7);
return tiny_c7_alloc_fast(size);
size_t size = vhcls ? (vhcls->stride ? vhcls->stride : tiny_stride_for_class(class_idx)) : tiny_stride_for_class(class_idx);
if (class_idx == 7) {
return tiny_c7_alloc_fast(size);
}
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
return tiny_heap_alloc_class_fast(cold_ctx, class_idx, size);
}
void tiny_hotheap_v2_free(uint8_t class_idx, void* p, void* meta) {
if (__builtin_expect(class_idx != 7, 0)) {
if (__builtin_expect(!(class_idx == 6 || class_idx == 7), 0)) {
return;
}
uint8_t idx = tiny_hotheap_v2_idx(class_idx);
int stats_on = tiny_hotheap_v2_stats_enabled();
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_calls, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_calls[idx], 1, memory_order_relaxed);
}
tiny_hotheap_ctx_v2* v2ctx = tiny_hotheap_v2_tls_get();
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[7] : NULL;
tiny_hotheap_class_v2* vhcls = v2ctx ? &v2ctx->cls[class_idx] : NULL;
TinySlabMeta* meta_ptr = (TinySlabMeta*)meta;
tiny_heap_ctx_t* v1ctx = tiny_heap_ctx_for_thread();
tiny_heap_class_t* v1hcls = tiny_heap_class(v1ctx, 7);
tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, 7, p, meta_ptr);
if (page && page->lease_page && v1hcls && tiny_heap_ptr_in_page_range(page->lease_page, p)) {
tiny_heap_page_free_local(v1ctx, 7, page->lease_page, p);
page->freelist = page->lease_page->free_list;
page->used = page->lease_page->used;
if (v1hcls) {
v1hcls->current_page = page->lease_page;
tiny_hotheap_page_v2* page = tiny_hotheap_v2_find_page(vhcls, class_idx, p, meta_ptr);
if (page && page->base && page->capacity > 0) {
tiny_next_write(class_idx, p, page->freelist);
page->freelist = p;
if (page->used > 0) {
page->used--;
}
if (vhcls && vhcls->current_page != page) {
tiny_hotheap_v2_unlink_page(vhcls, page);
page->next = vhcls->current_page;
vhcls->current_page = page;
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, 1, memory_order_relaxed);
}
} else if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats.free_made_current, 1, memory_order_relaxed);
}
// C7-only: keep the page hot even when empty to avoid churn
if (vhcls) {
if (!vhcls->current_page) {
vhcls->current_page = page;
} else if (vhcls->current_page != page) {
tiny_hotheap_v2_unlink_page(vhcls, page);
page->next = vhcls->current_page;
vhcls->current_page = page;
}
}
if (page->used == 0 && vhcls && vhcls->partial_pages != page && vhcls->current_page == page) {
// park empty page in partial to allow re-use without immediate Superslab return
page->next = vhcls->partial_pages;
vhcls->partial_pages = page;
vhcls->current_page = page; // still treat as current
}
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fast, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_page_stats[idx].free_made_current, 1, memory_order_relaxed);
}
if (page->used == 0) {
// Keep an empty page on the partial list for reuse; retire it once the cap is exceeded
tiny_hotheap_v2_unlink_page(vhcls, page);
page->next = NULL;
if (vhcls && vhcls->current_page == NULL) {
vhcls->current_page = page;
} else if (vhcls) {
tiny_hotheap_v2_partial_push(vhcls, page, class_idx, stats_on);
tiny_hotheap_v2_maybe_trim_partial(v2ctx, vhcls, class_idx, stats_on);
}
} else if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
}
if (stats_on && page->used == 0) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fast[idx], 1, memory_order_relaxed);
}
return;
}
// Fallback: mimic v1 free path
if (stats_on) {
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_c7_free_fallback_v1, 1, memory_order_relaxed);
atomic_fetch_add_explicit(&g_tiny_hotheap_v2_free_fallback_v1[idx], 1, memory_order_relaxed);
}
SuperSlab* ss = hak_super_lookup(p);
if (ss && ss->magic == SUPERSLAB_MAGIC) {
int slab_idx = slab_index_for(ss, p);
if (slab_idx >= 0 && slab_idx < ss_slabs_capacity(ss)) {
tiny_c7_free_fast_with_meta(ss, slab_idx, p);
if (class_idx == 7) {
tiny_c7_free_fast_with_meta(ss, slab_idx, p);
} else {
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
tiny_heap_free_class_fast_with_meta(cold_ctx, class_idx, ss, slab_idx, p);
}
return;
}
}
tiny_c7_free_fast(p);
if (class_idx == 7) {
tiny_c7_free_fast(p);
} else {
tiny_heap_ctx_t* cold_ctx = tiny_heap_ctx_for_thread();
tiny_heap_free_class_fast(cold_ctx, class_idx, p);
}
}
#if !HAKMEM_BUILD_RELEASE

View File

@ -9,6 +9,11 @@
//
// Cold path only - called once at startup.
// Some build configurations expect this hook but do not provide an implementation.
// Provide a no-op stub so that non-debug builds continue to link without optional
// signal-dump support.
static inline void hak_tiny_enable_signal_dump(void) { }
void hak_tiny_init(void) {
if (g_tiny_initialized) return;

View File

@ -9,6 +9,9 @@ typedef struct {
uint16_t thread_id; // low bits of thread id (best-effort)
} AllocEvent;
// Forward decl (defined in ss_os_acquire_box.h)
extern int ss_os_madvise_guarded(void* ptr, size_t len, int advice, const char* where);
#define EVENTQ_CAP 65536u
#define EVENTQ_MASK (EVENTQ_CAP - 1u)
static _Atomic uint32_t g_ev_tail = 0;
@ -689,7 +692,7 @@ static inline void superslab_partial_release(SuperSlab* ss, uint32_t epoch) {
uint32_t prev = ss->partial_epoch;
if (epoch != 0 && (epoch - prev) < g_ss_partial_interval) return;
size_t len = (size_t)1 << ss->lg_size;
if (madvise(ss, len, MADV_DONTNEED) == 0) {
if (ss_os_madvise_guarded(ss, len, MADV_DONTNEED, "tiny_ss_partial") == 0) {
ss->partial_epoch = epoch;
}
#else

View File

@ -0,0 +1,325 @@
// smallobject_hotbox_v3.c - SmallObject HotHeap v3 skeleton (C7-first)
// Phase A/B: types and stats only; alloc/free fall back to v1.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "box/smallobject_hotbox_v3_box.h"
#include "box/smallobject_cold_iface_v1.h"
#include "box/tiny_heap_box.h"
#include "box/tiny_front_v3_env_box.h"
#include "hakmem_tiny.h" // TINY_SLAB_SIZE mask for page_of
#include "tiny_region_id.h"
static __thread so_ctx_v3* g_so_ctx_v3;
static int g_so_stats_enabled = -1;
static so_stats_class_v3 g_so_stats[SMALLOBJECT_NUM_CLASSES];
int so_v3_stats_enabled(void) {
if (__builtin_expect(g_so_stats_enabled == -1, 0)) {
const char* e = getenv("HAKMEM_SMALL_HEAP_V3_STATS");
g_so_stats_enabled = (e && *e && *e != '0') ? 1 : 0;
}
return g_so_stats_enabled;
}
static inline so_stats_class_v3* so_stats_for(uint8_t ci) {
if (!so_v3_stats_enabled()) return NULL;
if (ci >= SMALLOBJECT_NUM_CLASSES) return NULL;
return &g_so_stats[ci];
}
void so_v3_record_route_hit(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->route_hits, 1, memory_order_relaxed);
}
void so_v3_record_alloc_call(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->alloc_calls, 1, memory_order_relaxed);
}
void so_v3_record_alloc_refill(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->alloc_refill, 1, memory_order_relaxed);
}
void so_v3_record_alloc_fallback(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->alloc_fallback_v1, 1, memory_order_relaxed);
}
void so_v3_record_free_call(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->free_calls, 1, memory_order_relaxed);
}
void so_v3_record_free_fallback(uint8_t ci) {
so_stats_class_v3* st = so_stats_for(ci);
if (st) atomic_fetch_add_explicit(&st->free_fallback_v1, 1, memory_order_relaxed);
}
so_ctx_v3* so_tls_get(void) {
so_ctx_v3* ctx = g_so_ctx_v3;
if (__builtin_expect(ctx == NULL, 0)) {
ctx = (so_ctx_v3*)calloc(1, sizeof(so_ctx_v3));
if (!ctx) {
fprintf(stderr, "[SMALL_HEAP_V3] TLS alloc failed\n");
abort();
}
for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
so_class_v3* hc = &ctx->cls[i];
hc->block_size = (uint32_t)tiny_stride_for_class(i);
hc->max_partial_pages = 2;
}
g_so_ctx_v3 = ctx;
}
return ctx;
}
static inline void* so_build_freelist(so_page_v3* page) {
if (!page || !page->base || page->block_size == 0 || page->capacity == 0) return NULL;
uint8_t* base = (uint8_t*)page->base;
void* head = NULL;
for (uint32_t i = 0; i < page->capacity; i++) {
uint8_t* blk = base + ((size_t)i * page->block_size);
*(void**)blk = head;
head = blk;
}
return head;
}
static inline int so_ptr_in_page(so_page_v3* page, void* ptr) {
if (!page || !ptr) return 0;
uintptr_t base = (uintptr_t)page->base;
uintptr_t p = (uintptr_t)ptr;
uintptr_t span = (uintptr_t)page->block_size * (uintptr_t)page->capacity;
if (p < base || p >= base + span) return 0;
if (((p - base) % page->block_size) != 0) return 0;
return 1;
}
static inline so_page_v3* so_page_of(so_class_v3* hc, void* ptr) {
if (!ptr || !hc) return NULL;
so_page_v3* page = hc->current;
if (page && so_ptr_in_page(page, ptr)) {
return page;
}
page = hc->partial;
while (page) {
if (so_ptr_in_page(page, ptr)) {
return page;
}
page = page->next;
}
return NULL;
}
static inline void so_page_push_partial(so_class_v3* hc, so_page_v3* page) {
if (!hc || !page) return;
page->next = hc->partial;
hc->partial = page;
hc->partial_count++;
}
static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page);
static inline void* so_alloc_fast(so_ctx_v3* ctx, uint32_t ci) {
so_class_v3* hc = &ctx->cls[ci];
const bool skip_header_c7 = (ci == 7) && tiny_header_v3_enabled() && tiny_header_v3_skip_c7();
so_page_v3* p = hc->current;
if (p && p->freelist && p->used < p->capacity) {
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
if (skip_header_c7) {
uint8_t* header_ptr = (uint8_t*)blk;
*header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
return header_ptr + 1; // mirror tiny_region_id_write_header fast path
}
return tiny_region_id_write_header(blk, (int)ci);
}
if (hc->partial) {
so_page_v3* old_cur = hc->current;
p = hc->partial;
hc->partial = p->next;
if (hc->partial_count > 0) {
hc->partial_count--;
}
p->next = NULL;
hc->current = p;
if (old_cur && old_cur != p) {
if (hc->partial_count < hc->max_partial_pages) {
so_page_push_partial(hc, old_cur);
} else {
so_page_retire_slow(ctx, ci, old_cur);
}
}
if (p->freelist && p->used < p->capacity) {
void* blk = p->freelist;
p->freelist = *(void**)blk;
p->used++;
if (skip_header_c7) {
uint8_t* header_ptr = (uint8_t*)blk;
*header_ptr = (uint8_t)(HEADER_MAGIC | (ci & HEADER_CLASS_MASK));
return header_ptr + 1;
}
return tiny_region_id_write_header(blk, (int)ci);
}
}
return NULL;
}
static inline int so_unlink_partial(so_class_v3* hc, so_page_v3* target) {
if (!hc || !target) return 0;
so_page_v3* prev = NULL;
so_page_v3* cur = hc->partial;
while (cur) {
if (cur == target) {
if (prev) {
prev->next = cur->next;
} else {
hc->partial = cur->next;
}
if (hc->partial_count > 0) {
hc->partial_count--;
}
return 1;
}
prev = cur;
cur = cur->next;
}
return 0;
}
static inline void so_page_retire_slow(so_ctx_v3* ctx, uint32_t ci, so_page_v3* page) {
SmallObjectColdIface cold = smallobject_cold_iface_v1();
void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
if (cold.retire_page) {
cold.retire_page(cold_ctx, ci, page);
} else {
free(page);
}
(void)ctx;
}
static inline void so_free_fast(so_ctx_v3* ctx, uint32_t ci, void* ptr) {
so_class_v3* hc = &ctx->cls[ci];
so_page_v3* page = so_page_of(hc, ptr);
if (!page) {
so_v3_record_free_fallback((uint8_t)ci);
tiny_heap_free_class_fast(tiny_heap_ctx_for_thread(), (int)ci, ptr);
return;
}
*(void**)ptr = page->freelist;
page->freelist = ptr;
if (page->used > 0) {
page->used--;
}
if (page->used == 0) {
(void)so_unlink_partial(hc, page);
if (hc->partial_count < hc->max_partial_pages) {
so_page_push_partial(hc, page);
if (!hc->current) {
hc->current = page;
}
} else {
if (hc->current == page) {
hc->current = NULL;
}
so_page_retire_slow(ctx, ci, page);
}
} else if (!hc->current) {
hc->current = page;
}
}
static inline so_page_v3* so_alloc_refill_slow(so_ctx_v3* ctx, uint32_t ci) {
SmallObjectColdIface cold = smallobject_cold_iface_v1();
void* cold_ctx = (void*)tiny_heap_ctx_for_thread();
if (!cold.refill_page) return NULL;
so_page_v3* page = cold.refill_page(cold_ctx, ci);
if (!page) return NULL;
if (page->block_size == 0) {
page->block_size = (uint32_t)tiny_stride_for_class((int)ci);
}
page->class_idx = ci;
page->used = 0;
page->freelist = so_build_freelist(page);
if (!page->freelist) {
if (cold.retire_page) {
cold.retire_page(cold_ctx, ci, page);
} else {
free(page);
}
return NULL;
}
page->next = NULL;
so_class_v3* hc = &ctx->cls[ci];
if (hc->current) {
if (hc->partial_count < hc->max_partial_pages) {
so_page_push_partial(hc, hc->current);
} else {
so_page_retire_slow(ctx, ci, hc->current);
}
}
hc->current = page;
return page;
}
void* so_alloc(uint32_t class_idx) {
if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
return NULL;
}
so_v3_record_route_hit((uint8_t)class_idx);
so_v3_record_alloc_call((uint8_t)class_idx);
so_ctx_v3* ctx = so_tls_get();
void* blk = so_alloc_fast(ctx, class_idx);
if (blk) return blk;
so_page_v3* page = so_alloc_refill_slow(ctx, class_idx);
if (!page) {
so_v3_record_alloc_fallback((uint8_t)class_idx);
return NULL;
}
so_v3_record_alloc_refill((uint8_t)class_idx);
blk = so_alloc_fast(ctx, class_idx);
if (!blk) {
so_v3_record_alloc_fallback((uint8_t)class_idx);
}
return blk;
}
void so_free(uint32_t class_idx, void* ptr) {
if (__builtin_expect(class_idx >= SMALLOBJECT_NUM_CLASSES, 0)) {
return;
}
so_v3_record_free_call((uint8_t)class_idx);
so_ctx_v3* ctx = so_tls_get();
so_free_fast(ctx, class_idx, ptr);
}
__attribute__((destructor))
static void so_v3_stats_dump(void) {
if (!so_v3_stats_enabled()) return;
for (int i = 0; i < SMALLOBJECT_NUM_CLASSES; i++) {
so_stats_class_v3* st = &g_so_stats[i];
uint64_t rh = atomic_load_explicit(&st->route_hits, memory_order_relaxed);
uint64_t ac = atomic_load_explicit(&st->alloc_calls, memory_order_relaxed);
uint64_t ar = atomic_load_explicit(&st->alloc_refill, memory_order_relaxed);
uint64_t afb = atomic_load_explicit(&st->alloc_fallback_v1, memory_order_relaxed);
uint64_t fc = atomic_load_explicit(&st->free_calls, memory_order_relaxed);
uint64_t ffb = atomic_load_explicit(&st->free_fallback_v1, memory_order_relaxed);
if (rh + ac + afb + fc + ffb + ar == 0) continue;
fprintf(stderr, "[SMALL_HEAP_V3_STATS] cls=%d route_hits=%llu alloc_calls=%llu alloc_refill=%llu alloc_fb_v1=%llu free_calls=%llu free_fb_v1=%llu\n",
i, (unsigned long long)rh, (unsigned long long)ac,
(unsigned long long)ar, (unsigned long long)afb, (unsigned long long)fc, (unsigned long long)ffb);
}
}
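A short, hedged round-trip sketch for the v3 SmallObject hot path exposed above; whether so_free() receives the raw block pointer or the header-adjusted user pointer is decided by the front-end wiring (not shown here), so the sketch simply assumes the two forms match.

// Illustration only: one alloc/free round trip for a single class.
// Set HAKMEM_SMALL_HEAP_V3_STATS=1 to see the per-class counters in the dump above.
static void so_v3_roundtrip_sketch(uint32_t ci) {
    void* p = so_alloc(ci);   // fast path, refill, or NULL (caller falls back to v1)
    if (p) {
        so_free(ci, p);       // assumes the same pointer form so_alloc returned
    }
}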

View File

@ -4,6 +4,7 @@
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
#include "hakmem_env_cache.h"
#include "box/ss_os_acquire_box.h"
// ============================================================================
@ -116,9 +117,12 @@ void* ss_os_acquire(uint8_t size_class, size_t ss_size, uintptr_t ss_mask, int p
// This is critical: we MUST touch the pages after munmap() to establish valid mappings
// CRITICAL FIX (2025-12-05): Use MADV_POPULATE_WRITE for efficiency
#ifdef MADV_POPULATE_WRITE
int ret = madvise(ptr, ss_size, MADV_POPULATE_WRITE);
ss_os_stats_record_madvise();
int ret = ss_os_madvise_guarded(ptr, ss_size, MADV_POPULATE_WRITE, "ss_cache_populate");
if (ret != 0) {
if (HAK_ENV_SS_MADVISE_STRICT() && errno == EINVAL) {
fprintf(stderr, "[SS_CACHE] madvise(MADV_POPULATE_WRITE) EINVAL (strict). Aborting.\n");
abort();
}
// Fallback: explicit memset
memset(ptr, 0, ss_size);
}

View File

@ -4,6 +4,8 @@
// Date: 2025-11-28
#include "hakmem_tiny_superslab_internal.h"
#include "box/ss_os_acquire_box.h"
#include <stdbool.h>
#include <stdlib.h>
// ============================================================================
@ -33,8 +35,11 @@ _Atomic uint64_t g_final_fallback_mmap_count = 0;
_Atomic uint64_t g_ss_os_alloc_calls = 0;
_Atomic uint64_t g_ss_os_free_calls = 0;
_Atomic uint64_t g_ss_os_madvise_calls = 0;
_Atomic uint64_t g_ss_os_madvise_fail_enomem = 0;
_Atomic uint64_t g_ss_os_madvise_fail_other = 0;
_Atomic uint64_t g_ss_os_huge_alloc_calls = 0;
_Atomic uint64_t g_ss_os_huge_fail_calls = 0;
_Atomic bool g_ss_madvise_disabled = false;
// Superslab/slab observability (Tiny-only; relaxed updates)
_Atomic uint64_t g_ss_live_by_class[8] = {0};
@ -224,10 +229,14 @@ static void ss_os_stats_dump(void) {
return;
}
fprintf(stderr,
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
"[SS_OS_STATS] alloc=%llu free=%llu madvise=%llu madvise_enomem=%llu madvise_other=%llu madvise_disabled=%d "
"mmap_total=%llu fallback_mmap=%llu huge_alloc=%llu huge_fail=%llu\n",
(unsigned long long)atomic_load_explicit(&g_ss_os_alloc_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_free_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_calls, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_enomem, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_madvise_fail_other, memory_order_relaxed),
atomic_load_explicit(&g_ss_madvise_disabled, memory_order_relaxed) ? 1 : 0,
(unsigned long long)atomic_load_explicit(&g_ss_mmap_count, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_final_fallback_mmap_count, memory_order_relaxed),
(unsigned long long)atomic_load_explicit(&g_ss_os_huge_alloc_calls, memory_order_relaxed),